From 95099d4622f8cb224d94e314c7a8e0df60b13f87 Mon Sep 17 00:00:00 2001
From: hc <hc@nodka.com>
Date: Sat, 09 Dec 2023 08:38:01 +0000
Subject: [PATCH] enable docker ppp
---
kernel/arch/x86/kernel/ptrace.c | 289 ++++++++++++++++++---------------------------------------
1 files changed, 92 insertions(+), 197 deletions(-)
diff --git a/kernel/arch/x86/kernel/ptrace.c b/kernel/arch/x86/kernel/ptrace.c
index 1401f86..bedca01 100644
--- a/kernel/arch/x86/kernel/ptrace.c
+++ b/kernel/arch/x86/kernel/ptrace.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/* By Ross Biro 1/23/92 */
/*
* Pentium III FXSR, SSE support
@@ -27,7 +28,6 @@
#include <linux/nospec.h>
#include <linux/uaccess.h>
-#include <asm/pgtable.h>
#include <asm/processor.h>
#include <asm/fpu/internal.h>
#include <asm/fpu/signal.h>
@@ -40,7 +40,8 @@
#include <asm/hw_breakpoint.h>
#include <asm/traps.h>
#include <asm/syscall.h>
-#include <asm/mmu_context.h>
+#include <asm/fsgsbase.h>
+#include <asm/io_bitmap.h>
#include "tls.h"
@@ -154,35 +155,6 @@
#define FLAG_MASK FLAG_MASK_32
-/*
- * X86_32 CPUs don't save ss and esp if the CPU is already in kernel mode
- * when it traps. The previous stack will be directly underneath the saved
- * registers, and 'sp/ss' won't even have been saved. Thus the '®s->sp'.
- *
- * Now, if the stack is empty, '®s->sp' is out of range. In this
- * case we try to take the previous stack. To always return a non-null
- * stack pointer we fall back to regs as stack if no previous stack
- * exists.
- *
- * This is valid only for kernel mode traps.
- */
-unsigned long kernel_stack_pointer(struct pt_regs *regs)
-{
- unsigned long context = (unsigned long)regs & ~(THREAD_SIZE - 1);
- unsigned long sp = (unsigned long)®s->sp;
- u32 *prev_esp;
-
- if (context == (sp & ~(THREAD_SIZE - 1)))
- return sp;
-
- prev_esp = (u32 *)(context);
- if (*prev_esp)
- return (unsigned long)*prev_esp;
-
- return (unsigned long)regs;
-}
-EXPORT_SYMBOL_GPL(kernel_stack_pointer);
-
static unsigned long *pt_regs_access(struct pt_regs *regs, unsigned long regno)
{
BUILD_BUG_ON(offsetof(struct pt_regs, bx) != 0);
@@ -209,6 +181,9 @@
static int set_segment_reg(struct task_struct *task,
unsigned long offset, u16 value)
{
+ if (WARN_ON_ONCE(task == current))
+ return -EIO;
+
/*
* The value argument was already truncated to 16 bits.
*/
@@ -229,16 +204,14 @@
case offsetof(struct user_regs_struct, ss):
if (unlikely(value == 0))
return -EIO;
+ fallthrough;
default:
*pt_regs_access(task_pt_regs(task), offset) = value;
break;
case offsetof(struct user_regs_struct, gs):
- if (task == current)
- set_user_gs(task_pt_regs(task), value);
- else
- task_user_gs(task) = value;
+ task_user_gs(task) = value;
}
return 0;
@@ -298,32 +271,33 @@
static int set_segment_reg(struct task_struct *task,
unsigned long offset, u16 value)
{
+ if (WARN_ON_ONCE(task == current))
+ return -EIO;
+
/*
* The value argument was already truncated to 16 bits.
*/
if (invalid_selector(value))
return -EIO;
+ /*
+ * Writes to FS and GS will change the stored selector. Whether
+ * this changes the segment base as well depends on whether
+ * FSGSBASE is enabled.
+ */
+
switch (offset) {
case offsetof(struct user_regs_struct,fs):
task->thread.fsindex = value;
- if (task == current)
- loadsegment(fs, task->thread.fsindex);
break;
case offsetof(struct user_regs_struct,gs):
task->thread.gsindex = value;
- if (task == current)
- load_gs_index(task->thread.gsindex);
break;
case offsetof(struct user_regs_struct,ds):
task->thread.ds = value;
- if (task == current)
- loadsegment(ds, task->thread.ds);
break;
case offsetof(struct user_regs_struct,es):
task->thread.es = value;
- if (task == current)
- loadsegment(es, task->thread.es);
break;
/*
@@ -342,49 +316,6 @@
}
return 0;
-}
-
-static unsigned long task_seg_base(struct task_struct *task,
- unsigned short selector)
-{
- unsigned short idx = selector >> 3;
- unsigned long base;
-
- if (likely((selector & SEGMENT_TI_MASK) == 0)) {
- if (unlikely(idx >= GDT_ENTRIES))
- return 0;
-
- /*
- * There are no user segments in the GDT with nonzero bases
- * other than the TLS segments.
- */
- if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
- return 0;
-
- idx -= GDT_ENTRY_TLS_MIN;
- base = get_desc_base(&task->thread.tls_array[idx]);
- } else {
-#ifdef CONFIG_MODIFY_LDT_SYSCALL
- struct ldt_struct *ldt;
-
- /*
- * If performance here mattered, we could protect the LDT
- * with RCU. This is a slow path, though, so we can just
- * take the mutex.
- */
- mutex_lock(&task->mm->context.lock);
- ldt = task->mm->context.ldt;
- if (unlikely(!ldt || idx >= ldt->nr_entries))
- base = 0;
- else
- base = get_desc_base(ldt->entries + idx);
- mutex_unlock(&task->mm->context.lock);
-#else
- base = 0;
-#endif
- }
-
- return base;
}
#endif /* CONFIG_X86_32 */
@@ -440,22 +371,12 @@
case offsetof(struct user_regs_struct,fs_base):
if (value >= TASK_SIZE_MAX)
return -EIO;
- /*
- * When changing the segment base, use do_arch_prctl_64
- * to set either thread.fs or thread.fsindex and the
- * corresponding GDT slot.
- */
- if (child->thread.fsbase != value)
- return do_arch_prctl_64(child, ARCH_SET_FS, value);
+ x86_fsbase_write_task(child, value);
return 0;
case offsetof(struct user_regs_struct,gs_base):
- /*
- * Exactly the same here as the %fs handling above.
- */
if (value >= TASK_SIZE_MAX)
return -EIO;
- if (child->thread.gsbase != value)
- return do_arch_prctl_64(child, ARCH_SET_GS, value);
+ x86_gsbase_write_task(child, value);
return 0;
#endif
}
@@ -479,18 +400,10 @@
return get_flags(task);
#ifdef CONFIG_X86_64
- case offsetof(struct user_regs_struct, fs_base): {
- if (task->thread.fsindex == 0)
- return task->thread.fsbase;
- else
- return task_seg_base(task, task->thread.fsindex);
- }
- case offsetof(struct user_regs_struct, gs_base): {
- if (task->thread.gsindex == 0)
- return task->thread.gsbase;
- else
- return task_seg_base(task, task->thread.gsindex);
- }
+ case offsetof(struct user_regs_struct, fs_base):
+ return x86_fsbase_read_task(task);
+ case offsetof(struct user_regs_struct, gs_base):
+ return x86_gsbase_read_task(task);
#endif
}
@@ -499,26 +412,12 @@
static int genregs_get(struct task_struct *target,
const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
+ struct membuf to)
{
- if (kbuf) {
- unsigned long *k = kbuf;
- while (count >= sizeof(*k)) {
- *k++ = getreg(target, pos);
- count -= sizeof(*k);
- pos += sizeof(*k);
- }
- } else {
- unsigned long __user *u = ubuf;
- while (count >= sizeof(*u)) {
- if (__put_user(getreg(target, pos), u++))
- return -EFAULT;
- count -= sizeof(*u);
- pos += sizeof(*u);
- }
- }
+ int reg;
+ for (reg = 0; to.left; reg++)
+ membuf_store(&to, getreg(target, reg * sizeof(unsigned long)));
return 0;
}
@@ -566,7 +465,7 @@
break;
}
- thread->debugreg6 |= (DR_TRAP0 << i);
+ thread->virtual_dr6 |= (DR_TRAP0 << i);
}
/*
@@ -702,7 +601,7 @@
if (bp)
val = bp->hw.info.address;
} else if (n == 6) {
- val = thread->debugreg6;
+ val = thread->virtual_dr6 ^ DR6_RESERVED; /* Flip back to arch polarity */
} else if (n == 7) {
val = thread->ptrace_dr7;
}
@@ -758,7 +657,7 @@
if (n < HBP_NUM) {
rc = ptrace_set_breakpoint_addr(tsk, n, val);
} else if (n == 6) {
- thread->debugreg6 = val;
+ thread->virtual_dr6 = val ^ DR6_RESERVED; /* Flip to positive polarity */
rc = 0;
} else if (n == 7) {
rc = ptrace_write_dr7(tsk, val);
@@ -775,20 +674,21 @@
static int ioperm_active(struct task_struct *target,
const struct user_regset *regset)
{
- return target->thread.io_bitmap_max / regset->size;
+ struct io_bitmap *iobm = target->thread.io_bitmap;
+
+ return iobm ? DIV_ROUND_UP(iobm->max, regset->size) : 0;
}
static int ioperm_get(struct task_struct *target,
const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
+ struct membuf to)
{
- if (!target->thread.io_bitmap_ptr)
+ struct io_bitmap *iobm = target->thread.io_bitmap;
+
+ if (!iobm)
return -ENXIO;
- return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- target->thread.io_bitmap_ptr,
- 0, IO_BITMAP_BYTES);
+ return membuf_write(&to, iobm->bitmap, IO_BITMAP_BYTES);
}
/*
@@ -799,9 +699,6 @@
void ptrace_disable(struct task_struct *child)
{
user_disable_single_step(child);
-#ifdef TIF_SYSCALL_EMU
- clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
-#endif
}
#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
@@ -946,14 +843,39 @@
static int putreg32(struct task_struct *child, unsigned regno, u32 value)
{
struct pt_regs *regs = task_pt_regs(child);
+ int ret;
switch (regno) {
SEG32(cs);
SEG32(ds);
SEG32(es);
- SEG32(fs);
- SEG32(gs);
+
+ /*
+ * A 32-bit ptracer on a 64-bit kernel expects that writing
+ * FS or GS will also update the base. This is needed for
+ * operations like PTRACE_SETREGS to fully restore a saved
+ * CPU state.
+ */
+
+ case offsetof(struct user32, regs.fs):
+ ret = set_segment_reg(child,
+ offsetof(struct user_regs_struct, fs),
+ value);
+ if (ret == 0)
+ child->thread.fsbase =
+ x86_fsgsbase_read_task(child, value);
+ return ret;
+
+ case offsetof(struct user32, regs.gs):
+ ret = set_segment_reg(child,
+ offsetof(struct user_regs_struct, gs),
+ value);
+ if (ret == 0)
+ child->thread.gsbase =
+ x86_fsgsbase_read_task(child, value);
+ return ret;
+
SEG32(ss);
R32(ebx, bx);
@@ -1069,28 +991,15 @@
static int genregs32_get(struct task_struct *target,
const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
+ struct membuf to)
{
- if (kbuf) {
- compat_ulong_t *k = kbuf;
- while (count >= sizeof(*k)) {
- getreg32(target, pos, k++);
- count -= sizeof(*k);
- pos += sizeof(*k);
- }
- } else {
- compat_ulong_t __user *u = ubuf;
- while (count >= sizeof(*u)) {
- compat_ulong_t word;
- getreg32(target, pos, &word);
- if (__put_user(word, u++))
- return -EFAULT;
- count -= sizeof(*u);
- pos += sizeof(*u);
- }
- }
+ int reg;
+ for (reg = 0; to.left; reg++) {
+ u32 val;
+ getreg32(target, reg * 4, &val);
+ membuf_store(&to, val);
+ }
return 0;
}
@@ -1300,25 +1209,25 @@
.core_note_type = NT_PRSTATUS,
.n = sizeof(struct user_regs_struct) / sizeof(long),
.size = sizeof(long), .align = sizeof(long),
- .get = genregs_get, .set = genregs_set
+ .regset_get = genregs_get, .set = genregs_set
},
[REGSET_FP] = {
.core_note_type = NT_PRFPREG,
.n = sizeof(struct user_i387_struct) / sizeof(long),
.size = sizeof(long), .align = sizeof(long),
- .active = regset_xregset_fpregs_active, .get = xfpregs_get, .set = xfpregs_set
+ .active = regset_xregset_fpregs_active, .regset_get = xfpregs_get, .set = xfpregs_set
},
[REGSET_XSTATE] = {
.core_note_type = NT_X86_XSTATE,
.size = sizeof(u64), .align = sizeof(u64),
- .active = xstateregs_active, .get = xstateregs_get,
+ .active = xstateregs_active, .regset_get = xstateregs_get,
.set = xstateregs_set
},
[REGSET_IOPERM64] = {
.core_note_type = NT_386_IOPERM,
.n = IO_BITMAP_LONGS,
.size = sizeof(long), .align = sizeof(long),
- .active = ioperm_active, .get = ioperm_get
+ .active = ioperm_active, .regset_get = ioperm_get
},
};
@@ -1341,24 +1250,24 @@
.core_note_type = NT_PRSTATUS,
.n = sizeof(struct user_regs_struct32) / sizeof(u32),
.size = sizeof(u32), .align = sizeof(u32),
- .get = genregs32_get, .set = genregs32_set
+ .regset_get = genregs32_get, .set = genregs32_set
},
[REGSET_FP] = {
.core_note_type = NT_PRFPREG,
.n = sizeof(struct user_i387_ia32_struct) / sizeof(u32),
.size = sizeof(u32), .align = sizeof(u32),
- .active = regset_fpregs_active, .get = fpregs_get, .set = fpregs_set
+ .active = regset_fpregs_active, .regset_get = fpregs_get, .set = fpregs_set
},
[REGSET_XFP] = {
.core_note_type = NT_PRXFPREG,
.n = sizeof(struct user32_fxsr_struct) / sizeof(u32),
.size = sizeof(u32), .align = sizeof(u32),
- .active = regset_xregset_fpregs_active, .get = xfpregs_get, .set = xfpregs_set
+ .active = regset_xregset_fpregs_active, .regset_get = xfpregs_get, .set = xfpregs_set
},
[REGSET_XSTATE] = {
.core_note_type = NT_X86_XSTATE,
.size = sizeof(u64), .align = sizeof(u64),
- .active = xstateregs_active, .get = xstateregs_get,
+ .active = xstateregs_active, .regset_get = xstateregs_get,
.set = xstateregs_set
},
[REGSET_TLS] = {
@@ -1367,13 +1276,13 @@
.size = sizeof(struct user_desc),
.align = sizeof(struct user_desc),
.active = regset_tls_active,
- .get = regset_tls_get, .set = regset_tls_set
+ .regset_get = regset_tls_get, .set = regset_tls_set
},
[REGSET_IOPERM32] = {
.core_note_type = NT_386_IOPERM,
.n = IO_BITMAP_BYTES / sizeof(u32),
.size = sizeof(u32), .align = sizeof(u32),
- .active = ioperm_active, .get = ioperm_get
+ .active = ioperm_active, .regset_get = ioperm_get
},
};
@@ -1413,33 +1322,19 @@
#endif
}
-static void fill_sigtrap_info(struct task_struct *tsk,
- struct pt_regs *regs,
- int error_code, int si_code,
- struct siginfo *info)
+void send_sigtrap(struct pt_regs *regs, int error_code, int si_code)
{
+ struct task_struct *tsk = current;
+
tsk->thread.trap_nr = X86_TRAP_DB;
tsk->thread.error_code = error_code;
- info->si_signo = SIGTRAP;
- info->si_code = si_code;
- info->si_addr = user_mode(regs) ? (void __user *)regs->ip : NULL;
-}
-
-void user_single_step_siginfo(struct task_struct *tsk,
- struct pt_regs *regs,
- struct siginfo *info)
-{
- fill_sigtrap_info(tsk, regs, 0, TRAP_BRKPT, info);
-}
-
-void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs,
- int error_code, int si_code)
-{
- struct siginfo info;
-
- clear_siginfo(&info);
- fill_sigtrap_info(tsk, regs, error_code, si_code, &info);
/* Send us the fake SIGTRAP */
- force_sig_info(SIGTRAP, &info, tsk);
+ force_sig_fault(SIGTRAP, si_code,
+ user_mode(regs) ? (void __user *)regs->ip : NULL);
+}
+
+void user_single_step_report(struct pt_regs *regs)
+{
+ send_sigtrap(regs, 0, TRAP_BRKPT);
}
--
Gitblit v1.6.2