From bedbef8ad3e75a304af6361af235302bcc61d06b Mon Sep 17 00:00:00 2001
From: hc <hc@nodka.com>
Date: Tue, 14 May 2024 06:39:01 +0000
Subject: [PATCH] 修改内核路径
---
kernel/arch/x86/include/asm/processor.h | 381 ++++++++++++++++++------------------------------------
1 files changed, 128 insertions(+), 253 deletions(-)
diff --git a/kernel/arch/x86/include/asm/processor.h b/kernel/arch/x86/include/asm/processor.h
index aa26fe0..d7e017b 100644
--- a/kernel/arch/x86/include/asm/processor.h
+++ b/kernel/arch/x86/include/asm/processor.h
@@ -7,6 +7,7 @@
/* Forward declaration, a strange C thing */
struct task_struct;
struct mm_struct;
+struct io_bitmap;
struct vm86;
#include <asm/math_emu.h>
@@ -24,6 +25,7 @@
#include <asm/special_insns.h>
#include <asm/fpu/types.h>
#include <asm/unwind_hints.h>
+#include <asm/vmxfeatures.h>
#include <asm/vdso/processor.h>
#include <linux/personality.h>
@@ -43,18 +45,6 @@
#define NET_IP_ALIGN 0
#define HBP_NUM 4
-/*
- * Default implementation of macro that returns current
- * instruction pointer ("program counter").
- */
-static inline void *current_text_addr(void)
-{
- void *pc;
-
- asm volatile("mov $1f, %0; 1:":"=r" (pc));
-
- return pc;
-}
/*
* These alignment constraints are for performance in the vSMP case,
@@ -97,6 +87,9 @@
/* Number of 4K pages in DTLB/ITLB combined(in pages): */
int x86_tlbsize;
#endif
+#ifdef CONFIG_X86_VMX_FEATURE_NAMES
+ __u32 vmx_capability[NVMXINTS];
+#endif
__u8 x86_virt_bits;
__u8 x86_phys_bits;
/* CPUID returned core id bits: */
@@ -106,19 +99,28 @@
__u32 extended_cpuid_level;
/* Maximum supported CPUID level, -1=no CPUID: */
int cpuid_level;
- __u32 x86_capability[NCAPINTS + NBUGINTS];
+ /*
+ * Align to size of unsigned long because the x86_capability array
+ * is passed to bitops which require the alignment. Use unnamed
+ * union to enforce the array is aligned to size of unsigned long.
+ */
+ union {
+ __u32 x86_capability[NCAPINTS + NBUGINTS];
+ unsigned long x86_capability_alignment;
+ };
char x86_vendor_id[16];
char x86_model_id[64];
/* in KB - valid for CPUS which support this call: */
unsigned int x86_cache_size;
int x86_cache_alignment; /* In bytes */
- /* Cache QoS architectural values: */
+ /* Cache QoS architectural values, valid only on the BSP: */
int x86_cache_max_rmid; /* max index */
int x86_cache_occ_scale; /* scale to bytes */
+ int x86_cache_mbm_width_offset;
int x86_power;
unsigned long loops_per_jiffy;
/* cpuid returned max cores value: */
- u16 x86_max_cores;
+ u16 x86_max_cores;
u16 apicid;
u16 initial_apicid;
u16 x86_clflush_size;
@@ -130,6 +132,8 @@
u16 logical_proc_id;
/* Core id: */
u16 cpu_core_id;
+ u16 cpu_die_id;
+ u16 logical_die_id;
/* Index into per_cpu list: */
u16 cpu_index;
u32 microcode;
@@ -156,7 +160,9 @@
#define X86_VENDOR_CENTAUR 5
#define X86_VENDOR_TRANSMETA 7
#define X86_VENDOR_NSC 8
-#define X86_VENDOR_NUM 9
+#define X86_VENDOR_HYGON 9
+#define X86_VENDOR_ZHAOXIN 10
+#define X86_VENDOR_NUM 11
#define X86_VENDOR_UNKNOWN 0xff
@@ -166,7 +172,6 @@
extern struct cpuinfo_x86 boot_cpu_data;
extern struct cpuinfo_x86 new_cpu_data;
-extern struct x86_hw_tss doublefault_tss;
extern __u32 cpu_caps_cleared[NCAPINTS + NBUGINTS];
extern __u32 cpu_caps_set[NCAPINTS + NBUGINTS];
@@ -316,7 +321,13 @@
*/
u64 sp1;
+ /*
+ * Since Linux does not use ring 2, the 'sp2' slot is unused by
+ * hardware. entry_SYSCALL_64 uses it as scratch space to stash
+ * the user RSP value.
+ */
u64 sp2;
+
u64 reserved2;
u64 ist[7];
u32 reserved3;
@@ -331,10 +342,32 @@
* IO-bitmap sizes:
*/
#define IO_BITMAP_BITS 65536
-#define IO_BITMAP_BYTES (IO_BITMAP_BITS/8)
-#define IO_BITMAP_LONGS (IO_BITMAP_BYTES/sizeof(long))
-#define IO_BITMAP_OFFSET (offsetof(struct tss_struct, io_bitmap) - offsetof(struct tss_struct, x86_tss))
-#define INVALID_IO_BITMAP_OFFSET 0x8000
+#define IO_BITMAP_BYTES (IO_BITMAP_BITS / BITS_PER_BYTE)
+#define IO_BITMAP_LONGS (IO_BITMAP_BYTES / sizeof(long))
+
+#define IO_BITMAP_OFFSET_VALID_MAP \
+ (offsetof(struct tss_struct, io_bitmap.bitmap) - \
+ offsetof(struct tss_struct, x86_tss))
+
+#define IO_BITMAP_OFFSET_VALID_ALL \
+ (offsetof(struct tss_struct, io_bitmap.mapall) - \
+ offsetof(struct tss_struct, x86_tss))
+
+#ifdef CONFIG_X86_IOPL_IOPERM
+/*
+ * sizeof(unsigned long) coming from an extra "long" at the end of the
+ * iobitmap. The limit is inclusive, i.e. the last valid byte.
+ */
+# define __KERNEL_TSS_LIMIT \
+ (IO_BITMAP_OFFSET_VALID_ALL + IO_BITMAP_BYTES + \
+ sizeof(unsigned long) - 1)
+#else
+# define __KERNEL_TSS_LIMIT \
+ (offsetof(struct tss_struct, x86_tss) + sizeof(struct x86_hw_tss) - 1)
+#endif
+
+/* Base offset outside of TSS_LIMIT so unpriviledged IO causes #GP */
+#define IO_BITMAP_OFFSET_INVALID (__KERNEL_TSS_LIMIT + 1)
struct entry_stack {
char stack[PAGE_SIZE];
@@ -344,6 +377,37 @@
struct entry_stack stack;
} __aligned(PAGE_SIZE);
+/*
+ * All IO bitmap related data stored in the TSS:
+ */
+struct x86_io_bitmap {
+ /* The sequence number of the last active bitmap. */
+ u64 prev_sequence;
+
+ /*
+ * Store the dirty size of the last io bitmap offender. The next
+ * one will have to do the cleanup as the switch out to a non io
+ * bitmap user will just set x86_tss.io_bitmap_base to a value
+ * outside of the TSS limit. So for sane tasks there is no need to
+ * actually touch the io_bitmap at all.
+ */
+ unsigned int prev_max;
+
+ /*
+ * The extra 1 is there because the CPU will access an
+ * additional byte beyond the end of the IO permission
+ * bitmap. The extra byte must be all 1 bits, and must
+ * be within the limit.
+ */
+ unsigned long bitmap[IO_BITMAP_LONGS + 1];
+
+ /*
+ * Special I/O bitmap to emulate IOPL(3). All bytes zero,
+ * except the additional byte at the end.
+ */
+ unsigned long mapall[IO_BITMAP_LONGS + 1];
+};
+
struct tss_struct {
/*
* The fixed hardware portion. This must not cross a page boundary
@@ -352,26 +416,17 @@
*/
struct x86_hw_tss x86_tss;
- /*
- * The extra 1 is there because the CPU will access an
- * additional byte beyond the end of the IO permission
- * bitmap. The extra byte must be all 1 bits, and must
- * be within the limit.
- */
- unsigned long io_bitmap[IO_BITMAP_LONGS + 1];
+ struct x86_io_bitmap io_bitmap;
} __aligned(PAGE_SIZE);
DECLARE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss_rw);
-/*
- * sizeof(unsigned long) coming from an extra "long" at the end
- * of the iobitmap.
- *
- * -1? seg base+limit should be pointing to the address of the
- * last valid byte
- */
-#define __KERNEL_TSS_LIMIT \
- (IO_BITMAP_OFFSET + IO_BITMAP_BYTES + sizeof(unsigned long) - 1)
+/* Per CPU interrupt stacks */
+struct irq_stack {
+ char stack[IRQ_STACK_SIZE];
+} __aligned(IRQ_STACK_SIZE);
+
+DECLARE_PER_CPU(struct irq_stack *, hardirq_stack_ptr);
#ifdef CONFIG_X86_32
DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack);
@@ -380,45 +435,30 @@
#define cpu_current_top_of_stack cpu_tss_rw.x86_tss.sp1
#endif
-/*
- * Save the original ist values for checking stack pointers during debugging
- */
-struct orig_ist {
- unsigned long ist[7];
-};
-
#ifdef CONFIG_X86_64
-DECLARE_PER_CPU(struct orig_ist, orig_ist);
-
-union irq_stack_union {
- char irq_stack[IRQ_STACK_SIZE];
+struct fixed_percpu_data {
/*
* GCC hardcodes the stack canary as %gs:40. Since the
* irq_stack is the object at %gs:0, we reserve the bottom
* 48 bytes of the irq stack for the canary.
*/
- struct {
- char gs_base[40];
- unsigned long stack_canary;
- };
+ char gs_base[40];
+ unsigned long stack_canary;
};
-DECLARE_PER_CPU_FIRST(union irq_stack_union, irq_stack_union) __visible;
-DECLARE_INIT_PER_CPU(irq_stack_union);
+DECLARE_PER_CPU_FIRST(struct fixed_percpu_data, fixed_percpu_data) __visible;
+DECLARE_INIT_PER_CPU(fixed_percpu_data);
static inline unsigned long cpu_kernelmode_gs_base(int cpu)
{
- return (unsigned long)per_cpu(irq_stack_union.gs_base, cpu);
+ return (unsigned long)per_cpu(fixed_percpu_data.gs_base, cpu);
}
-DECLARE_PER_CPU(char *, irq_stack_ptr);
DECLARE_PER_CPU(unsigned int, irq_count);
extern asmlinkage void ignore_sysret(void);
-#if IS_ENABLED(CONFIG_KVM)
/* Save actual FS/GS selectors and bases to current->thread */
-void save_fsgs_for_kvm(void);
-#endif
+void current_save_fsgs(void);
#else /* X86_64 */
#ifdef CONFIG_STACKPROTECTOR
/*
@@ -433,25 +473,14 @@
};
DECLARE_PER_CPU_ALIGNED(struct stack_canary, stack_canary);
#endif
-/*
- * per-CPU IRQ handling stacks
- */
-struct irq_stack {
- u32 stack[THREAD_SIZE/sizeof(u32)];
-} __aligned(THREAD_SIZE);
-
-DECLARE_PER_CPU(struct irq_stack *, hardirq_stack);
-DECLARE_PER_CPU(struct irq_stack *, softirq_stack);
+/* Per CPU softirq stack pointer */
+DECLARE_PER_CPU(struct irq_stack *, softirq_stack_ptr);
#endif /* X86_64 */
extern unsigned int fpu_kernel_xstate_size;
extern unsigned int fpu_user_xstate_size;
struct perf_event;
-
-typedef struct {
- unsigned long seg;
-} mm_segment_t;
struct thread_struct {
/* Cached TLS descriptors: */
@@ -484,7 +513,7 @@
/* Save middle states of ptrace breakpoints */
struct perf_event *ptrace_bps[HBP_NUM];
/* Debug status used for traps, single steps, etc... */
- unsigned long debugreg6;
+ unsigned long virtual_dr6;
/* Keep track of the exact dr7 value set by the user */
unsigned long ptrace_dr7;
/* Fault info: */
@@ -496,15 +525,17 @@
struct vm86 *vm86;
#endif
/* IO permissions: */
- unsigned long *io_bitmap_ptr;
- unsigned long iopl;
- /* Max allowed port in the bitmap, in bytes: */
- unsigned io_bitmap_max;
+ struct io_bitmap *io_bitmap;
- mm_segment_t addr_limit;
+ /*
+ * IOPL. Priviledge level dependent I/O permission which is
+ * emulated via the I/O bitmap to prevent user space from disabling
+ * interrupts.
+ */
+ unsigned long iopl_emul;
+ unsigned int iopl_warn:1;
unsigned int sig_on_uaccess_err:1;
- unsigned int uaccess_err:1; /* uaccess failed */
/* Floating point and extended processor state */
struct fpu fpu;
@@ -522,32 +553,13 @@
*size = fpu_kernel_xstate_size;
}
-/*
- * Set IOPL bits in EFLAGS from given mask
- */
-static inline void native_set_iopl_mask(unsigned mask)
-{
-#ifdef CONFIG_X86_32
- unsigned int reg;
-
- asm volatile ("pushfl;"
- "popl %0;"
- "andl %1, %0;"
- "orl %2, %0;"
- "pushl %0;"
- "popfl"
- : "=&r" (reg)
- : "i" (~X86_EFLAGS_IOPL), "r" (mask));
-#endif
-}
-
static inline void
native_load_sp0(unsigned long sp0)
{
this_cpu_write(cpu_tss_rw.x86_tss.sp0, sp0);
}
-static inline void native_swapgs(void)
+static __always_inline void native_swapgs(void)
{
#ifdef CONFIG_X86_64
asm volatile("swapgs" ::: "memory");
@@ -570,7 +582,7 @@
current_stack_pointer) < THREAD_SIZE;
}
-#ifdef CONFIG_PARAVIRT
+#ifdef CONFIG_PARAVIRT_XXL
#include <asm/paravirt.h>
#else
#define __cpuid native_cpuid
@@ -580,8 +592,7 @@
native_load_sp0(sp0);
}
-#define set_iopl_mask native_set_iopl_mask
-#endif /* CONFIG_PARAVIRT */
+#endif /* CONFIG_PARAVIRT_XXL */
/* Free all resources held by a thread. */
extern void release_thread(struct task_struct *);
@@ -651,72 +662,6 @@
return edx;
}
-/*
- * This function forces the icache and prefetched instruction stream to
- * catch up with reality in two very specific cases:
- *
- * a) Text was modified using one virtual address and is about to be executed
- * from the same physical page at a different virtual address.
- *
- * b) Text was modified on a different CPU, may subsequently be
- * executed on this CPU, and you want to make sure the new version
- * gets executed. This generally means you're calling this in a IPI.
- *
- * If you're calling this for a different reason, you're probably doing
- * it wrong.
- */
-static inline void sync_core(void)
-{
- /*
- * There are quite a few ways to do this. IRET-to-self is nice
- * because it works on every CPU, at any CPL (so it's compatible
- * with paravirtualization), and it never exits to a hypervisor.
- * The only down sides are that it's a bit slow (it seems to be
- * a bit more than 2x slower than the fastest options) and that
- * it unmasks NMIs. The "push %cs" is needed because, in
- * paravirtual environments, __KERNEL_CS may not be a valid CS
- * value when we do IRET directly.
- *
- * In case NMI unmasking or performance ever becomes a problem,
- * the next best option appears to be MOV-to-CR2 and an
- * unconditional jump. That sequence also works on all CPUs,
- * but it will fault at CPL3 (i.e. Xen PV).
- *
- * CPUID is the conventional way, but it's nasty: it doesn't
- * exist on some 486-like CPUs, and it usually exits to a
- * hypervisor.
- *
- * Like all of Linux's memory ordering operations, this is a
- * compiler barrier as well.
- */
-#ifdef CONFIG_X86_32
- asm volatile (
- "pushfl\n\t"
- "pushl %%cs\n\t"
- "pushl $1f\n\t"
- "iret\n\t"
- "1:"
- : ASM_CALL_CONSTRAINT : : "memory");
-#else
- unsigned int tmp;
-
- asm volatile (
- UNWIND_HINT_SAVE
- "mov %%ss, %0\n\t"
- "pushq %q0\n\t"
- "pushq %%rsp\n\t"
- "addq $8, (%%rsp)\n\t"
- "pushfq\n\t"
- "mov %%cs, %0\n\t"
- "pushq %q0\n\t"
- "pushq $1f\n\t"
- "iretq\n\t"
- UNWIND_HINT_RESTORE
- "1:"
- : "=&r" (tmp), ASM_CALL_CONSTRAINT : : "cc", "memory");
-#endif
-}
-
extern void select_idle_routine(const struct cpuinfo_x86 *c);
extern void amd_e400_c1e_apic_setup(void);
@@ -728,7 +673,6 @@
extern void enable_sep_cpu(void);
extern int sysenter_setup(void);
-void early_trap_pf_init(void);
/* Defined in head.S */
extern struct desc_ptr early_gdt_descr;
@@ -738,6 +682,9 @@
extern void load_fixmap_gdt(int);
extern void load_percpu_segment(int);
extern void cpu_init(void);
+extern void cpu_init_secondary(void);
+extern void cpu_init_exception_handling(void);
+extern void cr4_init(void);
static inline unsigned long get_debugctlmsr(void)
{
@@ -798,7 +745,7 @@
* Useful for spinlocks to avoid one state transition in the
* cache coherency protocol:
*/
-static inline void prefetchw(const void *x)
+static __always_inline void prefetchw(const void *x)
{
alternative_input(BASE_PREFETCH, "prefetchw %P1",
X86_FEATURE_3DNOWPREFETCH,
@@ -823,68 +770,15 @@
})
#ifdef CONFIG_X86_32
-/*
- * User space process size: 3GB (default).
- */
-#define IA32_PAGE_OFFSET PAGE_OFFSET
-#define TASK_SIZE PAGE_OFFSET
-#define TASK_SIZE_LOW TASK_SIZE
-#define TASK_SIZE_MAX TASK_SIZE
-#define DEFAULT_MAP_WINDOW TASK_SIZE
-#define STACK_TOP TASK_SIZE
-#define STACK_TOP_MAX STACK_TOP
-
#define INIT_THREAD { \
.sp0 = TOP_OF_INIT_STACK, \
.sysenter_cs = __KERNEL_CS, \
- .io_bitmap_ptr = NULL, \
- .addr_limit = KERNEL_DS, \
}
#define KSTK_ESP(task) (task_pt_regs(task)->sp)
#else
-/*
- * User space process size. This is the first address outside the user range.
- * There are a few constraints that determine this:
- *
- * On Intel CPUs, if a SYSCALL instruction is at the highest canonical
- * address, then that syscall will enter the kernel with a
- * non-canonical return address, and SYSRET will explode dangerously.
- * We avoid this particular problem by preventing anything executable
- * from being mapped at the maximum canonical address.
- *
- * On AMD CPUs in the Ryzen family, there's a nasty bug in which the
- * CPUs malfunction if they execute code from the highest canonical page.
- * They'll speculate right off the end of the canonical space, and
- * bad things happen. This is worked around in the same way as the
- * Intel problem.
- *
- * With page table isolation enabled, we map the LDT in ... [stay tuned]
- */
-#define TASK_SIZE_MAX ((1UL << __VIRTUAL_MASK_SHIFT) - PAGE_SIZE)
-
-#define DEFAULT_MAP_WINDOW ((1UL << 47) - PAGE_SIZE)
-
-/* This decides where the kernel will search for a free chunk of vm
- * space during mmap's.
- */
-#define IA32_PAGE_OFFSET ((current->personality & ADDR_LIMIT_3GB) ? \
- 0xc0000000 : 0xFFFFe000)
-
-#define TASK_SIZE_LOW (test_thread_flag(TIF_ADDR32) ? \
- IA32_PAGE_OFFSET : DEFAULT_MAP_WINDOW)
-#define TASK_SIZE (test_thread_flag(TIF_ADDR32) ? \
- IA32_PAGE_OFFSET : TASK_SIZE_MAX)
-#define TASK_SIZE_OF(child) ((test_tsk_thread_flag(child, TIF_ADDR32)) ? \
- IA32_PAGE_OFFSET : TASK_SIZE_MAX)
-
-#define STACK_TOP TASK_SIZE_LOW
-#define STACK_TOP_MAX TASK_SIZE_MAX
-
-#define INIT_THREAD { \
- .addr_limit = KERNEL_DS, \
-}
+#define INIT_THREAD { }
extern unsigned long KSTK_ESP(struct task_struct *task);
@@ -911,30 +805,16 @@
DECLARE_PER_CPU(u64, msr_misc_features_shadow);
-/* Register/unregister a process' MPX related resource */
-#define MPX_ENABLE_MANAGEMENT() mpx_enable_management()
-#define MPX_DISABLE_MANAGEMENT() mpx_disable_management()
-
-#ifdef CONFIG_X86_INTEL_MPX
-extern int mpx_enable_management(void);
-extern int mpx_disable_management(void);
-#else
-static inline int mpx_enable_management(void)
-{
- return -EINVAL;
-}
-static inline int mpx_disable_management(void)
-{
- return -EINVAL;
-}
-#endif /* CONFIG_X86_INTEL_MPX */
-
#ifdef CONFIG_CPU_SUP_AMD
extern u16 amd_get_nb_id(int cpu);
extern u32 amd_get_nodes_per_socket(void);
+extern bool cpu_has_ibpb_brtype_microcode(void);
+extern void amd_clear_divider(void);
#else
static inline u16 amd_get_nb_id(int cpu) { return 0; }
static inline u32 amd_get_nodes_per_socket(void) { return 0; }
+static inline bool cpu_has_ibpb_brtype_microcode(void) { return false; }
+static inline void amd_clear_divider(void) { }
#endif
static inline uint32_t hypervisor_cpuid_base(const char *sig, uint32_t leaves)
@@ -953,8 +833,8 @@
}
extern unsigned long arch_align_stack(unsigned long sp);
-extern void free_init_pages(char *what, unsigned long begin, unsigned long end);
-extern void free_kernel_image_pages(void *begin, void *end);
+void free_init_pages(const char *what, unsigned long begin, unsigned long end);
+extern void free_kernel_image_pages(const char *what, void *begin, void *end);
void default_idle(void);
#ifdef CONFIG_XEN
@@ -963,9 +843,9 @@
#define xen_set_default_idle 0
#endif
-void stop_this_cpu(void *dummy);
-void df_debug(struct pt_regs *regs, long error_code);
-void microcode_check(void);
+void __noreturn stop_this_cpu(void *dummy);
+void microcode_check(struct cpuinfo_x86 *prev_info);
+void store_cpu_caps(struct cpuinfo_x86 *info);
enum l1tf_mitigations {
L1TF_MITIGATION_OFF,
@@ -984,11 +864,6 @@
MDS_MITIGATION_VMWERV,
};
-enum taa_mitigations {
- TAA_MITIGATION_OFF,
- TAA_MITIGATION_UCODE_NEEDED,
- TAA_MITIGATION_VERW,
- TAA_MITIGATION_TSX_DISABLED,
-};
+extern bool gds_ucode_mitigated(void);
#endif /* _ASM_X86_PROCESSOR_H */
--
Gitblit v1.6.2