From 95099d4622f8cb224d94e314c7a8e0df60b13f87 Mon Sep 17 00:00:00 2001
From: hc <hc@nodka.com>
Date: Sat, 09 Dec 2023 08:38:01 +0000
Subject: [PATCH] enable docker ppp
---
kernel/arch/x86/kernel/fpu/xstate.c | 732 ++++++++++++++++++++++++++++++++++---------------------
1 file changed, 447 insertions(+), 285 deletions(-)
diff --git a/kernel/arch/x86/kernel/fpu/xstate.c b/kernel/arch/x86/kernel/fpu/xstate.c
index 7d372db..80836b9 100644
--- a/kernel/arch/x86/kernel/fpu/xstate.c
+++ b/kernel/arch/x86/kernel/fpu/xstate.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* xsave/xrstor support.
*
@@ -7,6 +8,8 @@
#include <linux/cpu.h>
#include <linux/mman.h>
#include <linux/pkeys.h>
+#include <linux/seq_file.h>
+#include <linux/proc_fs.h>
#include <asm/fpu/api.h>
#include <asm/fpu/internal.h>
@@ -34,6 +37,7 @@
"AVX-512 ZMM_Hi256" ,
"Processor Trace (unused)" ,
"Protection Keys User registers",
+ "PASID state",
"unknown xstate feature" ,
};
@@ -48,16 +52,19 @@
X86_FEATURE_AVX512F,
X86_FEATURE_INTEL_PT,
X86_FEATURE_PKU,
+ X86_FEATURE_ENQCMD,
};
/*
- * Mask of xstate features supported by the CPU and the kernel:
+ * This represents the full set of bits that should ever be set in a kernel
+ * XSAVE buffer, both supervisor and user xstates.
*/
-u64 xfeatures_mask __read_mostly;
+u64 xfeatures_mask_all __read_mostly;
static unsigned int xstate_offsets[XFEATURE_MAX] = { [ 0 ... XFEATURE_MAX - 1] = -1};
static unsigned int xstate_sizes[XFEATURE_MAX] = { [ 0 ... XFEATURE_MAX - 1] = -1};
-static unsigned int xstate_comp_offsets[sizeof(xfeatures_mask)*8];
+static unsigned int xstate_comp_offsets[XFEATURE_MAX] = { [ 0 ... XFEATURE_MAX - 1] = -1};
+static unsigned int xstate_supervisor_only_offsets[XFEATURE_MAX] = { [ 0 ... XFEATURE_MAX - 1] = -1};
/*
* The XSAVE area of kernel can be in standard or compacted format;
@@ -67,22 +74,13 @@
unsigned int fpu_user_xstate_size;
/*
- * Clear all of the X86_FEATURE_* bits that are unavailable
- * when the CPU has no XSAVE support.
- */
-void fpu__xstate_clear_all_cpu_caps(void)
-{
- setup_clear_cpu_cap(X86_FEATURE_XSAVE);
-}
-
-/*
* Return whether the system supports a given xfeature.
*
* Also return the name of the (most advanced) feature that the caller requested:
*/
int cpu_has_xfeatures(u64 xfeatures_needed, const char **feature_name)
{
- u64 xfeatures_missing = xfeatures_needed & ~xfeatures_mask;
+ u64 xfeatures_missing = xfeatures_needed & ~xfeatures_mask_all;
if (unlikely(feature_name)) {
long xfeature_idx, max_idx;
@@ -113,25 +111,17 @@
}
EXPORT_SYMBOL_GPL(cpu_has_xfeatures);
-static int xfeature_is_supervisor(int xfeature_nr)
+static bool xfeature_is_supervisor(int xfeature_nr)
{
/*
- * We currently do not support supervisor states, but if
- * we did, we could find out like this.
- *
- * SDM says: If state component 'i' is a user state component,
- * ECX[0] return 0; if state component i is a supervisor
- * state component, ECX[0] returns 1.
+ * Extended State Enumeration Sub-leaves (EAX = 0DH, ECX = n, n > 1)
+ * returns ECX[0] set to (1) for a supervisor state, and cleared (0)
+ * for a user state.
*/
u32 eax, ebx, ecx, edx;
cpuid_count(XSTATE_CPUID, xfeature_nr, &eax, &ebx, &ecx, &edx);
- return !!(ecx & 1);
-}
-
-static int xfeature_is_user(int xfeature_nr)
-{
- return !xfeature_is_supervisor(xfeature_nr);
+ return ecx & 1;
}
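
For reference, the same enumeration can be checked from user space with GCC's <cpuid.h>; the standalone sketch below is illustrative only and is not part of the patch (leaf 0x0D is XSTATE_CPUID, and xfeature 8, Processor Trace, is a supervisor state):

#include <cpuid.h>
#include <stdbool.h>
#include <stdio.h>

static bool xfeature_nr_is_supervisor(unsigned int nr)
{
        unsigned int eax, ebx, ecx, edx;

        if (!__get_cpuid_count(0x0d, nr, &eax, &ebx, &ecx, &edx))
                return false;
        /* ECX[0] is 1 for a supervisor state, 0 for a user state. */
        return ecx & 1;
}

int main(void)
{
        printf("xfeature 8 (PT) is supervisor: %d\n",
               xfeature_nr_is_supervisor(8));
        return 0;
}
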
/*
@@ -164,7 +154,7 @@
* None of the feature bits are in init state. So nothing else
* to do for us, as the memory layout is up to date.
*/
- if ((xfeatures & xfeatures_mask) == xfeatures_mask)
+ if ((xfeatures & xfeatures_mask_all) == xfeatures_mask_all)
return;
/*
@@ -191,7 +181,7 @@
* in a special way already:
*/
feature_bit = 0x2;
- xfeatures = (xfeatures_mask & ~xfeatures) >> 2;
+ xfeatures = (xfeatures_mask_user() & ~xfeatures) >> 2;
/*
* Update all the remaining memory layouts according to their
@@ -219,30 +209,41 @@
*/
void fpu__init_cpu_xstate(void)
{
- if (!boot_cpu_has(X86_FEATURE_XSAVE) || !xfeatures_mask)
+ u64 unsup_bits;
+
+ if (!boot_cpu_has(X86_FEATURE_XSAVE) || !xfeatures_mask_all)
return;
/*
- * Make it clear that XSAVES supervisor states are not yet
- * implemented should anyone expect it to work by changing
- * bits in XFEATURE_MASK_* macros and XCR0.
+ * Unsupported supervisor xstates should not be found in
+ * the xfeatures mask.
*/
- WARN_ONCE((xfeatures_mask & XFEATURE_MASK_SUPERVISOR),
- "x86/fpu: XSAVES supervisor states are not yet implemented.\n");
+ unsup_bits = xfeatures_mask_all & XFEATURE_MASK_SUPERVISOR_UNSUPPORTED;
+ WARN_ONCE(unsup_bits, "x86/fpu: Found unsupported supervisor xstates: 0x%llx\n",
+ unsup_bits);
- xfeatures_mask &= ~XFEATURE_MASK_SUPERVISOR;
+ xfeatures_mask_all &= ~XFEATURE_MASK_SUPERVISOR_UNSUPPORTED;
cr4_set_bits(X86_CR4_OSXSAVE);
- xsetbv(XCR_XFEATURE_ENABLED_MASK, xfeatures_mask);
+
+ /*
+ * XCR_XFEATURE_ENABLED_MASK (aka. XCR0) sets user features
+ * managed by XSAVE{C, OPT, S} and XRSTOR{S}. Only XSAVE user
+ * states can be set here.
+ */
+ xsetbv(XCR_XFEATURE_ENABLED_MASK, xfeatures_mask_user());
+
+ /*
+ * MSR_IA32_XSS sets supervisor states managed by XSAVES.
+ */
+ if (boot_cpu_has(X86_FEATURE_XSAVES)) {
+ wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor() |
+ xfeatures_mask_dynamic());
+ }
}
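
The xfeatures_mask_user(), xfeatures_mask_supervisor() and xfeatures_mask_dynamic() helpers used above come from the matching arch/x86/include/asm/fpu/xstate.h change, which is not part of this diff. A rough sketch of what this patch assumes they look like (details may differ in the actual header):

static inline u64 xfeatures_mask_supervisor(void)
{
        return xfeatures_mask_all & XFEATURE_MASK_SUPERVISOR_SUPPORTED;
}

static inline u64 xfeatures_mask_user(void)
{
        return xfeatures_mask_all & XFEATURE_MASK_USER_SUPPORTED;
}

/*
 * Dynamic supervisor states (e.g. arch LBR) are enabled in IA32_XSS but
 * are not saved into the regular task buffer by the context switch code.
 */
static inline u64 xfeatures_mask_dynamic(void)
{
        if (!boot_cpu_has(X86_FEATURE_ARCH_LBR))
                return XFEATURE_MASK_DYNAMIC & ~XFEATURE_MASK_LBR;

        return XFEATURE_MASK_DYNAMIC;
}
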
-/*
- * Note that in the future we will likely need a pair of
- * functions here: one for user xstates and the other for
- * system xstates. For now, they are the same.
- */
-static int xfeature_enabled(enum xfeature xfeature)
+static bool xfeature_enabled(enum xfeature xfeature)
{
- return !!(xfeatures_mask & (1UL << xfeature));
+ return xfeatures_mask_all & BIT_ULL(xfeature);
}
/*
@@ -260,10 +261,13 @@
* in the fixed offsets in the xsave area in either compacted form
* or standard form.
*/
- xstate_offsets[0] = 0;
- xstate_sizes[0] = offsetof(struct fxregs_state, xmm_space);
- xstate_offsets[1] = xstate_sizes[0];
- xstate_sizes[1] = FIELD_SIZEOF(struct fxregs_state, xmm_space);
+ xstate_offsets[XFEATURE_FP] = 0;
+ xstate_sizes[XFEATURE_FP] = offsetof(struct fxregs_state,
+ xmm_space);
+
+ xstate_offsets[XFEATURE_SSE] = xstate_sizes[XFEATURE_FP];
+ xstate_sizes[XFEATURE_SSE] = sizeof_field(struct fxregs_state,
+ xmm_space);
for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) {
if (!xfeature_enabled(i))
@@ -271,21 +275,25 @@
cpuid_count(XSTATE_CPUID, i, &eax, &ebx, &ecx, &edx);
- /*
- * If an xfeature is supervisor state, the offset
- * in EBX is invalid. We leave it to -1.
- */
- if (xfeature_is_user(i))
- xstate_offsets[i] = ebx;
-
xstate_sizes[i] = eax;
+
/*
- * In our xstate size checks, we assume that the
- * highest-numbered xstate feature has the
- * highest offset in the buffer. Ensure it does.
+ * If an xfeature is supervisor state, the offset in EBX is
+ * invalid, leave it to -1.
+ */
+ if (xfeature_is_supervisor(i))
+ continue;
+
+ xstate_offsets[i] = ebx;
+
+ /*
+ * In our xstate size checks, we assume that the highest-numbered
+ * xstate feature has the highest offset in the buffer. Ensure
+ * it does.
*/
WARN_ONCE(last_good_offset > xstate_offsets[i],
- "x86/fpu: misordered xstate at %d\n", last_good_offset);
+ "x86/fpu: misordered xstate at %d\n", last_good_offset);
+
last_good_offset = xstate_offsets[i];
}
}
@@ -312,6 +320,7 @@
print_xstate_feature(XFEATURE_MASK_ZMM_Hi256);
print_xstate_feature(XFEATURE_MASK_Hi16_ZMM);
print_xstate_feature(XFEATURE_MASK_PKRU);
+ print_xstate_feature(XFEATURE_MASK_PASID);
}
/*
@@ -332,6 +341,13 @@
u32 eax, ebx, ecx, edx;
CHECK_XFEATURE(xfeature_nr);
+
+ if (!xfeature_enabled(xfeature_nr)) {
+ WARN_ONCE(1, "Checking alignment of disabled xfeature %d\n",
+ xfeature_nr);
+ return 0;
+ }
+
cpuid_count(XSTATE_CPUID, xfeature_nr, &eax, &ebx, &ecx, &edx);
/*
* The value returned by ECX[1] indicates the alignment
@@ -344,11 +360,11 @@
/*
* This function sets up offsets and sizes of all extended states in
* xsave area. This supports both standard format and compacted format
- * of the xsave aread.
+ * of the xsave area.
*/
-static void __init setup_xstate_comp(void)
+static void __init setup_xstate_comp_offsets(void)
{
- unsigned int xstate_comp_sizes[sizeof(xfeatures_mask)*8];
+ unsigned int next_offset;
int i;
/*
@@ -356,36 +372,56 @@
* in the fixed offsets in the xsave area in either compacted form
* or standard form.
*/
- xstate_comp_offsets[0] = 0;
- xstate_comp_offsets[1] = offsetof(struct fxregs_state, xmm_space);
+ xstate_comp_offsets[XFEATURE_FP] = 0;
+ xstate_comp_offsets[XFEATURE_SSE] = offsetof(struct fxregs_state,
+ xmm_space);
if (!boot_cpu_has(X86_FEATURE_XSAVES)) {
for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) {
- if (xfeature_enabled(i)) {
+ if (xfeature_enabled(i))
xstate_comp_offsets[i] = xstate_offsets[i];
- xstate_comp_sizes[i] = xstate_sizes[i];
- }
}
return;
}
- xstate_comp_offsets[FIRST_EXTENDED_XFEATURE] =
- FXSAVE_SIZE + XSAVE_HDR_SIZE;
+ next_offset = FXSAVE_SIZE + XSAVE_HDR_SIZE;
for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) {
- if (xfeature_enabled(i))
- xstate_comp_sizes[i] = xstate_sizes[i];
- else
- xstate_comp_sizes[i] = 0;
+ if (!xfeature_enabled(i))
+ continue;
- if (i > FIRST_EXTENDED_XFEATURE) {
- xstate_comp_offsets[i] = xstate_comp_offsets[i-1]
- + xstate_comp_sizes[i-1];
+ if (xfeature_is_aligned(i))
+ next_offset = ALIGN(next_offset, 64);
- if (xfeature_is_aligned(i))
- xstate_comp_offsets[i] =
- ALIGN(xstate_comp_offsets[i], 64);
- }
+ xstate_comp_offsets[i] = next_offset;
+ next_offset += xstate_sizes[i];
+ }
+}
+
+/*
+ * Setup offsets of a supervisor-state-only XSAVES buffer:
+ *
+ * The offsets stored in xstate_comp_offsets[] only work for one specific
+ * value of the Requested Feature BitMap (RFBM). In cases where a different
+ * RFBM value is used, a different set of offsets is required. This set of
+ * offsets is for when RFBM=xfeatures_mask_supervisor().
+ */
+static void __init setup_supervisor_only_offsets(void)
+{
+ unsigned int next_offset;
+ int i;
+
+ next_offset = FXSAVE_SIZE + XSAVE_HDR_SIZE;
+
+ for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) {
+ if (!xfeature_enabled(i) || !xfeature_is_supervisor(i))
+ continue;
+
+ if (xfeature_is_aligned(i))
+ next_offset = ALIGN(next_offset, 64);
+
+ xstate_supervisor_only_offsets[i] = next_offset;
+ next_offset += xstate_sizes[i];
}
}
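
As a worked example of why two offset tables are needed (hypothetical feature set, for illustration only): with XSAVES in use, the first extended state starts at FXSAVE_SIZE + XSAVE_HDR_SIZE = 512 + 64 = 576. If YMM (256 bytes) and a single supervisor state S are the only extended features enabled, xstate_comp_offsets[] places YMM at 576 and S at 576 + 256 = 832, whereas xstate_supervisor_only_offsets[] places S at 576 because YMM is not present in RFBM = xfeatures_mask_supervisor(). copy_supervisor_to_kernel(), added later in this patch, relies on exactly this difference when it moves states from the supervisor-only layout back to their normal compacted offsets.
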
@@ -420,7 +456,8 @@
XFEATURE_MASK_Hi16_ZMM | \
XFEATURE_MASK_PKRU | \
XFEATURE_MASK_BNDREGS | \
- XFEATURE_MASK_BNDCSR)
+ XFEATURE_MASK_BNDCSR | \
+ XFEATURE_MASK_PASID)
/*
* setup the xstate image representing the init state
@@ -429,7 +466,9 @@
{
static int on_boot_cpu __initdata = 1;
- BUILD_BUG_ON(XCNTXT_MASK != XFEATURES_INIT_FPSTATE_HANDLED);
+ BUILD_BUG_ON((XFEATURE_MASK_USER_SUPPORTED |
+ XFEATURE_MASK_SUPERVISOR_SUPPORTED) !=
+ XFEATURES_INIT_FPSTATE_HANDLED);
WARN_ON_FPU(!on_boot_cpu);
on_boot_cpu = 0;
@@ -441,7 +480,8 @@
print_xstate_features();
if (boot_cpu_has(X86_FEATURE_XSAVES))
- init_fpstate.xsave.header.xcomp_bv = (u64)1 << 63 | xfeatures_mask;
+ init_fpstate.xsave.header.xcomp_bv = XCOMP_BV_COMPACTED_FORMAT |
+ xfeatures_mask_all;
/*
* Init all the features state with header.xfeatures being 0x0
@@ -476,7 +516,7 @@
* format. Checking a supervisor state's uncompacted offset is
* an error.
*/
- if (XFEATURE_MASK_SUPERVISOR & (1 << xfeature_nr)) {
+ if (XFEATURE_MASK_SUPERVISOR_ALL & BIT_ULL(xfeature_nr)) {
WARN_ONCE(1, "No fixed offset for xstate %d\n", xfeature_nr);
return -1;
}
@@ -486,7 +526,7 @@
return ebx;
}
-static int xfeature_size(int xfeature_nr)
+int xfeature_size(int xfeature_nr)
{
u32 eax, ebx, ecx, edx;
@@ -510,10 +550,10 @@
}
/* Validate an xstate header supplied by userspace (ptrace or sigreturn) */
-int validate_xstate_header(const struct xstate_header *hdr)
+int validate_user_xstate_header(const struct xstate_header *hdr)
{
/* No unknown or supervisor features may be set */
- if (hdr->xfeatures & (~xfeatures_mask | XFEATURE_MASK_SUPERVISOR))
+ if (hdr->xfeatures & ~xfeatures_mask_user())
return -EINVAL;
/* Userspace must use the uncompacted format */
@@ -590,6 +630,7 @@
XCHECK_SZ(sz, nr, XFEATURE_ZMM_Hi256, struct avx_512_zmm_uppers_state);
XCHECK_SZ(sz, nr, XFEATURE_Hi16_ZMM, struct avx_512_hi16_state);
XCHECK_SZ(sz, nr, XFEATURE_PKRU, struct pkru_state);
+ XCHECK_SZ(sz, nr, XFEATURE_PASID, struct ia32_pasid_state);
/*
* Make *SURE* to add any feature numbers in below if
@@ -598,7 +639,8 @@
*/
if ((nr < XFEATURE_YMM) ||
(nr >= XFEATURE_MAX) ||
- (nr == XFEATURE_PT_UNIMPLEMENTED_SO_FAR)) {
+ (nr == XFEATURE_PT_UNIMPLEMENTED_SO_FAR) ||
+ ((nr >= XFEATURE_RSRVD_COMP_11) && (nr <= XFEATURE_LBR))) {
WARN_ONCE(1, "no structure for xstate: %d\n", nr);
XSTATE_WARN_ON(1);
}
@@ -608,6 +650,10 @@
* This essentially double-checks what the cpu told us about
* how large the XSAVE buffer needs to be. We are recalculating
* it to be safe.
+ *
+ * Dynamic XSAVE features allocate their own buffers and are not
+ * covered by these checks. Only the size of the buffer for task->fpu
+ * is checked here.
*/
static void do_extra_xstate_size_checks(void)
{
@@ -648,15 +694,12 @@
/*
- * Get total size of enabled xstates in XCR0/xfeatures_mask.
+ * Get total size of enabled xstates in XCR0 | IA32_XSS.
*
* Note the SDM's wording here. "sub-function 0" only enumerates
* the size of the *user* states. If we use it to size a buffer
* that we use 'XSAVES' on, we could potentially overflow the
* buffer because 'XSAVES' saves system states too.
- *
- * Note that we do not currently set any bits on IA32_XSS so
- * 'XCR0 | IA32_XSS == XCR0' for now.
*/
static unsigned int __init get_xsaves_size(void)
{
@@ -671,6 +714,33 @@
*/
cpuid_count(XSTATE_CPUID, 1, &eax, &ebx, &ecx, &edx);
return ebx;
+}
+
+/*
+ * Get the total size of the enabled xstates without the dynamic supervisor
+ * features.
+ */
+static unsigned int __init get_xsaves_size_no_dynamic(void)
+{
+ u64 mask = xfeatures_mask_dynamic();
+ unsigned int size;
+
+ if (!mask)
+ return get_xsaves_size();
+
+ /* Disable dynamic features. */
+ wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor());
+
+ /*
+ * Ask the hardware what size is required of the buffer.
+ * This is the size required for the task->fpu buffer.
+ */
+ size = get_xsaves_size();
+
+ /* Re-enable dynamic features so XSAVES will work on them again. */
+ wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor() | mask);
+
+ return size;
}
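
The distinction the comment above draws between CPUID(0xD, 0) and CPUID(0xD, 1) can also be observed from user space; an illustrative standalone program (not part of the patch):

#include <cpuid.h>
#include <stdio.h>

int main(void)
{
        unsigned int eax, ebx, ecx, edx;

        /* Sub-leaf 0: user states only. EBX = size for the current XCR0,
         * ECX = worst-case size for all user states the CPU supports. */
        __get_cpuid_count(0x0d, 0, &eax, &ebx, &ecx, &edx);
        printf("XSAVE size, current XCR0:       %u\n", ebx);
        printf("XSAVE size, all user features:  %u\n", ecx);

        /* Sub-leaf 1: EBX = compacted size for XCR0 | IA32_XSS, i.e. what
         * get_xsaves_size() reads in the kernel. */
        __get_cpuid_count(0x0d, 1, &eax, &ebx, &ecx, &edx);
        printf("XSAVES size, XCR0 | IA32_XSS:   %u\n", ebx);
        return 0;
}
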
static unsigned int __init get_xsave_size(void)
@@ -701,7 +771,7 @@
return false;
}
-static int init_xstate_size(void)
+static int __init init_xstate_size(void)
{
/* Recompute the context size for enabled features: */
unsigned int possible_xstate_size;
@@ -710,7 +780,7 @@
xsave_size = get_xsave_size();
if (boot_cpu_has(X86_FEATURE_XSAVES))
- possible_xstate_size = get_xsaves_size();
+ possible_xstate_size = get_xsaves_size_no_dynamic();
else
possible_xstate_size = xsave_size;
@@ -738,9 +808,9 @@
*/
static void fpu__init_disable_system_xstate(void)
{
- xfeatures_mask = 0;
+ xfeatures_mask_all = 0;
cr4_clear_bits(X86_CR4_OSXSAVE);
- fpu__xstate_clear_all_cpu_caps();
+ setup_clear_cpu_cap(X86_FEATURE_XSAVE);
}
/*
@@ -773,16 +843,26 @@
return;
}
+ /*
+ * Find user xstates supported by the processor.
+ */
cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
- xfeatures_mask = eax + ((u64)edx << 32);
+ xfeatures_mask_all = eax + ((u64)edx << 32);
- if ((xfeatures_mask & XFEATURE_MASK_FPSSE) != XFEATURE_MASK_FPSSE) {
+ /*
+ * Find supervisor xstates supported by the processor.
+ */
+ cpuid_count(XSTATE_CPUID, 1, &eax, &ebx, &ecx, &edx);
+ xfeatures_mask_all |= ecx + ((u64)edx << 32);
+
+ if ((xfeatures_mask_user() & XFEATURE_MASK_FPSSE) != XFEATURE_MASK_FPSSE) {
/*
* This indicates that something really unexpected happened
* with the enumeration. Disable XSAVE and try to continue
* booting without it. This is too early to BUG().
*/
- pr_err("x86/fpu: FP/SSE not present amongst the CPU's xstate features: 0x%llx.\n", xfeatures_mask);
+ pr_err("x86/fpu: FP/SSE not present amongst the CPU's xstate features: 0x%llx.\n",
+ xfeatures_mask_all);
goto out_disable;
}
@@ -791,10 +871,10 @@
*/
for (i = 0; i < ARRAY_SIZE(xsave_cpuid_features); i++) {
if (!boot_cpu_has(xsave_cpuid_features[i]))
- xfeatures_mask &= ~BIT(i);
+ xfeatures_mask_all &= ~BIT_ULL(i);
}
- xfeatures_mask &= fpu__get_supported_xfeatures_mask();
+ xfeatures_mask_all &= fpu__get_supported_xfeatures_mask();
/* Enable xstate instructions to be able to continue with initialization: */
fpu__init_cpu_xstate();
@@ -806,15 +886,16 @@
* Update info used for ptrace frames; use standard-format size and no
* supervisor xstates:
*/
- update_regset_xstate_info(fpu_user_xstate_size, xfeatures_mask & ~XFEATURE_MASK_SUPERVISOR);
+ update_regset_xstate_info(fpu_user_xstate_size, xfeatures_mask_user());
fpu__init_prepare_fx_sw_frame();
setup_init_fpu_buf();
- setup_xstate_comp();
+ setup_xstate_comp_offsets();
+ setup_supervisor_only_offsets();
print_xstate_offset_size();
pr_info("x86/fpu: Enabled xstate features 0x%llx, context size is %d bytes, using '%s' format.\n",
- xfeatures_mask,
+ xfeatures_mask_all,
fpu_kernel_xstate_size,
boot_cpu_has(X86_FEATURE_XSAVES) ? "compacted" : "standard");
return;
@@ -833,26 +914,31 @@
* Restore XCR0 on xsave capable CPUs:
*/
if (boot_cpu_has(X86_FEATURE_XSAVE))
- xsetbv(XCR_XFEATURE_ENABLED_MASK, xfeatures_mask);
+ xsetbv(XCR_XFEATURE_ENABLED_MASK, xfeatures_mask_user());
+
+ /*
+ * Restore IA32_XSS. The same CPUID bit enumerates support
+ * of XSAVES and MSR_IA32_XSS.
+ */
+ if (boot_cpu_has(X86_FEATURE_XSAVES)) {
+ wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor() |
+ xfeatures_mask_dynamic());
+ }
}
/*
- * Given an xstate feature mask, calculate where in the xsave
+ * Given an xstate feature nr, calculate where in the xsave
* buffer the state is. Callers should ensure that the buffer
* is valid.
- *
- * Note: does not work for compacted buffers.
*/
-void *__raw_xsave_addr(struct xregs_state *xsave, int xstate_feature_mask)
+static void *__raw_xsave_addr(struct xregs_state *xsave, int xfeature_nr)
{
- int feature_nr = fls64(xstate_feature_mask) - 1;
-
- if (!xfeature_enabled(feature_nr)) {
+ if (!xfeature_enabled(xfeature_nr)) {
WARN_ON_FPU(1);
return NULL;
}
- return (void *)xsave + xstate_comp_offsets[feature_nr];
+ return (void *)xsave + xstate_comp_offsets[xfeature_nr];
}
/*
* Given the xsave area and a state inside, this function returns the
@@ -866,13 +952,13 @@
*
* Inputs:
* xstate: the thread's storage area for all FPU data
- * xstate_feature: state which is defined in xsave.h (e.g.
- * XFEATURE_MASK_FP, XFEATURE_MASK_SSE, etc...)
+ * xfeature_nr: state which is defined in xsave.h (e.g. XFEATURE_FP,
+ * XFEATURE_SSE, etc...)
* Output:
* address of the state in the xsave area, or NULL if the
* field is not present in the xsave buffer.
*/
-void *get_xsave_addr(struct xregs_state *xsave, int xstate_feature)
+void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr)
{
/*
* Do we even *have* xsave state?
@@ -882,14 +968,13 @@
/*
* We should not ever be requesting features that we
- * have not enabled. Remember that pcntxt_mask is
- * what we write to the XCR0 register.
+ * have not enabled.
*/
- WARN_ONCE(!(xfeatures_mask & xstate_feature),
+ WARN_ONCE(!(xfeatures_mask_all & BIT_ULL(xfeature_nr)),
"get of unsupported state");
/*
* This assumes the last 'xsave*' instruction to
- * have requested that 'xstate_feature' be saved.
+ * have requested that 'xfeature_nr' be saved.
* If it did not, we might be seeing an old value
* of the field in the buffer.
*
@@ -898,10 +983,10 @@
* or because the "init optimization" caused it
* to not be saved.
*/
- if (!(xsave->header.xfeatures & xstate_feature))
+ if (!(xsave->header.xfeatures & BIT_ULL(xfeature_nr)))
return NULL;
- return __raw_xsave_addr(xsave, xstate_feature);
+ return __raw_xsave_addr(xsave, xfeature_nr);
}
EXPORT_SYMBOL_GPL(get_xsave_addr);
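
A typical in-kernel caller of the renumbered API looks roughly like the sketch below; the PKRU example mirrors common usage but is illustrative and not part of this patch:

static u32 read_task_pkru(struct fpu *fpu)
{
        struct pkru_state *pk;

        /* NULL means PKRU is disabled or still in its init state. */
        pk = get_xsave_addr(&fpu->state.xsave, XFEATURE_PKRU);
        if (!pk)
                return 0;       /* the XSAVE init value of PKRU is 0 */

        return pk->pkru;
}
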
@@ -916,25 +1001,23 @@
* Note that this only works on the current task.
*
* Inputs:
- * @xsave_state: state which is defined in xsave.h (e.g. XFEATURE_MASK_FP,
- * XFEATURE_MASK_SSE, etc...)
+ * @xfeature_nr: state which is defined in xsave.h (e.g. XFEATURE_FP,
+ * XFEATURE_SSE, etc...)
* Output:
* address of the state in the xsave area or NULL if the state
* is not present or is in its 'init state'.
*/
-const void *get_xsave_field_ptr(int xsave_state)
+const void *get_xsave_field_ptr(int xfeature_nr)
{
struct fpu *fpu = &current->thread.fpu;
- if (!fpu->initialized)
- return NULL;
/*
* fpu__save() takes the CPU's xstate registers
* and saves them off to the 'fpu memory buffer.
*/
fpu__save(fpu);
- return get_xsave_addr(&fpu->state.xsave, xsave_state);
+ return get_xsave_addr(&fpu->state.xsave, xfeature_nr);
}
#ifdef CONFIG_ARCH_HAS_PKEYS
@@ -1001,32 +1084,10 @@
return true;
}
-static void fill_gap(unsigned to, void **kbuf, unsigned *pos, unsigned *count)
+static void copy_feature(bool from_xstate, struct membuf *to, void *xstate,
+ void *init_xstate, unsigned int size)
{
- if (*pos < to) {
- unsigned size = to - *pos;
-
- if (size > *count)
- size = *count;
- memcpy(*kbuf, (void *)&init_fpstate.xsave + *pos, size);
- *kbuf += size;
- *pos += size;
- *count -= size;
- }
-}
-
-static void copy_part(unsigned offset, unsigned size, void *from,
- void **kbuf, unsigned *pos, unsigned *count)
-{
- fill_gap(offset, kbuf, pos, count);
- if (size > *count)
- size = *count;
- if (size) {
- memcpy(*kbuf, from, size);
- *kbuf += size;
- *pos += size;
- *count -= size;
- }
+ membuf_write(to, from_xstate ? xstate : init_xstate, size);
}
/*
@@ -1036,154 +1097,83 @@
* It supports partial copy but pos always starts from zero. This is called
* from xstateregs_get() and there we check the CPU has XSAVES.
*/
-int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int offset_start, unsigned int size_total)
+void copy_xstate_to_kernel(struct membuf to, struct xregs_state *xsave)
{
+ const unsigned int off_mxcsr = offsetof(struct fxregs_state, mxcsr);
+ struct xregs_state *xinit = &init_fpstate.xsave;
struct xstate_header header;
- const unsigned off_mxcsr = offsetof(struct fxregs_state, mxcsr);
- unsigned count = size_total;
+ unsigned int zerofrom;
int i;
/*
- * Currently copy_regset_to_user() starts from pos 0:
- */
- if (unlikely(offset_start != 0))
- return -EFAULT;
-
- /*
* The destination is a ptrace buffer; we put in only user xstates:
*/
memset(&header, 0, sizeof(header));
header.xfeatures = xsave->header.xfeatures;
- header.xfeatures &= ~XFEATURE_MASK_SUPERVISOR;
+ header.xfeatures &= xfeatures_mask_user();
- if (header.xfeatures & XFEATURE_MASK_FP)
- copy_part(0, off_mxcsr,
- &xsave->i387, &kbuf, &offset_start, &count);
- if (header.xfeatures & (XFEATURE_MASK_SSE | XFEATURE_MASK_YMM))
- copy_part(off_mxcsr, MXCSR_AND_FLAGS_SIZE,
- &xsave->i387.mxcsr, &kbuf, &offset_start, &count);
- if (header.xfeatures & XFEATURE_MASK_FP)
- copy_part(offsetof(struct fxregs_state, st_space), 128,
- &xsave->i387.st_space, &kbuf, &offset_start, &count);
- if (header.xfeatures & XFEATURE_MASK_SSE)
- copy_part(xstate_offsets[XFEATURE_SSE], 256,
- &xsave->i387.xmm_space, &kbuf, &offset_start, &count);
- /*
- * Fill xsave->i387.sw_reserved value for ptrace frame:
- */
- copy_part(offsetof(struct fxregs_state, sw_reserved), 48,
- xstate_fx_sw_bytes, &kbuf, &offset_start, &count);
- /*
- * Copy xregs_state->header:
- */
- copy_part(offsetof(struct xregs_state, header), sizeof(header),
- &header, &kbuf, &offset_start, &count);
+ /* Copy FP state up to MXCSR */
+ copy_feature(header.xfeatures & XFEATURE_MASK_FP, &to, &xsave->i387,
+ &xinit->i387, off_mxcsr);
+
+ /* Copy MXCSR when SSE or YMM are set in the feature mask */
+ copy_feature(header.xfeatures & (XFEATURE_MASK_SSE | XFEATURE_MASK_YMM),
+ &to, &xsave->i387.mxcsr, &xinit->i387.mxcsr,
+ MXCSR_AND_FLAGS_SIZE);
+
+ /* Copy the remaining FP state */
+ copy_feature(header.xfeatures & XFEATURE_MASK_FP,
+ &to, &xsave->i387.st_space, &xinit->i387.st_space,
+ sizeof(xsave->i387.st_space));
+
+ /* Copy the SSE state - shared with YMM, but independently managed */
+ copy_feature(header.xfeatures & XFEATURE_MASK_SSE,
+ &to, &xsave->i387.xmm_space, &xinit->i387.xmm_space,
+ sizeof(xsave->i387.xmm_space));
+
+ /* Zero the padding area */
+ membuf_zero(&to, sizeof(xsave->i387.padding));
+
+ /* Copy xsave->i387.sw_reserved */
+ membuf_write(&to, xstate_fx_sw_bytes, sizeof(xsave->i387.sw_reserved));
+
+ /* Copy the user space relevant state of @xsave->header */
+ membuf_write(&to, &header, sizeof(header));
+
+ zerofrom = offsetof(struct xregs_state, extended_state_area);
for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) {
/*
- * Copy only in-use xstates:
+ * The ptrace buffer is in non-compacted XSAVE format.
+ * In non-compacted format disabled features still occupy
+ * state space, but there is no state to copy from in the
+ * compacted init_fpstate. The gap tracking will zero this
+ * later.
*/
- if ((header.xfeatures >> i) & 1) {
- void *src = __raw_xsave_addr(xsave, 1 << i);
+ if (!(xfeatures_mask_user() & BIT_ULL(i)))
+ continue;
- copy_part(xstate_offsets[i], xstate_sizes[i],
- src, &kbuf, &offset_start, &count);
- }
-
- }
- fill_gap(size_total, &kbuf, &offset_start, &count);
-
- return 0;
-}
-
-static inline int
-__copy_xstate_to_user(void __user *ubuf, const void *data, unsigned int offset, unsigned int size, unsigned int size_total)
-{
- if (!size)
- return 0;
-
- if (offset < size_total) {
- unsigned int copy = min(size, size_total - offset);
-
- if (__copy_to_user(ubuf + offset, data, copy))
- return -EFAULT;
- }
- return 0;
-}
-
-/*
- * Convert from kernel XSAVES compacted format to standard format and copy
- * to a user-space buffer. It supports partial copy but pos always starts from
- * zero. This is called from xstateregs_get() and there we check the CPU
- * has XSAVES.
- */
-int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned int offset_start, unsigned int size_total)
-{
- unsigned int offset, size;
- int ret, i;
- struct xstate_header header;
-
- /*
- * Currently copy_regset_to_user() starts from pos 0:
- */
- if (unlikely(offset_start != 0))
- return -EFAULT;
-
- /*
- * The destination is a ptrace buffer; we put in only user xstates:
- */
- memset(&header, 0, sizeof(header));
- header.xfeatures = xsave->header.xfeatures;
- header.xfeatures &= ~XFEATURE_MASK_SUPERVISOR;
-
- /*
- * Copy xregs_state->header:
- */
- offset = offsetof(struct xregs_state, header);
- size = sizeof(header);
-
- ret = __copy_xstate_to_user(ubuf, &header, offset, size, size_total);
- if (ret)
- return ret;
-
- for (i = 0; i < XFEATURE_MAX; i++) {
/*
- * Copy only in-use xstates:
+ * If there was a feature or alignment gap, zero the space
+ * in the destination buffer.
*/
- if ((header.xfeatures >> i) & 1) {
- void *src = __raw_xsave_addr(xsave, 1 << i);
+ if (zerofrom < xstate_offsets[i])
+ membuf_zero(&to, xstate_offsets[i] - zerofrom);
- offset = xstate_offsets[i];
- size = xstate_sizes[i];
+ copy_feature(header.xfeatures & BIT_ULL(i), &to,
+ __raw_xsave_addr(xsave, i),
+ __raw_xsave_addr(xinit, i),
+ xstate_sizes[i]);
- /* The next component has to fit fully into the output buffer: */
- if (offset + size > size_total)
- break;
-
- ret = __copy_xstate_to_user(ubuf, src, offset, size, size_total);
- if (ret)
- return ret;
- }
-
+ /*
+ * Keep track of the last copied state in the non-compacted
+ * target buffer for gap zeroing.
+ */
+ zerofrom = xstate_offsets[i] + xstate_sizes[i];
}
- if (xfeatures_mxcsr_quirk(header.xfeatures)) {
- offset = offsetof(struct fxregs_state, mxcsr);
- size = MXCSR_AND_FLAGS_SIZE;
- __copy_xstate_to_user(ubuf, &xsave->i387.mxcsr, offset, size, size_total);
- }
-
- /*
- * Fill xsave->i387.sw_reserved value for ptrace frame:
- */
- offset = offsetof(struct fxregs_state, sw_reserved);
- size = sizeof(xstate_fx_sw_bytes);
-
- ret = __copy_xstate_to_user(ubuf, xstate_fx_sw_bytes, offset, size, size_total);
- if (ret)
- return ret;
-
- return 0;
+ if (to.left)
+ membuf_zero(&to, to.left);
}
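
The struct membuf helpers used in this rewrite come from <linux/regset.h> in trees that carry the membuf-based regset conversion; they are not defined in this file. Roughly, they behave like the following sketch, clamping to the space left in the destination and advancing the cursor:

struct membuf {
        void *p;
        size_t left;
};

static inline int membuf_write(struct membuf *s, const void *v, size_t size)
{
        if (size > s->left)
                size = s->left;
        memcpy(s->p, v, size);
        s->p += size;
        s->left -= size;
        return s->left;
}

static inline int membuf_zero(struct membuf *s, size_t size)
{
        if (size > s->left)
                size = s->left;
        memset(s->p, 0, size);
        s->p += size;
        s->left -= size;
        return s->left;
}
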
/*
@@ -1201,14 +1191,14 @@
memcpy(&hdr, kbuf + offset, size);
- if (validate_xstate_header(&hdr))
+ if (validate_user_xstate_header(&hdr))
return -EINVAL;
for (i = 0; i < XFEATURE_MAX; i++) {
u64 mask = ((u64)1 << i);
if (hdr.xfeatures & mask) {
- void *dst = __raw_xsave_addr(xsave, 1 << i);
+ void *dst = __raw_xsave_addr(xsave, i);
offset = xstate_offsets[i];
size = xstate_sizes[i];
@@ -1227,7 +1217,7 @@
* The state that came in from userspace was user-state only.
* Mask all the user states out of 'xfeatures':
*/
- xsave->header.xfeatures &= XFEATURE_MASK_SUPERVISOR;
+ xsave->header.xfeatures &= XFEATURE_MASK_SUPERVISOR_ALL;
/*
* Add back in the features that came in from userspace:
@@ -1255,14 +1245,14 @@
if (__copy_from_user(&hdr, ubuf + offset, size))
return -EFAULT;
- if (validate_xstate_header(&hdr))
+ if (validate_user_xstate_header(&hdr))
return -EINVAL;
for (i = 0; i < XFEATURE_MAX; i++) {
u64 mask = ((u64)1 << i);
if (hdr.xfeatures & mask) {
- void *dst = __raw_xsave_addr(xsave, 1 << i);
+ void *dst = __raw_xsave_addr(xsave, i);
offset = xstate_offsets[i];
size = xstate_sizes[i];
@@ -1283,7 +1273,7 @@
* The state that came in from userspace was user-state only.
* Mask all the user states out of 'xfeatures':
*/
- xsave->header.xfeatures &= XFEATURE_MASK_SUPERVISOR;
+ xsave->header.xfeatures &= XFEATURE_MASK_SUPERVISOR_ALL;
/*
* Add back in the features that came in from userspace:
@@ -1292,3 +1282,175 @@
return 0;
}
+
+/*
+ * Save only supervisor states to the kernel buffer. This blows away all
+ * old states, and is intended to be used only in __fpu__restore_sig(), where
+ * user states are restored from the user buffer.
+ */
+void copy_supervisor_to_kernel(struct xregs_state *xstate)
+{
+ struct xstate_header *header;
+ u64 max_bit, min_bit;
+ u32 lmask, hmask;
+ int err, i;
+
+ if (WARN_ON(!boot_cpu_has(X86_FEATURE_XSAVES)))
+ return;
+
+ if (!xfeatures_mask_supervisor())
+ return;
+
+ max_bit = __fls(xfeatures_mask_supervisor());
+ min_bit = __ffs(xfeatures_mask_supervisor());
+
+ lmask = xfeatures_mask_supervisor();
+ hmask = xfeatures_mask_supervisor() >> 32;
+ XSTATE_OP(XSAVES, xstate, lmask, hmask, err);
+
+ /* We should never fault when copying to a kernel buffer: */
+ if (WARN_ON_FPU(err))
+ return;
+
+ /*
+ * At this point, the buffer has only supervisor states and must be
+ * converted back to normal kernel format.
+ */
+ header = &xstate->header;
+ header->xcomp_bv |= xfeatures_mask_all;
+
+ /*
+ * This only moves states up in the buffer. Start with
+ * the last state and move backwards so that states are
+ * not overwritten until after they are moved. Note:
+ * memmove() allows overlapping src/dst buffers.
+ */
+ for (i = max_bit; i >= min_bit; i--) {
+ u8 *xbuf = (u8 *)xstate;
+
+ if (!((header->xfeatures >> i) & 1))
+ continue;
+
+ /* Move xfeature 'i' into its normal location */
+ memmove(xbuf + xstate_comp_offsets[i],
+ xbuf + xstate_supervisor_only_offsets[i],
+ xstate_sizes[i]);
+ }
+}
+
+/**
+ * copy_dynamic_supervisor_to_kernel() - Save dynamic supervisor states to
+ * an xsave area
+ * @xstate: A pointer to an xsave area
+ * @mask: Represents the dynamic supervisor features saved into the xsave area
+ *
+ * Only the dynamic supervisor states set in the mask are saved into the xsave
+ * area (See the comment in XFEATURE_MASK_DYNAMIC for the details of dynamic
+ * supervisor feature). Besides the dynamic supervisor states, the legacy
+ * region and XSAVE header are also saved into the xsave area. The supervisor
+ * features in the XFEATURE_MASK_SUPERVISOR_SUPPORTED and
+ * XFEATURE_MASK_SUPERVISOR_UNSUPPORTED are not saved.
+ *
+ * The xsave area must be 64-byte aligned.
+ */
+void copy_dynamic_supervisor_to_kernel(struct xregs_state *xstate, u64 mask)
+{
+ u64 dynamic_mask = xfeatures_mask_dynamic() & mask;
+ u32 lmask, hmask;
+ int err;
+
+ if (WARN_ON_FPU(!boot_cpu_has(X86_FEATURE_XSAVES)))
+ return;
+
+ if (WARN_ON_FPU(!dynamic_mask))
+ return;
+
+ lmask = dynamic_mask;
+ hmask = dynamic_mask >> 32;
+
+ XSTATE_OP(XSAVES, xstate, lmask, hmask, err);
+
+ /* Should never fault when copying to a kernel buffer */
+ WARN_ON_FPU(err);
+}
+
+/**
+ * copy_kernel_to_dynamic_supervisor() - Restore dynamic supervisor states from
+ * an xsave area
+ * @xstate: A pointer to an xsave area
+ * @mask: Represents the dynamic supervisor features restored from the xsave area
+ *
+ * Only the dynamic supervisor states set in the mask are restored from the
+ * xsave area (See the comment in XFEATURE_MASK_DYNAMIC for the details of
+ * dynamic supervisor feature). Besides the dynamic supervisor states, the
+ * legacy region and XSAVE header are also restored from the xsave area. The
+ * supervisor features in the XFEATURE_MASK_SUPERVISOR_SUPPORTED and
+ * XFEATURE_MASK_SUPERVISOR_UNSUPPORTED are not restored.
+ *
+ * The xsave area must be 64-byte aligned.
+ */
+void copy_kernel_to_dynamic_supervisor(struct xregs_state *xstate, u64 mask)
+{
+ u64 dynamic_mask = xfeatures_mask_dynamic() & mask;
+ u32 lmask, hmask;
+ int err;
+
+ if (WARN_ON_FPU(!boot_cpu_has(X86_FEATURE_XSAVES)))
+ return;
+
+ if (WARN_ON_FPU(!dynamic_mask))
+ return;
+
+ lmask = dynamic_mask;
+ hmask = dynamic_mask >> 32;
+
+ XSTATE_OP(XRSTORS, xstate, lmask, hmask, err);
+
+ /* Should never fault when copying from a kernel buffer */
+ WARN_ON_FPU(err);
+}
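
A hypothetical caller of this pair (illustrative only; in mainline the consumer is the arch-LBR support in perf, which is not part of this patch):

/*
 * Sketch: swap the arch LBR supervisor state between two 64-byte-aligned
 * xsave buffers, each sized for the legacy area, the XSAVE header and
 * xfeature_size(XFEATURE_LBR).
 */
static void lbr_swap_example(struct xregs_state *prev_xsave,
                             struct xregs_state *next_xsave)
{
        copy_dynamic_supervisor_to_kernel(prev_xsave, XFEATURE_MASK_LBR);
        copy_kernel_to_dynamic_supervisor(next_xsave, XFEATURE_MASK_LBR);
}
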
+
+#ifdef CONFIG_PROC_PID_ARCH_STATUS
+/*
+ * Report the amount of time elapsed in milliseconds since the last AVX512
+ * use in the task.
+ */
+static void avx512_status(struct seq_file *m, struct task_struct *task)
+{
+ unsigned long timestamp = READ_ONCE(task->thread.fpu.avx512_timestamp);
+ long delta;
+
+ if (!timestamp) {
+ /*
+ * Report -1 if no AVX512 usage
+ */
+ delta = -1;
+ } else {
+ delta = (long)(jiffies - timestamp);
+ /*
+ * Cap to LONG_MAX if time difference > LONG_MAX
+ */
+ if (delta < 0)
+ delta = LONG_MAX;
+ delta = jiffies_to_msecs(delta);
+ }
+
+ seq_put_decimal_ll(m, "AVX512_elapsed_ms:\t", delta);
+ seq_putc(m, '\n');
+}
+
+/*
+ * Report architecture specific information
+ */
+int proc_pid_arch_status(struct seq_file *m, struct pid_namespace *ns,
+ struct pid *pid, struct task_struct *task)
+{
+ /*
+ * Report AVX512 state if the processor and build option support it.
+ */
+ if (cpu_feature_enabled(X86_FEATURE_AVX512F))
+ avx512_status(m, task);
+
+ return 0;
+}
+#endif /* CONFIG_PROC_PID_ARCH_STATUS */
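
When CONFIG_PROC_PID_ARCH_STATUS is enabled, this hook is exposed as /proc/<pid>/arch_status. A minimal user-space check, for illustration:

#include <stdio.h>

int main(void)
{
        char line[128];
        FILE *f = fopen("/proc/self/arch_status", "r");

        if (!f) {
                perror("arch_status");
                return 1;
        }
        /* Prints e.g. "AVX512_elapsed_ms:  -1" if AVX-512 was never used. */
        while (fgets(line, sizeof(line), f))
                fputs(line, stdout);
        fclose(f);
        return 0;
}
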
--
Gitblit v1.6.2