From 063452fd94d153d4eb38ad58f210f3d37a09cca4 Mon Sep 17 00:00:00 2001
From: Yang Zhong
Date: Sat, 29 Jan 2022 09:36:46 -0800
Subject: x86/fpu/xstate: Fix the ARCH_REQ_XCOMP_PERM implementation

ARCH_REQ_XCOMP_PERM is supposed to add the requested feature to the
permission bitmap of thread_group_leader()->fpu. But the code overwrites
the bitmap with the requested feature bit only rather than adding it.

Fix the code to add the requested feature bit to the master bitmask.

Fixes: db8268df0983 ("x86/arch_prctl: Add controls for dynamic XSTATE components")
Signed-off-by: Yang Zhong
Signed-off-by: Chang S. Bae
Signed-off-by: Thomas Gleixner
Cc: Paolo Bonzini
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/r/20220129173647.27981-2-chang.seok.bae@intel.com
---
 arch/x86/kernel/fpu/xstate.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 7c7824ae7862..dc6d5e98d296 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -1639,7 +1639,7 @@ static int __xstate_request_perm(u64 permitted, u64 requested, bool guest)
 	perm = guest ? &fpu->guest_perm : &fpu->perm;
 
 	/* Pairs with the READ_ONCE() in xstate_get_group_perm() */
-	WRITE_ONCE(perm->__state_perm, requested);
+	WRITE_ONCE(perm->__state_perm, mask);
 	/* Protected by sighand lock */
 	perm->__state_size = ksize;
 	perm->__user_state_size = usize;
--
cgit v1.2.3

From a9f84fb7158fea60cbcadef5c0166fb22b469091 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Thu, 24 Mar 2022 14:47:08 +0100
Subject: x86/fpu: Remove redundant XCOMP_BV initialization

fpu_copy_uabi_to_guest_fpstate() initializes the XCOMP_BV field in the
XSAVE header. That's a leftover from the old KVM FPU buffer handling code.

Since d69c1382e1b7 ("x86/kvm: Convert FPU handling to a single swap
buffer") KVM uses the FPU core allocation code, which initializes the
XCOMP_BV field already.

Signed-off-by: Thomas Gleixner
Signed-off-by: Borislav Petkov
Link: https://lore.kernel.org/r/20220324134623.408932232@linutronix.de
---
 arch/x86/kernel/fpu/core.c | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index 19821f027cb3..c049561f373a 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -415,9 +415,6 @@ int fpu_copy_uabi_to_guest_fpstate(struct fpu_guest *gfpu, const void *buf,
 		xpkru = get_xsave_addr(&kstate->regs.xsave, XFEATURE_PKRU);
 		*vpkru = xpkru->pkru;
 	}
-
-	/* Ensure that XCOMP_BV is set up for XSAVES */
-	xstate_init_xcomp_bv(&kstate->regs.xsave, kstate->xfeatures);
 	return 0;
 }
 EXPORT_SYMBOL_GPL(fpu_copy_uabi_to_guest_fpstate);
--
cgit v1.2.3

From d47f71f6de7970d504748d1a60a11c51af5bce47 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Thu, 24 Mar 2022 14:47:09 +0100
Subject: x86/fpu: Remove unused supervisor only offsets

No users.

Signed-off-by: Thomas Gleixner
Signed-off-by: Borislav Petkov
Link: https://lore.kernel.org/r/20220324134623.465066249@linutronix.de
---
 arch/x86/kernel/fpu/xstate.c | 30 ------------------------------
 1 file changed, 30 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index dc6d5e98d296..dc33556779f7 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -83,8 +83,6 @@ static unsigned int xstate_sizes[XFEATURE_MAX] __ro_after_init =
 	{ [ 0 ... XFEATURE_MAX - 1] = -1};
 static unsigned int xstate_comp_offsets[XFEATURE_MAX] __ro_after_init =
 	{ [ 0 ... XFEATURE_MAX - 1] = -1};
-static unsigned int xstate_supervisor_only_offsets[XFEATURE_MAX] __ro_after_init =
-	{ [ 0 ... XFEATURE_MAX - 1] = -1};
 
 /*
  * Return whether the system supports a given xfeature.
@@ -324,33 +322,6 @@ static void __init setup_xstate_comp_offsets(void)
 	}
 }
 
-/*
- * Setup offsets of a supervisor-state-only XSAVES buffer:
- *
- * The offsets stored in xstate_comp_offsets[] only work for one specific
- * value of the Requested Feature BitMap (RFBM). In cases where a different
- * RFBM value is used, a different set of offsets is required. This set of
- * offsets is for when RFBM=xfeatures_mask_supervisor().
- */
-static void __init setup_supervisor_only_offsets(void)
-{
-	unsigned int next_offset;
-	int i;
-
-	next_offset = FXSAVE_SIZE + XSAVE_HDR_SIZE;
-
-	for_each_extended_xfeature(i, fpu_kernel_cfg.max_features) {
-		if (!xfeature_is_supervisor(i))
-			continue;
-
-		if (xfeature_is_aligned(i))
-			next_offset = ALIGN(next_offset, 64);
-
-		xstate_supervisor_only_offsets[i] = next_offset;
-		next_offset += xstate_sizes[i];
-	}
-}
-
 /*
  * Print out xstate component offsets and sizes
  */
@@ -951,7 +922,6 @@ void __init fpu__init_system_xstate(unsigned int legacy_size)
 
 	setup_init_fpu_buf();
 	setup_xstate_comp_offsets();
-	setup_supervisor_only_offsets();
 
 	/*
 	 * Paranoia check whether something in the setup modified the
--
cgit v1.2.3

From 35a77d4503d9d9d0e19e3a2a0d3fc9ab09fb6857 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Thu, 24 Mar 2022 14:47:11 +0100
Subject: x86/fpu/xsave: Initialize offset/size cache early

Reading XSTATE feature information from CPUID over and over does not make
sense. The information has to be cached anyway, so it can be done early.

Prepare for runtime calculation of XSTATE offsets and allow consolidation
of the size calculation functions in a later step.

Rename the function while at it as it does not setup any features.

Signed-off-by: Thomas Gleixner
Signed-off-by: Borislav Petkov
Link: https://lore.kernel.org/r/20220324134623.519411939@linutronix.de
---
 arch/x86/kernel/fpu/xstate.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index dc33556779f7..814c2fddbd83 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -180,7 +180,7 @@ static bool xfeature_enabled(enum xfeature xfeature)
  * Record the offsets and sizes of various xstates contained
  * in the XSAVE state memory layout.
  */
-static void __init setup_xstate_features(void)
+static void __init setup_xstate_cache(void)
 {
 	u32 eax, ebx, ecx, edx, i;
 	/* start at the beginning of the "extended state" */
@@ -390,7 +390,6 @@ static void __init setup_init_fpu_buf(void)
 	if (!boot_cpu_has(X86_FEATURE_XSAVE))
 		return;
 
-	setup_xstate_features();
 	print_xstate_features();
 
 	xstate_init_xcomp_bv(&init_fpstate.regs.xsave, fpu_kernel_cfg.max_features);
@@ -906,6 +905,10 @@ void __init fpu__init_system_xstate(unsigned int legacy_size)
 
 	/* Enable xstate instructions to be able to continue with initialization: */
 	fpu__init_cpu_xstate();
+
+	/* Cache size, offset and flags for initialization */
+	setup_xstate_cache();
+
 	err = init_xstate_size();
 	if (err)
 		goto out_disable;
--
cgit v1.2.3

From 6afbb58cc2251c1d83472ca3005638206e73b6b8 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Thu, 24 Mar 2022 14:47:12 +0100
Subject: x86/fpu: Cache xfeature flags from CPUID

In preparation for runtime calculation of XSAVE offsets cache the feature
flags for each XSTATE component during feature enumeration via CPUID(0xD).

EDX has two relevant bits:
    0	Supervisor component
    1	Feature storage must be 64 byte aligned

These bits are currently only evaluated during init, but the alignment bit
must be cached to make runtime calculation of XSAVE offsets efficient.

Cache the full EDX content and use it for the existing alignment and
supervisor checks.

Signed-off-by: Thomas Gleixner
Signed-off-by: Borislav Petkov
Link: https://lore.kernel.org/r/20220324134623.573656209@linutronix.de
---
 arch/x86/kernel/fpu/xstate.c | 49 ++++++++++++--------------------------------
 1 file changed, 13 insertions(+), 36 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 814c2fddbd83..5a069c2a3675 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -83,6 +83,10 @@ static unsigned int xstate_sizes[XFEATURE_MAX] __ro_after_init =
 	{ [ 0 ... XFEATURE_MAX - 1] = -1};
 static unsigned int xstate_comp_offsets[XFEATURE_MAX] __ro_after_init =
 	{ [ 0 ... XFEATURE_MAX - 1] = -1};
+static unsigned int xstate_flags[XFEATURE_MAX] __ro_after_init;
+
+#define XSTATE_FLAG_SUPERVISOR	BIT(0)
+#define XSTATE_FLAG_ALIGNED64	BIT(1)
 
 /*
  * Return whether the system supports a given xfeature.
@@ -122,17 +126,14 @@ int cpu_has_xfeatures(u64 xfeatures_needed, const char **feature_name)
 }
 EXPORT_SYMBOL_GPL(cpu_has_xfeatures);
 
-static bool xfeature_is_supervisor(int xfeature_nr)
+static bool xfeature_is_aligned64(int xfeature_nr)
 {
-	/*
-	 * Extended State Enumeration Sub-leaves (EAX = 0DH, ECX = n, n > 1)
-	 * returns ECX[0] set to (1) for a supervisor state, and cleared (0)
-	 * for a user state.
-	 */
-	u32 eax, ebx, ecx, edx;
+	return xstate_flags[xfeature_nr] & XSTATE_FLAG_ALIGNED64;
+}
 
-	cpuid_count(XSTATE_CPUID, xfeature_nr, &eax, &ebx, &ecx, &edx);
-	return ecx & 1;
+static bool xfeature_is_supervisor(int xfeature_nr)
+{
+	return xstate_flags[xfeature_nr] & XSTATE_FLAG_SUPERVISOR;
 }
 
 /*
@@ -203,6 +204,7 @@ static void __init setup_xstate_cache(void)
 		cpuid_count(XSTATE_CPUID, i, &eax, &ebx, &ecx, &edx);
 
 		xstate_sizes[i] = eax;
+		xstate_flags[i] = ecx;
 
 		/*
 		 * If an xfeature is supervisor state, the offset in EBX is
@@ -261,31 +263,6 @@ static void __init print_xstate_features(void)
 	WARN_ON(nr >= XFEATURE_MAX);	\
 } while (0)
 
-/*
- * We could cache this like xstate_size[], but we only use
- * it here, so it would be a waste of space.
- */
-static int xfeature_is_aligned(int xfeature_nr)
-{
-	u32 eax, ebx, ecx, edx;
-
-	CHECK_XFEATURE(xfeature_nr);
-
-	if (!xfeature_enabled(xfeature_nr)) {
-		WARN_ONCE(1, "Checking alignment of disabled xfeature %d\n",
-			  xfeature_nr);
-		return 0;
-	}
-
-	cpuid_count(XSTATE_CPUID, xfeature_nr, &eax, &ebx, &ecx, &edx);
-	/*
-	 * The value returned by ECX[1] indicates the alignment
-	 * of state component 'i' when the compacted format
-	 * of the extended region of an XSAVE area is used:
-	 */
-	return !!(ecx & 2);
-}
-
 /*
  * This function sets up offsets and sizes of all extended states in
  * xsave area. This supports both standard format and compacted format
@@ -314,7 +291,7 @@ static void __init setup_xstate_comp_offsets(void)
 	next_offset = FXSAVE_SIZE + XSAVE_HDR_SIZE;
 
 	for_each_extended_xfeature(i, fpu_kernel_cfg.max_features) {
-		if (xfeature_is_aligned(i))
+		if (xfeature_is_aligned64(i))
 			next_offset = ALIGN(next_offset, 64);
 
 		xstate_comp_offsets[i] = next_offset;
@@ -619,7 +596,7 @@ static unsigned int xstate_calculate_size(u64 xfeatures, bool compacted)
 
 	for_each_extended_xfeature(i, xfeatures) {
 		/* Align from the end of the previous feature */
-		if (xfeature_is_aligned(i))
+		if (xfeature_is_aligned64(i))
 			size = ALIGN(size, 64);
 		/*
 		 * In compacted format the enabled features are packed,
--
cgit v1.2.3

From 7aa5128b5fea26cf224766303ea3b8df343f9a87 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Thu, 24 Mar 2022 14:47:13 +0100
Subject: x86/fpu/xsave: Handle compacted offsets correctly with supervisor states

So far the cached fixed compacted offsets worked, but with (re-)enabling
of ENQCMD this does no longer work with KVM fpstate.

KVM does not have supervisor features enabled for the guest FPU, which
means that KVM has then a different XSAVE area layout than the host FPU
state. This in turn breaks the copy from/to UABI functions when invoked
for a guest state.

Remove the pre-calculated compacted offsets and calculate the offset of
each component at runtime based on the XCOMP_BV field in the XSAVE header.

The runtime overhead is not interesting because these copy from/to UABI
functions are not used in critical fast paths. KVM uses them to save and
restore FPU state during migration. The host uses them for ptrace and for
the slow path of 32bit signal handling.

Fixes: 7c1ef59145f1 ("x86/cpufeatures: Re-enable ENQCMD")
Signed-off-by: Thomas Gleixner
Signed-off-by: Borislav Petkov
Link: https://lore.kernel.org/r/20220324134623.627636809@linutronix.de
---
 arch/x86/kernel/fpu/xstate.c | 86 +++++++++++++++++++++-----------------------
 1 file changed, 41 insertions(+), 45 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 5a069c2a3675..c55f72eb5466 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -81,8 +81,6 @@ static unsigned int xstate_offsets[XFEATURE_MAX] __ro_after_init =
 	{ [ 0 ... XFEATURE_MAX - 1] = -1};
 static unsigned int xstate_sizes[XFEATURE_MAX] __ro_after_init =
 	{ [ 0 ... XFEATURE_MAX - 1] = -1};
-static unsigned int xstate_comp_offsets[XFEATURE_MAX] __ro_after_init =
-	{ [ 0 ... XFEATURE_MAX - 1] = -1};
 static unsigned int xstate_flags[XFEATURE_MAX] __ro_after_init;
 
 #define XSTATE_FLAG_SUPERVISOR	BIT(0)
@@ -136,6 +134,33 @@ static bool xfeature_is_supervisor(int xfeature_nr)
 	return xstate_flags[xfeature_nr] & XSTATE_FLAG_SUPERVISOR;
 }
 
+static unsigned int xfeature_get_offset(u64 xcomp_bv, int xfeature)
+{
+	unsigned int offs, i;
+
+	/*
+	 * Non-compacted format and legacy features use the cached fixed
+	 * offsets.
+	 */
+	if (!cpu_feature_enabled(X86_FEATURE_XSAVES) || xfeature <= XFEATURE_SSE)
+		return xstate_offsets[xfeature];
+
+	/*
+	 * Compacted format offsets depend on the actual content of the
+	 * compacted xsave area which is determined by the xcomp_bv header
+	 * field.
+	 */
+	offs = FXSAVE_SIZE + XSAVE_HDR_SIZE;
+	for_each_extended_xfeature(i, xcomp_bv) {
+		if (xfeature_is_aligned64(i))
+			offs = ALIGN(offs, 64);
+		if (i == xfeature)
+			break;
+		offs += xstate_sizes[i];
+	}
+	return offs;
+}
+
 /*
  * Enable the extended processor state save/restore feature.
  * Called once per CPU onlining.
@@ -263,42 +288,6 @@ static void __init print_xstate_features(void)
 	WARN_ON(nr >= XFEATURE_MAX);	\
 } while (0)
 
-/*
- * This function sets up offsets and sizes of all extended states in
- * xsave area. This supports both standard format and compacted format
- * of the xsave area.
- */
-static void __init setup_xstate_comp_offsets(void)
-{
-	unsigned int next_offset;
-	int i;
-
-	/*
-	 * The FP xstates and SSE xstates are legacy states. They are always
-	 * in the fixed offsets in the xsave area in either compacted form
-	 * or standard form.
-	 */
-	xstate_comp_offsets[XFEATURE_FP] = 0;
-	xstate_comp_offsets[XFEATURE_SSE] = offsetof(struct fxregs_state,
-						     xmm_space);
-
-	if (!cpu_feature_enabled(X86_FEATURE_XSAVES)) {
-		for_each_extended_xfeature(i, fpu_kernel_cfg.max_features)
-			xstate_comp_offsets[i] = xstate_offsets[i];
-		return;
-	}
-
-	next_offset = FXSAVE_SIZE + XSAVE_HDR_SIZE;
-
-	for_each_extended_xfeature(i, fpu_kernel_cfg.max_features) {
-		if (xfeature_is_aligned64(i))
-			next_offset = ALIGN(next_offset, 64);
-
-		xstate_comp_offsets[i] = next_offset;
-		next_offset += xstate_sizes[i];
-	}
-}
-
 /*
  * Print out xstate component offsets and sizes
  */
@@ -308,7 +297,8 @@ static void __init print_xstate_offset_size(void)
 
 	for_each_extended_xfeature(i, fpu_kernel_cfg.max_features) {
 		pr_info("x86/fpu: xstate_offset[%d]: %4d, xstate_sizes[%d]: %4d\n",
-			i, xstate_comp_offsets[i], i, xstate_sizes[i]);
+			i, xfeature_get_offset(fpu_kernel_cfg.max_features, i),
+			i, xstate_sizes[i]);
 	}
 }
 
@@ -901,7 +891,6 @@ void __init fpu__init_system_xstate(unsigned int legacy_size)
 				  fpu_user_cfg.max_features);
 
 	setup_init_fpu_buf();
-	setup_xstate_comp_offsets();
 
 	/*
 	 * Paranoia check whether something in the setup modified the
@@ -956,13 +945,19 @@ void fpu__resume_cpu(void)
  */
 static void *__raw_xsave_addr(struct xregs_state *xsave, int xfeature_nr)
 {
-	if (!xfeature_enabled(xfeature_nr)) {
-		WARN_ON_FPU(1);
+	u64 xcomp_bv = xsave->header.xcomp_bv;
+
+	if (WARN_ON_ONCE(!xfeature_enabled(xfeature_nr)))
 		return NULL;
+
+	if (cpu_feature_enabled(X86_FEATURE_XSAVES)) {
+		if (WARN_ON_ONCE(!(xcomp_bv & BIT_ULL(xfeature_nr))))
+			return NULL;
 	}
 
-	return (void *)xsave + xstate_comp_offsets[xfeature_nr];
+	return (void *)xsave + xfeature_get_offset(xcomp_bv, xfeature_nr);
 }
+
 /*
  * Given the xsave area and a state inside, this function returns the
  * address of the state.
@@ -993,8 +988,9 @@ void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr)
 	 * We should not ever be requesting features that we
 	 * have not enabled.
 	 */
-	WARN_ONCE(!(fpu_kernel_cfg.max_features & BIT_ULL(xfeature_nr)),
-		  "get of unsupported state");
+	if (WARN_ON_ONCE(!xfeature_enabled(xfeature_nr)))
+		return NULL;
+
 	/*
 	 * This assumes the last 'xsave*' instruction to
 	 * have requested that 'xfeature_nr' be saved.
--
cgit v1.2.3

From 781c64bfcb735960717d1cb45428047ff6a5030c Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Thu, 24 Mar 2022 14:47:14 +0100
Subject: x86/fpu/xstate: Handle supervisor states in XSTATE permissions

The size calculation in __xstate_request_perm() fails to take supervisor
states into account because the permission bitmap is only relevant for
user states.

Up to 5.17 this does not matter because there are no supervisor states
supported, but the (re-)enabling of ENQCMD makes them available.

Fixes: 7c1ef59145f1 ("x86/cpufeatures: Re-enable ENQCMD")
Signed-off-by: Thomas Gleixner
Signed-off-by: Borislav Petkov
Link: https://lore.kernel.org/r/20220324134623.681768598@linutronix.de
---
 arch/x86/kernel/fpu/xstate.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index c55f72eb5466..5ac934b48d4a 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -1571,6 +1571,9 @@ static int __xstate_request_perm(u64 permitted, u64 requested, bool guest)
 
 	/* Calculate the resulting kernel state size */
 	mask = permitted | requested;
+	/* Take supervisor states into account on the host */
+	if (!guest)
+		mask |= xfeatures_mask_supervisor();
 	ksize = xstate_calculate_size(mask, compacted);
 
 	/* Calculate the resulting user state size */
--
cgit v1.2.3

From d6d6d50f1e801a790a242c80eeda261e36c43b7b Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Mon, 28 Mar 2022 20:43:21 +0200
Subject: x86/fpu/xstate: Consolidate size calculations

Use the offset calculation to do the size calculation which avoids yet
another series of CPUID instructions for each invocation.

  [ Fix the FP/SSE only case which missed to take the xstate header into
    account, as Reported-by: kernel test robot ]

Signed-off-by: Thomas Gleixner
Signed-off-by: Borislav Petkov
Link: https://lore.kernel.org/r/87o81pgbp2.ffs@tglx
---
 arch/x86/kernel/fpu/xstate.c | 49 ++++++++------------------------------------
 1 file changed, 8 insertions(+), 41 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 5ac934b48d4a..39e1c8626ab9 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -385,25 +385,6 @@ static void __init setup_init_fpu_buf(void)
 	fxsave(&init_fpstate.regs.fxsave);
 }
 
-static int xfeature_uncompacted_offset(int xfeature_nr)
-{
-	u32 eax, ebx, ecx, edx;
-
-	/*
-	 * Only XSAVES supports supervisor states and it uses compacted
-	 * format. Checking a supervisor state's uncompacted offset is
-	 * an error.
-	 */
-	if (XFEATURE_MASK_SUPERVISOR_ALL & BIT_ULL(xfeature_nr)) {
-		WARN_ONCE(1, "No fixed offset for xstate %d\n", xfeature_nr);
-		return -1;
-	}
-
-	CHECK_XFEATURE(xfeature_nr);
-	cpuid_count(XSTATE_CPUID, xfeature_nr, &eax, &ebx, &ecx, &edx);
-	return ebx;
-}
-
 int xfeature_size(int xfeature_nr)
 {
 	u32 eax, ebx, ecx, edx;
@@ -581,29 +562,15 @@ static bool __init check_xstate_against_struct(int nr)
 
 static unsigned int xstate_calculate_size(u64 xfeatures, bool compacted)
 {
-	unsigned int size = FXSAVE_SIZE + XSAVE_HDR_SIZE;
-	int i;
+	unsigned int topmost = fls64(xfeatures) - 1;
+	unsigned int offset = xstate_offsets[topmost];
 
-	for_each_extended_xfeature(i, xfeatures) {
-		/* Align from the end of the previous feature */
-		if (xfeature_is_aligned64(i))
-			size = ALIGN(size, 64);
-		/*
-		 * In compacted format the enabled features are packed,
-		 * i.e. disabled features do not occupy space.
-		 *
-		 * In non-compacted format the offsets are fixed and
-		 * disabled states still occupy space in the memory buffer.
-		 */
-		if (!compacted)
-			size = xfeature_uncompacted_offset(i);
-		/*
-		 * Add the feature size even for non-compacted format
-		 * to make the end result correct
-		 */
-		size += xfeature_size(i);
-	}
-	return size;
+	if (topmost <= XFEATURE_SSE)
+		return sizeof(struct xregs_state);
+
+	if (compacted)
+		offset = xfeature_get_offset(xfeatures, topmost);
+	return offset + xstate_sizes[topmost];
 }
 
 /*
--
cgit v1.2.3
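
For readers following the series, a minimal standalone sketch (not kernel code; the feature numbers and component sizes below are illustrative, loosely modelled on AVX, PKRU and AMX) of the compacted-format layout rule that the new xfeature_get_offset() walks and that the final patch's xstate_calculate_size() reuses: components are placed in feature-number order after the 512-byte legacy area and 64-byte XSAVE header, absent components occupy no space, and 64-byte-aligned components are aligned up before placement.

/*
 * Illustrative sketch only: mirrors the xcomp_bv walk described in the
 * series, not the kernel implementation. Sizes/flags are example values.
 */
#include <stdint.h>
#include <stdio.h>

#define LEGACY_AND_HDR	(512 + 64)	/* FXSAVE area + XSAVE header */
#define ALIGN64(x)	(((x) + 63u) & ~63u)

static const unsigned int xsize[19]   = { [2] = 256, [9] = 8, [17] = 64, [18] = 8192 };
static const unsigned int align64[19] = { [17] = 1, [18] = 1 };

/* Offset of 'feature' in a compacted buffer whose content is 'xcomp_bv' */
static unsigned int offset_of(uint64_t xcomp_bv, int feature)
{
	unsigned int offs = LEGACY_AND_HDR;

	for (int i = 2; i <= feature; i++) {
		if (!(xcomp_bv & (1ULL << i)))
			continue;		/* absent features take no space */
		if (align64[i])
			offs = ALIGN64(offs);
		if (i == feature)
			break;
		offs += xsize[i];
	}
	return offs;
}

int main(void)
{
	/* FP | SSE | AVX | PKRU | AMX TILECFG | AMX TILEDATA */
	uint64_t xcomp_bv = 1 | 2 | (1 << 2) | (1 << 9) | (1ULL << 17) | (1ULL << 18);
	int topmost = 18;

	printf("TILEDATA offset: %u\n", offset_of(xcomp_bv, 18));
	/* size calculation as in the last patch: topmost offset + topmost size */
	printf("total size     : %u\n", offset_of(xcomp_bv, topmost) + xsize[topmost]);
	return 0;
}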