From 4ecd16ec7059390b430af34bd8bc3ca2b5dcef9a Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Sun, 24 Jan 2016 14:38:06 -0800 Subject: x86/fpu: Fix math emulation in eager fpu mode Systems without an FPU are generally old and therefore use lazy FPU switching. Unsurprisingly, math emulation in eager FPU mode is a bit buggy. Fix it. There were two bugs involving kernel code trying to use the FPU registers in eager mode even if they didn't exist and one BUG_ON() that was incorrect. Signed-off-by: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Quentin Casasnovas Cc: Rik van Riel Cc: Sai Praneeth Prakhya Cc: Thomas Gleixner Cc: yu-cheng yu Link: http://lkml.kernel.org/r/b4b8d112436bd6fab866e1b4011131507e8d7fbe.1453675014.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/fpu/internal.h | 3 ++- arch/x86/kernel/fpu/core.c | 2 +- arch/x86/kernel/traps.c | 1 - 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 0fd440df63f1..a1f78a9fbf41 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -589,7 +589,8 @@ switch_fpu_prepare(struct fpu *old_fpu, struct fpu *new_fpu, int cpu) * If the task has used the math, pre-load the FPU on xsave processors * or if the past 5 consecutive context-switches used math. */ - fpu.preload = new_fpu->fpstate_active && + fpu.preload = static_cpu_has(X86_FEATURE_FPU) && + new_fpu->fpstate_active && (use_eager_fpu() || new_fpu->counter > 5); if (old_fpu->fpregs_active) { diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index d25097c3fc1d..08e1e11a05ca 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -423,7 +423,7 @@ void fpu__clear(struct fpu *fpu) { WARN_ON_FPU(fpu != ¤t->thread.fpu); /* Almost certainly an anomaly */ - if (!use_eager_fpu()) { + if (!use_eager_fpu() || !static_cpu_has(X86_FEATURE_FPU)) { /* FPU state will be reallocated lazily at the first use. */ fpu__drop(fpu); } else { diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index ade185a46b1d..87f80febf477 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -750,7 +750,6 @@ dotraplinkage void do_device_not_available(struct pt_regs *regs, long error_code) { RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); - BUG_ON(use_eager_fpu()); #ifdef CONFIG_MATH_EMULATION if (read_cr0() & X86_CR0_EM) { -- cgit v1.2.3 From 5ed73f40735c68d8a656b46d09b1885d3b8740ae Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Sun, 24 Jan 2016 14:38:07 -0800 Subject: x86/fpu: Fix FNSAVE usage in eagerfpu mode In eager fpu mode, having deactivated FPU without immediately reloading some other context is illegal. Therefore, to recover from FNSAVE, we can't just deactivate the state -- we need to reload it if we're not actively context switching. We had this wrong in fpu__save() and fpu__copy(). Fix both. __kernel_fpu_begin() was fine -- add a comment. This fixes a warning triggerable with nofxsr eagerfpu=on. Signed-off-by: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Quentin Casasnovas Cc: Rik van Riel Cc: Sai Praneeth Prakhya Cc: Thomas Gleixner Cc: yu-cheng yu Link: http://lkml.kernel.org/r/60662444e13c76f06e23c15c5dcdba31b4ac3d67.1453675014.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/fpu/core.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 08e1e11a05ca..7a9244df33e2 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -114,6 +114,10 @@ void __kernel_fpu_begin(void) kernel_fpu_disable(); if (fpu->fpregs_active) { + /* + * Ignore return value -- we don't care if reg state + * is clobbered. + */ copy_fpregs_to_fpstate(fpu); } else { this_cpu_write(fpu_fpregs_owner_ctx, NULL); @@ -189,8 +193,12 @@ void fpu__save(struct fpu *fpu) preempt_disable(); if (fpu->fpregs_active) { - if (!copy_fpregs_to_fpstate(fpu)) - fpregs_deactivate(fpu); + if (!copy_fpregs_to_fpstate(fpu)) { + if (use_eager_fpu()) + copy_kernel_to_fpregs(&fpu->state); + else + fpregs_deactivate(fpu); + } } preempt_enable(); } @@ -259,7 +267,11 @@ static void fpu_copy(struct fpu *dst_fpu, struct fpu *src_fpu) preempt_disable(); if (!copy_fpregs_to_fpstate(dst_fpu)) { memcpy(&src_fpu->state, &dst_fpu->state, xstate_size); - fpregs_deactivate(src_fpu); + + if (use_eager_fpu()) + copy_kernel_to_fpregs(&src_fpu->state); + else + fpregs_deactivate(src_fpu); } preempt_enable(); } -- cgit v1.2.3 From a20d7297045f7fdcd676c15243192eb0e95a4306 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Sun, 24 Jan 2016 14:38:08 -0800 Subject: x86/fpu: Fold fpu_copy() into fpu__copy() Splitting it into two functions needlessly obfuscated the code. While we're at it, improve the comment slightly. Signed-off-by: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Quentin Casasnovas Cc: Rik van Riel Cc: Sai Praneeth Prakhya Cc: Thomas Gleixner Cc: yu-cheng yu Link: http://lkml.kernel.org/r/3eb5a63a9c5c84077b2677a7dfe684eef96fe59e.1453675014.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/fpu/core.c | 32 +++++++++++--------------------- 1 file changed, 11 insertions(+), 21 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 7a9244df33e2..299b58bb975b 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -231,14 +231,15 @@ void fpstate_init(union fpregs_state *state) } EXPORT_SYMBOL_GPL(fpstate_init); -/* - * Copy the current task's FPU state to a new task's FPU context. - * - * In both the 'eager' and the 'lazy' case we save hardware registers - * directly to the destination buffer. - */ -static void fpu_copy(struct fpu *dst_fpu, struct fpu *src_fpu) +int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu) { + dst_fpu->counter = 0; + dst_fpu->fpregs_active = 0; + dst_fpu->last_cpu = -1; + + if (!src_fpu->fpstate_active || !cpu_has_fpu) + return 0; + WARN_ON_FPU(src_fpu != ¤t->thread.fpu); /* @@ -251,10 +252,9 @@ static void fpu_copy(struct fpu *dst_fpu, struct fpu *src_fpu) /* * Save current FPU registers directly into the child * FPU context, without any memory-to-memory copying. - * - * If the FPU context got destroyed in the process (FNSAVE - * done on old CPUs) then copy it back into the source - * context and mark the current task for lazy restore. + * In lazy mode, if the FPU context isn't loaded into + * fpregs, CR0.TS will be set and do_device_not_available + * will load the FPU context. * * We have to do all this with preemption disabled, * mostly because of the FNSAVE case, because in that @@ -274,16 +274,6 @@ static void fpu_copy(struct fpu *dst_fpu, struct fpu *src_fpu) fpregs_deactivate(src_fpu); } preempt_enable(); -} - -int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu) -{ - dst_fpu->counter = 0; - dst_fpu->fpregs_active = 0; - dst_fpu->last_cpu = -1; - - if (src_fpu->fpstate_active && cpu_has_fpu) - fpu_copy(dst_fpu, src_fpu); return 0; } -- cgit v1.2.3 From c6ab109f7e0eae3bae3bb10f8ddb0df67735c150 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Sun, 24 Jan 2016 14:38:09 -0800 Subject: x86/fpu: Speed up lazy FPU restores slightly If we have an FPU, there's no need to check CR0 for FPU emulation. Signed-off-by: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Quentin Casasnovas Cc: Rik van Riel Cc: Sai Praneeth Prakhya Cc: Thomas Gleixner Cc: yu-cheng yu Link: http://lkml.kernel.org/r/980004297e233c27066d54e71382c44cdd36ef7c.1453675014.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/traps.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 87f80febf477..36a9c017540e 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -752,7 +752,7 @@ do_device_not_available(struct pt_regs *regs, long error_code) RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); #ifdef CONFIG_MATH_EMULATION - if (read_cr0() & X86_CR0_EM) { + if (!boot_cpu_has(X86_FEATURE_FPU) && (read_cr0() & X86_CR0_EM)) { struct math_emu_info info = { }; conditional_sti(regs); -- cgit v1.2.3 From 58122bf1d856a4ea9581d62a07c557d997d46a19 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Sun, 24 Jan 2016 14:38:10 -0800 Subject: x86/fpu: Default eagerfpu=on on all CPUs We have eager and lazy FPU modes, introduced in: 304bceda6a18 ("x86, fpu: use non-lazy fpu restore for processors supporting xsave") The result is rather messy. There are two code paths in almost all of the FPU code, and only one of them (the eager case) is tested frequently, since most kernel developers have new enough hardware that we use eagerfpu. It seems that, on any remotely recent hardware, eagerfpu is a win: glibc uses SSE2, so laziness is probably overoptimistic, and, in any case, manipulating TS is far slower that saving and restoring the full state. (Stores to CR0.TS are serializing and are poorly optimized.) To try to shake out any latent issues on old hardware, this changes the default to eager on all CPUs. If no performance or functionality problems show up, a subsequent patch could remove lazy mode entirely. Signed-off-by: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Quentin Casasnovas Cc: Rik van Riel Cc: Sai Praneeth Prakhya Cc: Thomas Gleixner Cc: yu-cheng yu Link: http://lkml.kernel.org/r/ac290de61bf08d9cfc2664a4f5080257ffc1075a.1453675014.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/fpu/init.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index 6d9f0a7ef4c8..471fe277ff40 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -260,7 +260,10 @@ static void __init fpu__init_system_xstate_size_legacy(void) * not only saved the restores along the way, but we also have the * FPU ready to be used for the original task. * - * 'eager' switching is used on modern CPUs, there we switch the FPU + * 'lazy' is deprecated because it's almost never a performance win + * and it's much more complicated than 'eager'. + * + * 'eager' switching is by default on all CPUs, there we switch the FPU * state during every context switch, regardless of whether the task * has used FPU instructions in that time slice or not. This is done * because modern FPU context saving instructions are able to optimize @@ -271,7 +274,7 @@ static void __init fpu__init_system_xstate_size_legacy(void) * to use 'eager' restores, if we detect that a task is using the FPU * frequently. See the fpu->counter logic in fpu/internal.h for that. ] */ -static enum { AUTO, ENABLE, DISABLE } eagerfpu = AUTO; +static enum { ENABLE, DISABLE } eagerfpu = ENABLE; /* * Find supported xfeatures based on cpu features and command-line input. @@ -348,15 +351,9 @@ static void __init fpu__init_system_ctx_switch(void) */ static void __init fpu__init_parse_early_param(void) { - /* - * No need to check "eagerfpu=auto" again, since it is the - * initial default. - */ if (cmdline_find_option_bool(boot_command_line, "eagerfpu=off")) { eagerfpu = DISABLE; fpu__clear_eager_fpu_features(); - } else if (cmdline_find_option_bool(boot_command_line, "eagerfpu=on")) { - eagerfpu = ENABLE; } if (cmdline_find_option_bool(boot_command_line, "no387")) -- cgit v1.2.3