From 7f79695cc1b6aa6d80a861780d9f8ce75d3dddcb Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Mon, 22 Aug 2016 12:06:21 +0200
Subject: s390/fpu: improve kernel_fpu_[begin|end]

In case of nested user of the FPU or vector registers in the kernel
the current code uses the mask of the FPU/vector registers of the
previous contexts to decide which registers to save and restore.
E.g. if the previous context used KERNEL_VXR_V0V7 and the next
context wants to use KERNEL_VXR_V24V31 the first 8 vector registers
are stored to the FPU state structure. But this is not necessary
as the next context does not use these registers.

Rework the FPU/vector register save and restore code. The new code
does a few things differently:
1) A lowcore field is used instead of a per-cpu variable.
2) The kernel_fpu_end function now has two parameters just like
   kernel_fpu_begin. The register flags are required by both
   functions to save / restore the minimal register set.
3) The inline functions kernel_fpu_begin/kernel_fpu_end now do the
   update of the register masks. If the user space FPU registers
   have already been stored neither save_fpu_regs nor the
   __kernel_fpu_begin/__kernel_fpu_end functions have to be called
   for the first context. In this case kernel_fpu_begin adds 7
   instructions and kernel_fpu_end adds 4 instructions.
3) The inline assemblies in __kernel_fpu_begin / __kernel_fpu_end
   to save / restore the vector registers are simplified a bit.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/include/asm/fpu/api.h | 32 +++++++++++++++++++++-----------
 1 file changed, 21 insertions(+), 11 deletions(-)

(limited to 'arch/s390/include/asm/fpu/api.h')

diff --git a/arch/s390/include/asm/fpu/api.h b/arch/s390/include/asm/fpu/api.h
index 6aba6fc406ad..02124d66bfb5 100644
--- a/arch/s390/include/asm/fpu/api.h
+++ b/arch/s390/include/asm/fpu/api.h
@@ -64,18 +64,18 @@ static inline int test_fp_ctl(u32 fpc)
 	return rc;
 }
 
-#define KERNEL_VXR_V0V7		1
-#define KERNEL_VXR_V8V15	2
-#define KERNEL_VXR_V16V23	4
-#define KERNEL_VXR_V24V31	8
-#define KERNEL_FPR		16
-#define KERNEL_FPC		256
+#define KERNEL_FPC		1
+#define KERNEL_VXR_V0V7		2
+#define KERNEL_VXR_V8V15	4
+#define KERNEL_VXR_V16V23	8
+#define KERNEL_VXR_V24V31	16
 
 #define KERNEL_VXR_LOW		(KERNEL_VXR_V0V7|KERNEL_VXR_V8V15)
 #define KERNEL_VXR_MID		(KERNEL_VXR_V8V15|KERNEL_VXR_V16V23)
 #define KERNEL_VXR_HIGH		(KERNEL_VXR_V16V23|KERNEL_VXR_V24V31)
 
-#define KERNEL_FPU_MASK		(KERNEL_VXR_LOW|KERNEL_VXR_HIGH|KERNEL_FPR)
+#define KERNEL_VXR		(KERNEL_VXR_LOW|KERNEL_VXR_HIGH)
+#define KERNEL_FPR		(KERNEL_FPC|KERNEL_VXR_V0V7)
 
 struct kernel_fpu;
 
@@ -87,18 +87,28 @@ struct kernel_fpu;
  * Prefer using the kernel_fpu_begin()/kernel_fpu_end() pair of functions.
  */
 void __kernel_fpu_begin(struct kernel_fpu *state, u32 flags);
-void __kernel_fpu_end(struct kernel_fpu *state);
+void __kernel_fpu_end(struct kernel_fpu *state, u32 flags);
 
 
 static inline void kernel_fpu_begin(struct kernel_fpu *state, u32 flags)
 {
 	preempt_disable();
-	__kernel_fpu_begin(state, flags);
+	state->mask = S390_lowcore.fpu_flags;
+	if (!test_cpu_flag(CIF_FPU))
+		/* Save user space FPU state and register contents */
+		save_fpu_regs();
+	else if (state->mask & flags)
+		/* Save FPU/vector register in-use by the kernel */
+		__kernel_fpu_begin(state, flags);
+	S390_lowcore.fpu_flags |= flags;
 }
 
-static inline void kernel_fpu_end(struct kernel_fpu *state)
+static inline void kernel_fpu_end(struct kernel_fpu *state, u32 flags)
 {
-	__kernel_fpu_end(state);
+	S390_lowcore.fpu_flags = state->mask;
+	if (state->mask & flags)
+		/* Restore FPU/vector register in-use by the kernel */
+		__kernel_fpu_end(state, flags);
 	preempt_enable();
 }
 
-- 
cgit v1.2.3