tilegx: change how we find the kernel stack

Previously, we used a special-purpose register (SPR_SYSTEM_SAVE_K_0) to hold the CPU number and the top of the current kernel stack by using the low bits to hold the CPU number, and using the high bits to hold the address of the page just above where we'd want the kernel stack to be. That way we could initialize a new SP when first entering the kernel by just masking the SPR value and subtracting a couple of words.

However, it's actually more useful to be able to place an arbitrary kernel-top value in the SPR. This allows us to create a new stack context (e.g. for virtualization) with an arbitrary top-of-stack VA. To make this work, we now store the CPU number in the high bits, above the highest legal VA bit (42 bits in the current tilegx microarchitecture). The full 42 bits are thus available to store the top-of-stack value. Getting the current CPU (a relatively common operation) is still fast; it's now a shift rather than a mask.

We make this change only for tilegx, since tilepro has too few SPR bits to do this, and we don't need this support on tilepro anyway.

Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
author: Chris Metcalf <cmetcalf@tilera.com> 2013-08-10 12:35:02 -0400
committer: Chris Metcalf <cmetcalf@tilera.com> 2013-08-30 11:56:58 -0400
commit: 35f059761c5ac313d13372fe3cdaa41bce3d0dbf (patch)
tree: 1a8f7e0eba01afac74c081348530fccd63dc48e4 /arch/tile/kernel
parent: 4036c7d3542ce82ea343bf95dd05ca46aefba9aa (diff)
download: linux-35f059761c5ac313d13372fe3cdaa41bce3d0dbf.tar.bz2
5 files changed, 24 insertions, 23 deletions
diff --git a/arch/tile/kernel/head_32.S b/arch/tile/kernel/head_32.S
index d1527fce2861..f3f17b0283ff 100644
--- a/arch/tile/kernel/head_32.S
+++ b/arch/tile/kernel/head_32.S
@@ -86,7 +86,7 @@ ENTRY(_start)
 	/*
 	 * Load up our per-cpu offset.  When the first (master) tile
 	 * boots, this value is still zero, so we will load boot_pc
-	 * with start_kernel, and boot_sp with init_stack + THREAD_SIZE.
+	 * with start_kernel, and boot_sp at the top of init_stack.
 	 * The master tile initializes the per-cpu offset array, so that
 	 * when subsequent (secondary) tiles boot, they will instead load
 	 * from their per-cpu versions of boot_sp and boot_pc.
@@ -126,7 +126,6 @@ ENTRY(_start)
 	lw sp, r1
 	or r4, sp, r4
 	mtspr SPR_SYSTEM_SAVE_K_0, r4  /* save ksp0 + cpu */
-	addi sp, sp, -STACK_TOP_DELTA
 	{
 	  move lr, zero   /* stop backtraces in the called function */
 	  jr r0
diff --git a/arch/tile/kernel/head_64.S b/arch/tile/kernel/head_64.S
index 969e4f81f3b3..652b81426158 100644
--- a/arch/tile/kernel/head_64.S
+++ b/arch/tile/kernel/head_64.S
@@ -158,7 +158,7 @@ ENTRY(_start)
 	/*
 	 * Load up our per-cpu offset.  When the first (master) tile
 	 * boots, this value is still zero, so we will load boot_pc
-	 * with start_kernel, and boot_sp with init_stack + THREAD_SIZE.
+	 * with start_kernel, and boot_sp at the top of init_stack.
 	 * The master tile initializes the per-cpu offset array, so that
 	 * when subsequent (secondary) tiles boot, they will instead load
 	 * from their per-cpu versions of boot_sp and boot_pc.
@@ -202,9 +202,9 @@ ENTRY(_start)
 	}
 	ld r0, r0
 	ld sp, r1
-	or r4, sp, r4
+	shli r4, r4, CPU_SHIFT
+	bfins r4, sp, 0, CPU_SHIFT-1
 	mtspr SPR_SYSTEM_SAVE_K_0, r4  /* save ksp0 + cpu */
-	addi sp, sp, -STACK_TOP_DELTA
 	{
 	  move lr, zero   /* stop backtraces in the called function */
 	  jr r0
diff --git a/arch/tile/kernel/intvec_32.S b/arch/tile/kernel/intvec_32.S
index 9c0c3cb6aab0..f3d26f48e659 100644
--- a/arch/tile/kernel/intvec_32.S
+++ b/arch/tile/kernel/intvec_32.S
@@ -185,7 +185,7 @@ intvec_\vecname:
 	 * point sp at the top aligned address on the actual stack page.
 	 */
 	mfspr   r0, SPR_SYSTEM_SAVE_K_0
-	mm      r0, r0, zero, LOG2_THREAD_SIZE, 31
+	mm      r0, r0, zero, LOG2_NR_CPU_IDS, 31
 
 0:
 	/*
@@ -203,6 +203,9 @@ intvec_\vecname:
 	 *    cache line 1: r14...r29
 	 *    cache line 0: 2 x frame, r0..r13
 	 */
+#if STACK_TOP_DELTA != 64
+#error STACK_TOP_DELTA must be 64 for assumptions here and in task_pt_regs()
+#endif
 	andi    r0, r0, -64
 
 	/*
@@ -464,7 +467,7 @@ intvec_\vecname:
 	}
 	{
 	 auli   r21, r21, ha16(__per_cpu_offset)
-	 mm     r20, r20, zero, 0, LOG2_THREAD_SIZE-1
+	 mm     r20, r20, zero, 0, LOG2_NR_CPU_IDS-1
 	}
 	s2a     r20, r20, r21
 	lw      tp, r20
diff --git a/arch/tile/kernel/intvec_64.S b/arch/tile/kernel/intvec_64.S
index df19d4f3946e..3b35bb490d3e 100644
--- a/arch/tile/kernel/intvec_64.S
+++ b/arch/tile/kernel/intvec_64.S
@@ -132,13 +132,9 @@ intvec_\vecname:
 	mfspr   r3, SPR_SYSTEM_SAVE_K_0
 
 	/* Get &thread_info->unalign_jit_tmp[0] in r3. */
+	bfexts  r3, r3, 0, CPU_SHIFT-1
 	mm      r3, zero, LOG2_THREAD_SIZE, 63
-#if THREAD_SIZE < 65536
-	addli   r3, r3, -(PAGE_SIZE - THREAD_INFO_UNALIGN_JIT_TMP_OFFSET)
-#else
-	addli   r3, r3, -(PAGE_SIZE/2)
-	addli   r3, r3, -(PAGE_SIZE/2 - THREAD_INFO_UNALIGN_JIT_TMP_OFFSET)
-#endif
+	addli   r3, r3, THREAD_INFO_UNALIGN_JIT_TMP_OFFSET
 
 	/*
 	 * Save r0, r1, r2 into thread_info array r3 points to
@@ -365,13 +361,13 @@ intvec_\vecname:
 
 2:
 	/*
-	 * SYSTEM_SAVE_K_0 holds the cpu number in the low bits, and
-	 * the current stack top in the higher bits.  So we recover
-	 * our stack top by just masking off the low bits, then
+	 * SYSTEM_SAVE_K_0 holds the cpu number in the high bits, and
+	 * the current stack top in the lower bits.  So we recover
+	 * our starting stack value by sign-extending the low bits, then
 	 * point sp at the top aligned address on the actual stack page.
 	 */
 	mfspr   r0, SPR_SYSTEM_SAVE_K_0
-	mm      r0, zero, LOG2_THREAD_SIZE, 63
+	bfexts  r0, r0, 0, CPU_SHIFT-1
 
 0:
 	/*
@@ -393,6 +389,9 @@ intvec_\vecname:
 	 *    cache line 1: r6...r13
 	 *    cache line 0: 2 x frame, r0..r5
 	 */
+#if STACK_TOP_DELTA != 64
+#error STACK_TOP_DELTA must be 64 for assumptions here and in task_pt_regs()
+#endif
 	andi    r0, r0, -64
 
 	/*
@@ -690,7 +689,7 @@ intvec_\vecname:
 	}
 	{
 	 shl16insli r21, r21, hw1(__per_cpu_offset)
-	 bfextu r20, r20, 0, LOG2_THREAD_SIZE-1
+	 bfextu r20, r20, CPU_SHIFT, 63
 	}
 	shl16insli r21, r21, hw0(__per_cpu_offset)
 	shl3add r20, r20, r21
diff --git a/arch/tile/kernel/stack.c b/arch/tile/kernel/stack.c
index a9db923bb9eb..24fd223df65d 100644
--- a/arch/tile/kernel/stack.c
+++ b/arch/tile/kernel/stack.c
@@ -197,19 +197,19 @@ static void validate_stack(struct pt_regs *regs)
 {
 	int cpu = raw_smp_processor_id();
 	unsigned long ksp0 = get_current_ksp0();
-	unsigned long ksp0_base = ksp0 - THREAD_SIZE;
+	unsigned long ksp0_base = ksp0 & -THREAD_SIZE;
 	unsigned long sp = stack_pointer;
 
 	if (EX1_PL(regs->ex1) == KERNEL_PL && regs->sp >= ksp0) {
-		pr_err("WARNING: cpu %d: kernel stack page %#lx underrun!\n"
+		pr_err("WARNING: cpu %d: kernel stack %#lx..%#lx underrun!\n"
 		       "  sp %#lx (%#lx in caller), caller pc %#lx, lr %#lx\n",
-		       cpu, ksp0_base, sp, regs->sp, regs->pc, regs->lr);
+		       cpu, ksp0_base, ksp0, sp, regs->sp, regs->pc, regs->lr);
 	}
 
 	else if (sp < ksp0_base + sizeof(struct thread_info)) {
-		pr_err("WARNING: cpu %d: kernel stack page %#lx overrun!\n"
+		pr_err("WARNING: cpu %d: kernel stack %#lx..%#lx overrun!\n"
 		       "  sp %#lx (%#lx in caller), caller pc %#lx, lr %#lx\n",
-		       cpu, ksp0_base, sp, regs->sp, regs->pc, regs->lr);
+		       cpu, ksp0_base, ksp0, sp, regs->sp, regs->pc, regs->lr);
 	}
 }
author	Chris Metcalf <cmetcalf@tilera.com>	2013-08-10 12:35:02 -0400
committer	Chris Metcalf <cmetcalf@tilera.com>	2013-08-30 11:56:58 -0400
commit	35f059761c5ac313d13372fe3cdaa41bce3d0dbf (patch)
tree	1a8f7e0eba01afac74c081348530fccd63dc48e4 /arch/tile/kernel
parent	4036c7d3542ce82ea343bf95dd05ca46aefba9aa (diff)
download	linux-35f059761c5ac313d13372fe3cdaa41bce3d0dbf.tar.bz2