summaryrefslogtreecommitdiffstats
path: root/arch/tile/lib
diff options
context:
space:
mode:
Diffstat (limited to 'arch/tile/lib')
-rw-r--r--arch/tile/lib/Makefile1
-rw-r--r--arch/tile/lib/cacheflush.c30
-rw-r--r--arch/tile/lib/memcpy_user_64.c8
-rw-r--r--arch/tile/lib/spinlock_common.h2
4 files changed, 36 insertions, 5 deletions
diff --git a/arch/tile/lib/Makefile b/arch/tile/lib/Makefile
index 0c26086ecbef..985f59858234 100644
--- a/arch/tile/lib/Makefile
+++ b/arch/tile/lib/Makefile
@@ -7,6 +7,7 @@ lib-y = cacheflush.o checksum.o cpumask.o delay.o uaccess.o \
strchr_$(BITS).o strlen_$(BITS).o
ifeq ($(CONFIG_TILEGX),y)
+CFLAGS_REMOVE_memcpy_user_64.o = -fno-omit-frame-pointer
lib-y += memcpy_user_64.o
else
lib-y += atomic_32.o atomic_asm_32.o memcpy_tile64.o
diff --git a/arch/tile/lib/cacheflush.c b/arch/tile/lib/cacheflush.c
index 8928aace7a64..db4fb89e12d8 100644
--- a/arch/tile/lib/cacheflush.c
+++ b/arch/tile/lib/cacheflush.c
@@ -39,7 +39,21 @@ void finv_buffer_remote(void *buffer, size_t size, int hfh)
{
char *p, *base;
size_t step_size, load_count;
+
+ /*
+ * On TILEPro the striping granularity is a fixed 8KB; on
+ * TILE-Gx it is configurable, and we rely on the fact that
+ * the hypervisor always configures maximum striping, so that
+ * bits 9 and 10 of the PA are part of the stripe function, so
+ * every 512 bytes we hit a striping boundary.
+ *
+ */
+#ifdef __tilegx__
+ const unsigned long STRIPE_WIDTH = 512;
+#else
const unsigned long STRIPE_WIDTH = 8192;
+#endif
+
#ifdef __tilegx__
/*
* On TILE-Gx, we must disable the dstream prefetcher before doing
@@ -74,7 +88,7 @@ void finv_buffer_remote(void *buffer, size_t size, int hfh)
* memory, that one load would be sufficient, but since we may
* be, we also need to back up to the last load issued to
* another memory controller, which would be the point where
- * we crossed an 8KB boundary (the granularity of striping
+ * we crossed a "striping" boundary (the granularity of striping
* across memory controllers). Keep backing up and doing this
* until we are before the beginning of the buffer, or have
* hit all the controllers.
@@ -88,12 +102,22 @@ void finv_buffer_remote(void *buffer, size_t size, int hfh)
* every cache line on a full memory stripe on each
* controller" that we simply do that, to simplify the logic.
*
- * FIXME: See bug 9535 for some issues with this code.
+ * On TILE-Gx the hash-for-home function is much more complex,
+ * with the upshot being we can't readily guarantee we have
+ * hit both entries in the 128-entry AMT that were hit by any
+ * load in the entire range, so we just re-load them all.
+ * With larger buffers, we may want to consider using a hypervisor
+ * trap to issue loads directly to each hash-for-home tile for
+ * each controller (doing it from Linux would trash the TLB).
*/
if (hfh) {
step_size = L2_CACHE_BYTES;
+#ifdef __tilegx__
+ load_count = (size + L2_CACHE_BYTES - 1) / L2_CACHE_BYTES;
+#else
load_count = (STRIPE_WIDTH / L2_CACHE_BYTES) *
(1 << CHIP_LOG_NUM_MSHIMS());
+#endif
} else {
step_size = STRIPE_WIDTH;
load_count = (1 << CHIP_LOG_NUM_MSHIMS());
@@ -109,7 +133,7 @@ void finv_buffer_remote(void *buffer, size_t size, int hfh)
/* Figure out how far back we need to go. */
base = p - (step_size * (load_count - 2));
- if ((long)base < (long)buffer)
+ if ((unsigned long)base < (unsigned long)buffer)
base = buffer;
/*
diff --git a/arch/tile/lib/memcpy_user_64.c b/arch/tile/lib/memcpy_user_64.c
index 4763b3aff1cc..37440caa7370 100644
--- a/arch/tile/lib/memcpy_user_64.c
+++ b/arch/tile/lib/memcpy_user_64.c
@@ -14,7 +14,13 @@
* Do memcpy(), but trap and return "n" when a load or store faults.
*
* Note: this idiom only works when memcpy() compiles to a leaf function.
- * If "sp" is updated during memcpy, the "jrp lr" will be incorrect.
+ * Here leaf function not only means it does not have calls, but also
+ * requires no stack operations (sp, stack frame pointer) and no
+ * use of callee-saved registers, else "jrp lr" will be incorrect since
+ * unwinding stack frame is bypassed. Since memcpy() is not complex so
+ * these conditions are satisfied here, but we need to be careful when
+ * modifying this file. This is not a clean solution but is the best
+ * one so far.
*
* Also note that we are capturing "n" from the containing scope here.
*/
diff --git a/arch/tile/lib/spinlock_common.h b/arch/tile/lib/spinlock_common.h
index c10109809132..6ac37509faca 100644
--- a/arch/tile/lib/spinlock_common.h
+++ b/arch/tile/lib/spinlock_common.h
@@ -60,5 +60,5 @@ static void delay_backoff(int iterations)
loops += __insn_crc32_32(stack_pointer, get_cycles_low()) &
(loops - 1);
- relax(1 << exponent);
+ relax(loops);
}