Diffstat (limited to 'include/asm-ia64')
-rw-r--r--   include/asm-ia64/mmu.h          |  8
-rw-r--r--   include/asm-ia64/mmu_context.h  | 54
-rw-r--r--   include/asm-ia64/rwsem.h        | 35
-rw-r--r--   include/asm-ia64/spinlock.h     | 33
4 files changed, 79 insertions(+), 51 deletions(-)
diff --git a/include/asm-ia64/mmu.h b/include/asm-ia64/mmu.h
index ae1525352a25..611432ba579c 100644
--- a/include/asm-ia64/mmu.h
+++ b/include/asm-ia64/mmu.h
@@ -2,10 +2,12 @@
 #define __MMU_H
 
 /*
- * Type for a context number.  We declare it volatile to ensure proper ordering when it's
- * accessed outside of spinlock'd critical sections (e.g., as done in activate_mm() and
- * init_new_context()).
+ * Type for a context number.  We declare it volatile to ensure proper
+ * ordering when it's accessed outside of spinlock'd critical sections
+ * (e.g., as done in activate_mm() and init_new_context()).
  */
 typedef volatile unsigned long mm_context_t;
 
+typedef unsigned long nv_mm_context_t;
+
 #endif
diff --git a/include/asm-ia64/mmu_context.h b/include/asm-ia64/mmu_context.h
index e3e5fededb04..0680d163be97 100644
--- a/include/asm-ia64/mmu_context.h
+++ b/include/asm-ia64/mmu_context.h
@@ -55,34 +55,46 @@
 static inline void
 delayed_tlb_flush (void)
 {
         extern void local_flush_tlb_all (void);
+        unsigned long flags;
 
         if (unlikely(__ia64_per_cpu_var(ia64_need_tlb_flush))) {
-                local_flush_tlb_all();
-                __ia64_per_cpu_var(ia64_need_tlb_flush) = 0;
+                spin_lock_irqsave(&ia64_ctx.lock, flags);
+                {
+                        if (__ia64_per_cpu_var(ia64_need_tlb_flush)) {
+                                local_flush_tlb_all();
+                                __ia64_per_cpu_var(ia64_need_tlb_flush) = 0;
+                        }
+                }
+                spin_unlock_irqrestore(&ia64_ctx.lock, flags);
         }
 }
 
-static inline mm_context_t
+static inline nv_mm_context_t
 get_mmu_context (struct mm_struct *mm)
 {
         unsigned long flags;
-        mm_context_t context = mm->context;
-
-        if (context)
-                return context;
-
-        spin_lock_irqsave(&ia64_ctx.lock, flags);
-        {
-                /* re-check, now that we've got the lock: */
-                context = mm->context;
-                if (context == 0) {
-                        cpus_clear(mm->cpu_vm_mask);
-                        if (ia64_ctx.next >= ia64_ctx.limit)
-                                wrap_mmu_context(mm);
-                        mm->context = context = ia64_ctx.next++;
+        nv_mm_context_t context = mm->context;
+
+        if (unlikely(!context)) {
+                spin_lock_irqsave(&ia64_ctx.lock, flags);
+                {
+                        /* re-check, now that we've got the lock: */
+                        context = mm->context;
+                        if (context == 0) {
+                                cpus_clear(mm->cpu_vm_mask);
+                                if (ia64_ctx.next >= ia64_ctx.limit)
+                                        wrap_mmu_context(mm);
+                                mm->context = context = ia64_ctx.next++;
+                        }
                 }
+                spin_unlock_irqrestore(&ia64_ctx.lock, flags);
         }
-        spin_unlock_irqrestore(&ia64_ctx.lock, flags);
+        /*
+         * Ensure we're not starting to use "context" before any old
+         * uses of it are gone from our TLB.
+         */
+        delayed_tlb_flush();
+
         return context;
 }
 
@@ -104,7 +116,7 @@ destroy_context (struct mm_struct *mm)
 }
 
 static inline void
-reload_context (mm_context_t context)
+reload_context (nv_mm_context_t context)
 {
         unsigned long rid;
         unsigned long rid_incr = 0;
@@ -138,7 +150,7 @@ reload_context (mm_context_t context)
 static inline void
 activate_context (struct mm_struct *mm)
 {
-        mm_context_t context;
+        nv_mm_context_t context;
 
         do {
                 context = get_mmu_context(mm);
@@ -157,8 +169,6 @@ activate_context (struct mm_struct *mm)
 static inline void
 activate_mm (struct mm_struct *prev, struct mm_struct *next)
 {
-        delayed_tlb_flush();
-
         /*
          * We may get interrupts here, but that's OK because interrupt handlers cannot
          * touch user-space.
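The get_mmu_context() rework above is a double-checked allocation: mm->context is read once without the lock as a fast path, and re-read under ia64_ctx.lock before a new context number is handed out; delayed_tlb_flush() now re-checks its per-CPU flag under the same lock for the same reason. Below is a minimal user-space sketch of that pattern, not kernel code: the pthread mutex stands in for ia64_ctx.lock, and fake_mm/next_ctx are invented names.

#include <pthread.h>

/* Illustrative stand-ins for ia64_ctx.lock and ia64_ctx.next. */
static pthread_mutex_t ctx_lock = PTHREAD_MUTEX_INITIALIZER;
static unsigned long next_ctx = 1;

struct fake_mm { volatile unsigned long context; };

static unsigned long get_context(struct fake_mm *mm)
{
        unsigned long ctx = mm->context;        /* unlocked fast path */

        if (ctx == 0) {
                pthread_mutex_lock(&ctx_lock);
                /* re-check under the lock: another thread may have allocated it */
                ctx = mm->context;
                if (ctx == 0)
                        mm->context = ctx = next_ctx++;
                pthread_mutex_unlock(&ctx_lock);
        }
        return ctx;
}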
diff --git a/include/asm-ia64/rwsem.h b/include/asm-ia64/rwsem.h
index 6ece5061dc19..e18b5ab0cb75 100644
--- a/include/asm-ia64/rwsem.h
+++ b/include/asm-ia64/rwsem.h
@@ -3,6 +3,7 @@
  *
  * Copyright (C) 2003 Ken Chen <kenneth.w.chen@intel.com>
  * Copyright (C) 2003 Asit Mallick <asit.k.mallick@intel.com>
+ * Copyright (C) 2005 Christoph Lameter <clameter@sgi.com>
  *
  * Based on asm-i386/rwsem.h and other architecture implementation.
  *
@@ -11,9 +12,9 @@
  *
  * The lock count is initialized to 0 (no active and no waiting lockers).
  *
- * When a writer subtracts WRITE_BIAS, it'll get 0xffff0001 for the case
- * of an uncontended lock. Readers increment by 1 and see a positive value
- * when uncontended, negative if there are writers (and maybe) readers
+ * When a writer subtracts WRITE_BIAS, it'll get 0xffffffff00000001 for
+ * the case of an uncontended lock. Readers increment by 1 and see a positive
+ * value when uncontended, negative if there are writers (and maybe) readers
  * waiting (in which case it goes to sleep).
  */
 
@@ -29,7 +30,7 @@
  * the semaphore definition
  */
 struct rw_semaphore {
-        signed int              count;
+        signed long             count;
         spinlock_t              wait_lock;
         struct list_head        wait_list;
 #if RWSEM_DEBUG
@@ -37,10 +38,10 @@ struct rw_semaphore {
 #endif
 };
 
-#define RWSEM_UNLOCKED_VALUE            0x00000000
-#define RWSEM_ACTIVE_BIAS               0x00000001
-#define RWSEM_ACTIVE_MASK               0x0000ffff
-#define RWSEM_WAITING_BIAS              (-0x00010000)
+#define RWSEM_UNLOCKED_VALUE            __IA64_UL_CONST(0x0000000000000000)
+#define RWSEM_ACTIVE_BIAS               __IA64_UL_CONST(0x0000000000000001)
+#define RWSEM_ACTIVE_MASK               __IA64_UL_CONST(0x00000000ffffffff)
+#define RWSEM_WAITING_BIAS              -__IA64_UL_CONST(0x0000000100000000)
 #define RWSEM_ACTIVE_READ_BIAS          RWSEM_ACTIVE_BIAS
 #define RWSEM_ACTIVE_WRITE_BIAS         (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
 
@@ -83,7 +84,7 @@ init_rwsem (struct rw_semaphore *sem)
 static inline void
 __down_read (struct rw_semaphore *sem)
 {
-        int result = ia64_fetchadd4_acq((unsigned int *)&sem->count, 1);
+        long result = ia64_fetchadd8_acq((unsigned long *)&sem->count, 1);
 
         if (result < 0)
                 rwsem_down_read_failed(sem);
@@ -95,7 +96,7 @@ __down_read (struct rw_semaphore *sem)
 static inline void
 __down_write (struct rw_semaphore *sem)
 {
-        int old, new;
+        long old, new;
 
         do {
                 old = sem->count;
@@ -112,7 +113,7 @@ __down_write (struct rw_semaphore *sem)
 static inline void
 __up_read (struct rw_semaphore *sem)
 {
-        int result = ia64_fetchadd4_rel((unsigned int *)&sem->count, -1);
+        long result = ia64_fetchadd8_rel((unsigned long *)&sem->count, -1);
 
         if (result < 0 && (--result & RWSEM_ACTIVE_MASK) == 0)
                 rwsem_wake(sem);
@@ -124,7 +125,7 @@ __up_read (struct rw_semaphore *sem)
 static inline void
 __up_write (struct rw_semaphore *sem)
 {
-        int old, new;
+        long old, new;
 
         do {
                 old = sem->count;
@@ -141,7 +142,7 @@ __up_write (struct rw_semaphore *sem)
 static inline int
 __down_read_trylock (struct rw_semaphore *sem)
 {
-        int tmp;
+        long tmp;
         while ((tmp = sem->count) >= 0) {
                 if (tmp == cmpxchg_acq(&sem->count, tmp, tmp+1)) {
                         return 1;
@@ -156,7 +157,7 @@ __down_read_trylock (struct rw_semaphore *sem)
 static inline int
 __down_write_trylock (struct rw_semaphore *sem)
 {
-        int tmp = cmpxchg_acq(&sem->count, RWSEM_UNLOCKED_VALUE,
+        long tmp = cmpxchg_acq(&sem->count, RWSEM_UNLOCKED_VALUE,
                               RWSEM_ACTIVE_WRITE_BIAS);
         return tmp == RWSEM_UNLOCKED_VALUE;
 }
@@ -167,7 +168,7 @@ __down_write_trylock (struct rw_semaphore *sem)
 static inline void
 __downgrade_write (struct rw_semaphore *sem)
 {
-        int old, new;
+        long old, new;
 
         do {
                 old = sem->count;
@@ -182,7 +183,7 @@ __downgrade_write (struct rw_semaphore *sem)
  * Implement atomic add functionality.  These used to be "inline" functions, but GCC v3.1
  * doesn't quite optimize this stuff right and ends up with bad calls to fetchandadd.
  */
-#define rwsem_atomic_add(delta, sem)    atomic_add(delta, (atomic_t *)(&(sem)->count))
-#define rwsem_atomic_update(delta, sem) atomic_add_return(delta, (atomic_t *)(&(sem)->count))
+#define rwsem_atomic_add(delta, sem)    atomic64_add(delta, (atomic64_t *)(&(sem)->count))
+#define rwsem_atomic_update(delta, sem) atomic64_add_return(delta, (atomic64_t *)(&(sem)->count))
 
 #endif /* _ASM_IA64_RWSEM_H */
diff --git a/include/asm-ia64/spinlock.h b/include/asm-ia64/spinlock.h
index 909936f25512..d2430aa0d49d 100644
--- a/include/asm-ia64/spinlock.h
+++ b/include/asm-ia64/spinlock.h
@@ -93,7 +93,15 @@ _raw_spin_lock_flags (spinlock_t *lock, unsigned long flags)
 # endif /* CONFIG_MCKINLEY */
 #endif
 }
+
 #define _raw_spin_lock(lock) _raw_spin_lock_flags(lock, 0)
+
+/* Unlock by doing an ordered store and releasing the cacheline with nta */
+static inline void _raw_spin_unlock(spinlock_t *x) {
+        barrier();
+        asm volatile ("st4.rel.nta [%0] = r0\n\t" :: "r"(x));
+}
+
 #else /* !ASM_SUPPORTED */
 #define _raw_spin_lock_flags(lock, flags) _raw_spin_lock(lock)
 # define _raw_spin_lock(x)                                              \
@@ -109,16 +117,16 @@ do {                                                    \
                 } while (ia64_spinlock_val);                            \
         }                                                               \
 } while (0)
+#define _raw_spin_unlock(x)     do { barrier(); ((spinlock_t *) x)->lock = 0; } while (0)
 #endif /* !ASM_SUPPORTED */
 
 #define spin_is_locked(x)       ((x)->lock != 0)
-#define _raw_spin_unlock(x)     do { barrier(); ((spinlock_t *) x)->lock = 0; } while (0)
#define _raw_spin_trylock(x)    (cmpxchg_acq(&(x)->lock, 0, 1) == 0)
 #define spin_unlock_wait(x)     do { barrier(); } while ((x)->lock)
 
 typedef struct {
-        volatile unsigned int read_counter      : 31;
-        volatile unsigned int write_lock        :  1;
+        volatile unsigned int read_counter      : 24;
+        volatile unsigned int write_lock        :  8;
 #ifdef CONFIG_PREEMPT
         unsigned int break_lock;
 #endif
@@ -174,6 +182,13 @@ do {                                                    \
         (result == 0);                                                  \
 })
 
+static inline void _raw_write_unlock(rwlock_t *x)
+{
+        u8 *y = (u8 *)x;
+        barrier();
+        asm volatile ("st1.rel.nta [%0] = r0\n\t" :: "r"(y+3) : "memory" );
+}
+
 #else /* !ASM_SUPPORTED */
 
 #define _raw_write_lock(l)                                              \
@@ -195,14 +210,14 @@ do {                                                   \
         (ia64_val == 0);                                                \
 })
 
+static inline void _raw_write_unlock(rwlock_t *x)
+{
+        barrier();
+        x->write_lock = 0;
+}
+
 #endif /* !ASM_SUPPORTED */
 
 #define _raw_read_trylock(lock) generic_raw_read_trylock(lock)
 
-#define _raw_write_unlock(x)                                            \
-({                                                                      \
-        smp_mb__before_clear_bit();     /* need barrier before releasing lock... */ \
-        clear_bit(31, (x));                                             \
-})
-
 #endif /* _ASM_IA64_SPINLOCK_H */
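On the rwsem side, widening count to 64 bits moves the active-locker count into the low 32 bits (RWSEM_ACTIVE_MASK) and the waiting/writer bias into the upper half, which is why the header comment now quotes 0xffffffff00000001 for an uncontended writer. The following user-space check of that arithmetic is only a sketch; the constants are copied from the hunk above, and long long is used so it also builds where long is 32 bits.

#include <assert.h>
#include <stdio.h>

#define RWSEM_ACTIVE_BIAS        0x0000000000000001LL
#define RWSEM_ACTIVE_MASK        0x00000000ffffffffLL
#define RWSEM_WAITING_BIAS       (-0x0000000100000000LL)
#define RWSEM_ACTIVE_WRITE_BIAS  (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)

int main(void)
{
        long long count = 0;                     /* RWSEM_UNLOCKED_VALUE */

        count += RWSEM_ACTIVE_WRITE_BIAS;        /* uncontended down_write() */
        printf("after down_write: 0x%016llx\n", (unsigned long long)count);
        assert((unsigned long long)count == 0xffffffff00000001ULL);

        count -= RWSEM_ACTIVE_WRITE_BIAS;        /* up_write(), back to 0 */
        count += RWSEM_ACTIVE_BIAS;              /* uncontended down_read() */
        assert(count > 0 && (count & RWSEM_ACTIVE_MASK) == 1);
        return 0;
}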
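The rwlock_t change widens write_lock from 1 bit to a full byte (and trims read_counter to 24 bits) so that the new _raw_write_unlock() can release the write lock with a single st1.rel.nta to byte 3 of the word instead of a clear_bit() on bit 31. A hedged sketch of why byte 3 is the right offset, assuming GCC's little-endian bitfield layout as used on Linux/ia64 (demo_rwlock_t is an illustrative copy, not the kernel type):

#include <assert.h>
#include <stdio.h>
#include <string.h>

/* Same field widths as the new rwlock_t above (volatile dropped for the demo). */
typedef struct {
        unsigned int read_counter : 24;
        unsigned int write_lock   :  8;
} demo_rwlock_t;

int main(void)
{
        demo_rwlock_t l = { .read_counter = 0, .write_lock = 1 };
        unsigned char bytes[sizeof l];

        memcpy(bytes, &l, sizeof l);
        /* With little-endian bitfield allocation, write_lock occupies the most
         * significant byte of the 32-bit word, i.e. bytes[3]: the byte that
         * _raw_write_unlock() clears with st1.rel.nta at (u8 *)x + 3. */
        printf("byte 3 = %u\n", bytes[3]);
        assert(bytes[3] == 1);
        return 0;
}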