Diffstat (limited to 'arch')
-rw-r--r--  arch/alpha/include/asm/atomic.h | 87
-rw-r--r--  arch/alpha/include/asm/rwsem.h | 68
-rw-r--r--  arch/alpha/include/asm/spinlock.h | 9
-rw-r--r--  arch/arc/Makefile | 2
-rw-r--r--  arch/arc/include/asm/atomic.h | 99
-rw-r--r--  arch/arc/include/asm/spinlock.h | 7
-rw-r--r--  arch/arc/kernel/stacktrace.c | 2
-rw-r--r--  arch/arm/boot/dts/armada-385-linksys.dtsi | 4
-rw-r--r--  arch/arm/boot/dts/sama5d2.dtsi | 2
-rw-r--r--  arch/arm/boot/dts/sun4i-a10.dtsi | 21
-rw-r--r--  arch/arm/boot/dts/sun5i-a10s.dtsi | 11
-rw-r--r--  arch/arm/boot/dts/sun5i-r8-chip.dts | 2
-rw-r--r--  arch/arm/boot/dts/sun7i-a20.dtsi | 13
-rw-r--r--  arch/arm/boot/dts/tegra30-beaver.dts | 3
-rw-r--r--  arch/arm/include/asm/atomic.h | 106
-rw-r--r--  arch/arm/include/asm/efi.h | 4
-rw-r--r--  arch/arm/include/asm/spinlock.h | 19
-rw-r--r--  arch/arm/include/uapi/asm/kvm.h | 4
-rw-r--r--  arch/arm/kvm/arm.c | 1
-rw-r--r--  arch/arm/mach-mvebu/Makefile | 10
-rw-r--r--  arch/arm/mach-mvebu/coherency.c | 23
-rw-r--r--  arch/arm64/include/asm/atomic.h | 60
-rw-r--r--  arch/arm64/include/asm/atomic_ll_sc.h | 110
-rw-r--r--  arch/arm64/include/asm/atomic_lse.h | 278
-rw-r--r--  arch/arm64/include/asm/barrier.h | 13
-rw-r--r--  arch/arm64/include/asm/cmpxchg.h | 51
-rw-r--r--  arch/arm64/include/asm/cputype.h | 2
-rw-r--r--  arch/arm64/include/asm/efi.h | 4
-rw-r--r--  arch/arm64/include/asm/ptrace.h | 2
-rw-r--r--  arch/arm64/kernel/asm-offsets.c | 1
-rw-r--r--  arch/arm64/kernel/cpu_errata.c | 6
-rw-r--r--  arch/arm64/kernel/entry.S | 19
-rw-r--r--  arch/arm64/mm/fault.c | 3
-rw-r--r--  arch/avr32/include/asm/atomic.h | 54
-rw-r--r--  arch/blackfin/include/asm/atomic.h | 8
-rw-r--r--  arch/blackfin/include/asm/spinlock.h | 5
-rw-r--r--  arch/blackfin/kernel/bfin_ksyms.c | 1
-rw-r--r--  arch/blackfin/mach-bf561/atomic.S | 43
-rw-r--r--  arch/frv/include/asm/atomic.h | 30
-rw-r--r--  arch/frv/include/asm/atomic_defs.h | 2
-rw-r--r--  arch/frv/include/asm/serial.h | 4
-rw-r--r--  arch/h8300/include/asm/atomic.h | 29
-rw-r--r--  arch/hexagon/include/asm/atomic.h | 31
-rw-r--r--  arch/hexagon/include/asm/spinlock.h | 10
-rw-r--r--  arch/ia64/include/asm/atomic.h | 130
-rw-r--r--  arch/ia64/include/asm/mutex.h | 2
-rw-r--r--  arch/ia64/include/asm/rwsem.h | 31
-rw-r--r--  arch/ia64/include/asm/spinlock.h | 4
-rw-r--r--  arch/m32r/boot/compressed/m32r_sio.c | 9
-rw-r--r--  arch/m32r/include/asm/atomic.h | 36
-rw-r--r--  arch/m32r/include/asm/spinlock.h | 9
-rw-r--r--  arch/m68k/coldfire/head.S | 2
-rw-r--r--  arch/m68k/coldfire/m5272.c | 2
-rw-r--r--  arch/m68k/coldfire/pci.c | 2
-rw-r--r--  arch/m68k/configs/amiga_defconfig | 4
-rw-r--r--  arch/m68k/configs/apollo_defconfig | 4
-rw-r--r--  arch/m68k/configs/atari_defconfig | 4
-rw-r--r--  arch/m68k/configs/bvme6000_defconfig | 4
-rw-r--r--  arch/m68k/configs/hp300_defconfig | 4
-rw-r--r--  arch/m68k/configs/mac_defconfig | 4
-rw-r--r--  arch/m68k/configs/multi_defconfig | 4
-rw-r--r--  arch/m68k/configs/mvme147_defconfig | 4
-rw-r--r--  arch/m68k/configs/mvme16x_defconfig | 4
-rw-r--r--  arch/m68k/configs/q40_defconfig | 4
-rw-r--r--  arch/m68k/configs/sun3_defconfig | 4
-rw-r--r--  arch/m68k/configs/sun3x_defconfig | 4
-rw-r--r--  arch/m68k/ifpsp060/src/fpsp.S | 8
-rw-r--r--  arch/m68k/ifpsp060/src/pfpsp.S | 4
-rw-r--r--  arch/m68k/include/asm/atomic.h | 44
-rw-r--r--  arch/m68k/include/asm/dma.h | 2
-rw-r--r--  arch/m68k/include/asm/m525xsim.h | 4
-rw-r--r--  arch/m68k/include/asm/mcfmmu.h | 2
-rw-r--r--  arch/m68k/include/asm/q40_master.h | 2
-rw-r--r--  arch/m68k/mac/iop.c | 2
-rw-r--r--  arch/m68k/math-emu/fp_decode.h | 2
-rw-r--r--  arch/metag/include/asm/atomic_lnkget.h | 36
-rw-r--r--  arch/metag/include/asm/atomic_lock1.h | 33
-rw-r--r--  arch/metag/include/asm/spinlock.h | 14
-rw-r--r--  arch/mips/include/asm/atomic.h | 154
-rw-r--r--  arch/mips/include/asm/pgtable.h | 10
-rw-r--r--  arch/mips/include/asm/spinlock.h | 19
-rw-r--r--  arch/mn10300/include/asm/atomic.h | 33
-rw-r--r--  arch/mn10300/include/asm/spinlock.h | 8
-rw-r--r--  arch/parisc/include/asm/atomic.h | 63
-rw-r--r--  arch/parisc/include/asm/spinlock.h | 9
-rw-r--r--  arch/powerpc/include/asm/atomic.h | 83
-rw-r--r--  arch/powerpc/include/asm/book3s/64/pgtable.h | 1
-rw-r--r--  arch/powerpc/include/asm/mutex.h | 2
-rw-r--r--  arch/powerpc/kernel/eeh_driver.c | 2
-rw-r--r--  arch/powerpc/kernel/pci_64.c | 1
-rw-r--r--  arch/powerpc/kernel/process.c | 10
-rw-r--r--  arch/powerpc/kernel/tm.S | 61
-rw-r--r--  arch/powerpc/mm/hash_utils_64.c | 4
-rw-r--r--  arch/powerpc/mm/pgtable-radix.c | 5
-rw-r--r--  arch/s390/include/asm/atomic.h | 40
-rw-r--r--  arch/s390/include/asm/fpu/api.h | 2
-rw-r--r--  arch/s390/include/asm/rwsem.h | 37
-rw-r--r--  arch/s390/include/asm/spinlock.h | 3
-rw-r--r--  arch/s390/kernel/ipl.c | 7
-rw-r--r--  arch/sh/include/asm/atomic-grb.h | 34
-rw-r--r--  arch/sh/include/asm/atomic-irq.h | 31
-rw-r--r--  arch/sh/include/asm/atomic-llsc.h | 32
-rw-r--r--  arch/sh/include/asm/spinlock.h | 10
-rw-r--r--  arch/sparc/include/asm/atomic_32.h | 13
-rw-r--r--  arch/sparc/include/asm/atomic_64.h | 16
-rw-r--r--  arch/sparc/include/asm/spinlock_32.h | 7
-rw-r--r--  arch/sparc/include/asm/spinlock_64.h | 10
-rw-r--r--  arch/sparc/lib/atomic32.c | 29
-rw-r--r--  arch/sparc/lib/atomic_64.S | 61
-rw-r--r--  arch/sparc/lib/ksyms.c | 17
-rw-r--r--  arch/tile/include/asm/atomic.h | 2
-rw-r--r--  arch/tile/include/asm/atomic_32.h | 74
-rw-r--r--  arch/tile/include/asm/atomic_64.h | 115
-rw-r--r--  arch/tile/include/asm/barrier.h | 7
-rw-r--r--  arch/tile/include/asm/bitops_32.h | 18
-rw-r--r--  arch/tile/include/asm/futex.h | 14
-rw-r--r--  arch/tile/lib/atomic_32.c | 50
-rw-r--r--  arch/tile/lib/atomic_asm_32.S | 27
-rw-r--r--  arch/tile/lib/spinlock_32.c | 6
-rw-r--r--  arch/tile/lib/spinlock_64.c | 6
-rw-r--r--  arch/x86/boot/compressed/eboot.c | 2
-rw-r--r--  arch/x86/events/core.c | 36
-rw-r--r--  arch/x86/events/intel/Makefile | 4
-rw-r--r--  arch/x86/events/intel/core.c | 230
-rw-r--r--  arch/x86/events/intel/cstate.c | 47
-rw-r--r--  arch/x86/events/intel/lbr.c | 124
-rw-r--r--  arch/x86/events/intel/rapl.c | 32
-rw-r--r--  arch/x86/events/intel/uncore.c | 88
-rw-r--r--  arch/x86/events/intel/uncore.h | 5
-rw-r--r--  arch/x86/events/intel/uncore_snb.c | 67
-rw-r--r--  arch/x86/events/intel/uncore_snbep.c | 96
-rw-r--r--  arch/x86/events/msr.c | 63
-rw-r--r--  arch/x86/events/perf_event.h | 12
-rw-r--r--  arch/x86/include/asm/atomic.h | 35
-rw-r--r--  arch/x86/include/asm/atomic64_32.h | 25
-rw-r--r--  arch/x86/include/asm/atomic64_64.h | 35
-rw-r--r--  arch/x86/include/asm/cpufeatures.h | 6
-rw-r--r--  arch/x86/include/asm/efi.h | 9
-rw-r--r--  arch/x86/include/asm/mutex_32.h | 2
-rw-r--r--  arch/x86/include/asm/mutex_64.h | 6
-rw-r--r--  arch/x86/include/asm/pvclock.h | 25
-rw-r--r--  arch/x86/include/asm/rwsem.h | 18
-rw-r--r--  arch/x86/include/asm/topology.h | 9
-rw-r--r--  arch/x86/kernel/amd_nb.c | 4
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce-apei.c | 2
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce.c | 2
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce_amd.c | 2
-rw-r--r--  arch/x86/kernel/early-quirks.c | 105
-rw-r--r--  arch/x86/kernel/pvclock.c | 11
-rw-r--r--  arch/x86/kernel/smpboot.c | 25
-rw-r--r--  arch/x86/kvm/lapic.c | 3
-rw-r--r--  arch/x86/kvm/vmx.c | 23
-rw-r--r--  arch/x86/kvm/x86.c | 6
-rw-r--r--  arch/x86/kvm/x86.h | 7
-rw-r--r--  arch/x86/mm/kasan_init_64.c | 4
-rw-r--r--  arch/x86/pci/acpi.c | 1
-rw-r--r--  arch/x86/platform/efi/efi.c | 15
-rw-r--r--  arch/x86/platform/efi/efi_64.c | 21
-rw-r--r--  arch/x86/platform/uv/bios_uv.c | 3
-rw-r--r--  arch/x86/power/hibernate_64.c | 97
-rw-r--r--  arch/x86/power/hibernate_asm_64.S | 55
-rw-r--r--  arch/x86/ras/mce_amd_inj.c | 58
-rw-r--r--  arch/xtensa/include/asm/atomic.h | 52
-rw-r--r--  arch/xtensa/include/asm/spinlock.h | 10
-rw-r--r--  arch/xtensa/platforms/xt2000/setup.c | 1
165 files changed, 3227 insertions, 1080 deletions
diff --git a/arch/alpha/include/asm/atomic.h b/arch/alpha/include/asm/atomic.h
index 572b228c44c7..498933a7df97 100644
--- a/arch/alpha/include/asm/atomic.h
+++ b/arch/alpha/include/asm/atomic.h
@@ -46,10 +46,9 @@ static __inline__ void atomic_##op(int i, atomic_t * v) \
} \
#define ATOMIC_OP_RETURN(op, asm_op) \
-static inline int atomic_##op##_return(int i, atomic_t *v) \
+static inline int atomic_##op##_return_relaxed(int i, atomic_t *v) \
{ \
long temp, result; \
- smp_mb(); \
__asm__ __volatile__( \
"1: ldl_l %0,%1\n" \
" " #asm_op " %0,%3,%2\n" \
@@ -61,7 +60,23 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \
".previous" \
:"=&r" (temp), "=m" (v->counter), "=&r" (result) \
:"Ir" (i), "m" (v->counter) : "memory"); \
- smp_mb(); \
+ return result; \
+}
+
+#define ATOMIC_FETCH_OP(op, asm_op) \
+static inline int atomic_fetch_##op##_relaxed(int i, atomic_t *v) \
+{ \
+ long temp, result; \
+ __asm__ __volatile__( \
+ "1: ldl_l %2,%1\n" \
+ " " #asm_op " %2,%3,%0\n" \
+ " stl_c %0,%1\n" \
+ " beq %0,2f\n" \
+ ".subsection 2\n" \
+ "2: br 1b\n" \
+ ".previous" \
+ :"=&r" (temp), "=m" (v->counter), "=&r" (result) \
+ :"Ir" (i), "m" (v->counter) : "memory"); \
return result; \
}
@@ -82,10 +97,9 @@ static __inline__ void atomic64_##op(long i, atomic64_t * v) \
} \
#define ATOMIC64_OP_RETURN(op, asm_op) \
-static __inline__ long atomic64_##op##_return(long i, atomic64_t * v) \
+static __inline__ long atomic64_##op##_return_relaxed(long i, atomic64_t * v) \
{ \
long temp, result; \
- smp_mb(); \
__asm__ __volatile__( \
"1: ldq_l %0,%1\n" \
" " #asm_op " %0,%3,%2\n" \
@@ -97,34 +111,77 @@ static __inline__ long atomic64_##op##_return(long i, atomic64_t * v) \
".previous" \
:"=&r" (temp), "=m" (v->counter), "=&r" (result) \
:"Ir" (i), "m" (v->counter) : "memory"); \
- smp_mb(); \
+ return result; \
+}
+
+#define ATOMIC64_FETCH_OP(op, asm_op) \
+static __inline__ long atomic64_fetch_##op##_relaxed(long i, atomic64_t * v) \
+{ \
+ long temp, result; \
+ __asm__ __volatile__( \
+ "1: ldq_l %2,%1\n" \
+ " " #asm_op " %2,%3,%0\n" \
+ " stq_c %0,%1\n" \
+ " beq %0,2f\n" \
+ ".subsection 2\n" \
+ "2: br 1b\n" \
+ ".previous" \
+ :"=&r" (temp), "=m" (v->counter), "=&r" (result) \
+ :"Ir" (i), "m" (v->counter) : "memory"); \
return result; \
}
#define ATOMIC_OPS(op) \
ATOMIC_OP(op, op##l) \
ATOMIC_OP_RETURN(op, op##l) \
+ ATOMIC_FETCH_OP(op, op##l) \
ATOMIC64_OP(op, op##q) \
- ATOMIC64_OP_RETURN(op, op##q)
+ ATOMIC64_OP_RETURN(op, op##q) \
+ ATOMIC64_FETCH_OP(op, op##q)
ATOMIC_OPS(add)
ATOMIC_OPS(sub)
+#define atomic_add_return_relaxed atomic_add_return_relaxed
+#define atomic_sub_return_relaxed atomic_sub_return_relaxed
+#define atomic_fetch_add_relaxed atomic_fetch_add_relaxed
+#define atomic_fetch_sub_relaxed atomic_fetch_sub_relaxed
+
+#define atomic64_add_return_relaxed atomic64_add_return_relaxed
+#define atomic64_sub_return_relaxed atomic64_sub_return_relaxed
+#define atomic64_fetch_add_relaxed atomic64_fetch_add_relaxed
+#define atomic64_fetch_sub_relaxed atomic64_fetch_sub_relaxed
+
#define atomic_andnot atomic_andnot
#define atomic64_andnot atomic64_andnot
-ATOMIC_OP(and, and)
-ATOMIC_OP(andnot, bic)
-ATOMIC_OP(or, bis)
-ATOMIC_OP(xor, xor)
-ATOMIC64_OP(and, and)
-ATOMIC64_OP(andnot, bic)
-ATOMIC64_OP(or, bis)
-ATOMIC64_OP(xor, xor)
+#undef ATOMIC_OPS
+#define ATOMIC_OPS(op, asm) \
+ ATOMIC_OP(op, asm) \
+ ATOMIC_FETCH_OP(op, asm) \
+ ATOMIC64_OP(op, asm) \
+ ATOMIC64_FETCH_OP(op, asm)
+
+ATOMIC_OPS(and, and)
+ATOMIC_OPS(andnot, bic)
+ATOMIC_OPS(or, bis)
+ATOMIC_OPS(xor, xor)
+
+#define atomic_fetch_and_relaxed atomic_fetch_and_relaxed
+#define atomic_fetch_andnot_relaxed atomic_fetch_andnot_relaxed
+#define atomic_fetch_or_relaxed atomic_fetch_or_relaxed
+#define atomic_fetch_xor_relaxed atomic_fetch_xor_relaxed
+
+#define atomic64_fetch_and_relaxed atomic64_fetch_and_relaxed
+#define atomic64_fetch_andnot_relaxed atomic64_fetch_andnot_relaxed
+#define atomic64_fetch_or_relaxed atomic64_fetch_or_relaxed
+#define atomic64_fetch_xor_relaxed atomic64_fetch_xor_relaxed
#undef ATOMIC_OPS
+#undef ATOMIC64_FETCH_OP
#undef ATOMIC64_OP_RETURN
#undef ATOMIC64_OP
+#undef ATOMIC_FETCH_OP
#undef ATOMIC_OP_RETURN
#undef ATOMIC_OP
diff --git a/arch/alpha/include/asm/rwsem.h b/arch/alpha/include/asm/rwsem.h
index 0131a7058778..77873d0ad293 100644
--- a/arch/alpha/include/asm/rwsem.h
+++ b/arch/alpha/include/asm/rwsem.h
@@ -25,8 +25,8 @@ static inline void __down_read(struct rw_semaphore *sem)
{
long oldcount;
#ifndef CONFIG_SMP
- oldcount = sem->count;
- sem->count += RWSEM_ACTIVE_READ_BIAS;
+ oldcount = sem->count.counter;
+ sem->count.counter += RWSEM_ACTIVE_READ_BIAS;
#else
long temp;
__asm__ __volatile__(
@@ -52,13 +52,13 @@ static inline int __down_read_trylock(struct rw_semaphore *sem)
{
long old, new, res;
- res = sem->count;
+ res = atomic_long_read(&sem->count);
do {
new = res + RWSEM_ACTIVE_READ_BIAS;
if (new <= 0)
break;
old = res;
- res = cmpxchg(&sem->count, old, new);
+ res = atomic_long_cmpxchg(&sem->count, old, new);
} while (res != old);
return res >= 0 ? 1 : 0;
}
@@ -67,8 +67,8 @@ static inline long ___down_write(struct rw_semaphore *sem)
{
long oldcount;
#ifndef CONFIG_SMP
- oldcount = sem->count;
- sem->count += RWSEM_ACTIVE_WRITE_BIAS;
+ oldcount = sem->count.counter;
+ sem->count.counter += RWSEM_ACTIVE_WRITE_BIAS;
#else
long temp;
__asm__ __volatile__(
@@ -106,7 +106,7 @@ static inline int __down_write_killable(struct rw_semaphore *sem)
*/
static inline int __down_write_trylock(struct rw_semaphore *sem)
{
- long ret = cmpxchg(&sem->count, RWSEM_UNLOCKED_VALUE,
+ long ret = atomic_long_cmpxchg(&sem->count, RWSEM_UNLOCKED_VALUE,
RWSEM_ACTIVE_WRITE_BIAS);
if (ret == RWSEM_UNLOCKED_VALUE)
return 1;
@@ -117,8 +117,8 @@ static inline void __up_read(struct rw_semaphore *sem)
{
long oldcount;
#ifndef CONFIG_SMP
- oldcount = sem->count;
- sem->count -= RWSEM_ACTIVE_READ_BIAS;
+ oldcount = sem->count.counter;
+ sem->count.counter -= RWSEM_ACTIVE_READ_BIAS;
#else
long temp;
__asm__ __volatile__(
@@ -142,8 +142,8 @@ static inline void __up_write(struct rw_semaphore *sem)
{
long count;
#ifndef CONFIG_SMP
- sem->count -= RWSEM_ACTIVE_WRITE_BIAS;
- count = sem->count;
+ sem->count.counter -= RWSEM_ACTIVE_WRITE_BIAS;
+ count = sem->count.counter;
#else
long temp;
__asm__ __volatile__(
@@ -171,8 +171,8 @@ static inline void __downgrade_write(struct rw_semaphore *sem)
{
long oldcount;
#ifndef CONFIG_SMP
- oldcount = sem->count;
- sem->count -= RWSEM_WAITING_BIAS;
+ oldcount = sem->count.counter;
+ sem->count.counter -= RWSEM_WAITING_BIAS;
#else
long temp;
__asm__ __volatile__(
@@ -191,47 +191,5 @@ static inline void __downgrade_write(struct rw_semaphore *sem)
rwsem_downgrade_wake(sem);
}
-static inline void rwsem_atomic_add(long val, struct rw_semaphore *sem)
-{
-#ifndef CONFIG_SMP
- sem->count += val;
-#else
- long temp;
- __asm__ __volatile__(
- "1: ldq_l %0,%1\n"
- " addq %0,%2,%0\n"
- " stq_c %0,%1\n"
- " beq %0,2f\n"
- ".subsection 2\n"
- "2: br 1b\n"
- ".previous"
- :"=&r" (temp), "=m" (sem->count)
- :"Ir" (val), "m" (sem->count));
-#endif
-}
-
-static inline long rwsem_atomic_update(long val, struct rw_semaphore *sem)
-{
-#ifndef CONFIG_SMP
- sem->count += val;
- return sem->count;
-#else
- long ret, temp;
- __asm__ __volatile__(
- "1: ldq_l %0,%1\n"
- " addq %0,%3,%2\n"
- " addq %0,%3,%0\n"
- " stq_c %2,%1\n"
- " beq %2,2f\n"
- ".subsection 2\n"
- "2: br 1b\n"
- ".previous"
- :"=&r" (ret), "=m" (sem->count), "=&r" (temp)
- :"Ir" (val), "m" (sem->count));
-
- return ret;
-#endif
-}
-
#endif /* __KERNEL__ */
#endif /* _ALPHA_RWSEM_H */
diff --git a/arch/alpha/include/asm/spinlock.h b/arch/alpha/include/asm/spinlock.h
index fed9c6f44c19..a40b9fc0c6c3 100644
--- a/arch/alpha/include/asm/spinlock.h
+++ b/arch/alpha/include/asm/spinlock.h
@@ -3,6 +3,8 @@
#include <linux/kernel.h>
#include <asm/current.h>
+#include <asm/barrier.h>
+#include <asm/processor.h>
/*
* Simple spin lock operations. There are two variants, one clears IRQ's
@@ -13,8 +15,11 @@
#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
#define arch_spin_is_locked(x) ((x)->lock != 0)
-#define arch_spin_unlock_wait(x) \
- do { cpu_relax(); } while ((x)->lock)
+
+static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
+{
+ smp_cond_load_acquire(&lock->lock, !VAL);
+}
static inline int arch_spin_value_unlocked(arch_spinlock_t lock)
{
diff --git a/arch/arc/Makefile b/arch/arc/Makefile
index d4df6be66d58..85814e74677d 100644
--- a/arch/arc/Makefile
+++ b/arch/arc/Makefile
@@ -66,8 +66,6 @@ endif
endif
-cflags-$(CONFIG_ARC_DW2_UNWIND) += -fasynchronous-unwind-tables
-
# By default gcc 4.8 generates dwarf4 which kernel unwinder can't grok
ifeq ($(atleast_gcc48),y)
cflags-$(CONFIG_ARC_DW2_UNWIND) += -gdwarf-2
diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h
index dd683995bc9d..4e3c1b6b0806 100644
--- a/arch/arc/include/asm/atomic.h
+++ b/arch/arc/include/asm/atomic.h
@@ -67,6 +67,33 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \
return val; \
}
+#define ATOMIC_FETCH_OP(op, c_op, asm_op) \
+static inline int atomic_fetch_##op(int i, atomic_t *v) \
+{ \
+ unsigned int val, orig; \
+ \
+ /* \
+ * Explicit full memory barrier needed before/after as \
+	 * LLOCK/SCOND themselves don't provide any such semantics	\
+ */ \
+ smp_mb(); \
+ \
+ __asm__ __volatile__( \
+ "1: llock %[orig], [%[ctr]] \n" \
+ " " #asm_op " %[val], %[orig], %[i] \n" \
+ " scond %[val], [%[ctr]] \n" \
+ " \n" \
+ : [val] "=&r" (val), \
+ [orig] "=&r" (orig) \
+ : [ctr] "r" (&v->counter), \
+ [i] "ir" (i) \
+ : "cc"); \
+ \
+ smp_mb(); \
+ \
+ return orig; \
+}
+
#else /* !CONFIG_ARC_HAS_LLSC */
#ifndef CONFIG_SMP
@@ -129,25 +156,44 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \
return temp; \
}
+#define ATOMIC_FETCH_OP(op, c_op, asm_op) \
+static inline int atomic_fetch_##op(int i, atomic_t *v) \
+{ \
+ unsigned long flags; \
+ unsigned long orig; \
+ \
+ /* \
+ * spin lock/unlock provides the needed smp_mb() before/after \
+ */ \
+ atomic_ops_lock(flags); \
+ orig = v->counter; \
+ v->counter c_op i; \
+ atomic_ops_unlock(flags); \
+ \
+ return orig; \
+}
+
#endif /* !CONFIG_ARC_HAS_LLSC */
#define ATOMIC_OPS(op, c_op, asm_op) \
ATOMIC_OP(op, c_op, asm_op) \
- ATOMIC_OP_RETURN(op, c_op, asm_op)
+ ATOMIC_OP_RETURN(op, c_op, asm_op) \
+ ATOMIC_FETCH_OP(op, c_op, asm_op)
ATOMIC_OPS(add, +=, add)
ATOMIC_OPS(sub, -=, sub)
#define atomic_andnot atomic_andnot
-ATOMIC_OP(and, &=, and)
-ATOMIC_OP(andnot, &= ~, bic)
-ATOMIC_OP(or, |=, or)
-ATOMIC_OP(xor, ^=, xor)
+#undef ATOMIC_OPS
+#define ATOMIC_OPS(op, c_op, asm_op) \
+ ATOMIC_OP(op, c_op, asm_op) \
+ ATOMIC_FETCH_OP(op, c_op, asm_op)
-#undef SCOND_FAIL_RETRY_VAR_DEF
-#undef SCOND_FAIL_RETRY_ASM
-#undef SCOND_FAIL_RETRY_VARS
+ATOMIC_OPS(and, &=, and)
+ATOMIC_OPS(andnot, &= ~, bic)
+ATOMIC_OPS(or, |=, or)
+ATOMIC_OPS(xor, ^=, xor)
#else /* CONFIG_ARC_PLAT_EZNPS */
@@ -208,22 +254,51 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \
return temp; \
}
+#define ATOMIC_FETCH_OP(op, c_op, asm_op) \
+static inline int atomic_fetch_##op(int i, atomic_t *v) \
+{ \
+ unsigned int temp = i; \
+ \
+ /* Explicit full memory barrier needed before/after */ \
+ smp_mb(); \
+ \
+ __asm__ __volatile__( \
+ " mov r2, %0\n" \
+ " mov r3, %1\n" \
+ " .word %2\n" \
+ " mov %0, r2" \
+ : "+r"(temp) \
+ : "r"(&v->counter), "i"(asm_op) \
+ : "r2", "r3", "memory"); \
+ \
+ smp_mb(); \
+ \
+ return temp; \
+}
+
#define ATOMIC_OPS(op, c_op, asm_op) \
ATOMIC_OP(op, c_op, asm_op) \
- ATOMIC_OP_RETURN(op, c_op, asm_op)
+ ATOMIC_OP_RETURN(op, c_op, asm_op) \
+ ATOMIC_FETCH_OP(op, c_op, asm_op)
ATOMIC_OPS(add, +=, CTOP_INST_AADD_DI_R2_R2_R3)
#define atomic_sub(i, v) atomic_add(-(i), (v))
#define atomic_sub_return(i, v) atomic_add_return(-(i), (v))
-ATOMIC_OP(and, &=, CTOP_INST_AAND_DI_R2_R2_R3)
+#undef ATOMIC_OPS
+#define ATOMIC_OPS(op, c_op, asm_op) \
+ ATOMIC_OP(op, c_op, asm_op) \
+ ATOMIC_FETCH_OP(op, c_op, asm_op)
+
+ATOMIC_OPS(and, &=, CTOP_INST_AAND_DI_R2_R2_R3)
#define atomic_andnot(mask, v) atomic_and(~(mask), (v))
-ATOMIC_OP(or, |=, CTOP_INST_AOR_DI_R2_R2_R3)
-ATOMIC_OP(xor, ^=, CTOP_INST_AXOR_DI_R2_R2_R3)
+ATOMIC_OPS(or, |=, CTOP_INST_AOR_DI_R2_R2_R3)
+ATOMIC_OPS(xor, ^=, CTOP_INST_AXOR_DI_R2_R2_R3)
#endif /* CONFIG_ARC_PLAT_EZNPS */
#undef ATOMIC_OPS
+#undef ATOMIC_FETCH_OP
#undef ATOMIC_OP_RETURN
#undef ATOMIC_OP
diff --git a/arch/arc/include/asm/spinlock.h b/arch/arc/include/asm/spinlock.h
index cded4a9b5438..233d5ffe6ec7 100644
--- a/arch/arc/include/asm/spinlock.h
+++ b/arch/arc/include/asm/spinlock.h
@@ -15,8 +15,11 @@
#define arch_spin_is_locked(x) ((x)->slock != __ARCH_SPIN_LOCK_UNLOCKED__)
#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
-#define arch_spin_unlock_wait(x) \
- do { while (arch_spin_is_locked(x)) cpu_relax(); } while (0)
+
+static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
+{
+ smp_cond_load_acquire(&lock->slock, !VAL);
+}
#ifdef CONFIG_ARC_HAS_LLSC
diff --git a/arch/arc/kernel/stacktrace.c b/arch/arc/kernel/stacktrace.c
index e0efff15a5ae..b9192a653b7e 100644
--- a/arch/arc/kernel/stacktrace.c
+++ b/arch/arc/kernel/stacktrace.c
@@ -142,7 +142,7 @@ arc_unwind_core(struct task_struct *tsk, struct pt_regs *regs,
 * prologue is set up (callee regs saved and then fp set and not other
* way around
*/
- pr_warn("CONFIG_ARC_DW2_UNWIND needs to be enabled\n");
+ pr_warn_once("CONFIG_ARC_DW2_UNWIND needs to be enabled\n");
return 0;
#endif
diff --git a/arch/arm/boot/dts/armada-385-linksys.dtsi b/arch/arm/boot/dts/armada-385-linksys.dtsi
index 8450944b28e6..22f7a13e20b4 100644
--- a/arch/arm/boot/dts/armada-385-linksys.dtsi
+++ b/arch/arm/boot/dts/armada-385-linksys.dtsi
@@ -58,8 +58,8 @@
soc {
ranges = <MBUS_ID(0xf0, 0x01) 0 0xf1000000 0x100000
MBUS_ID(0x01, 0x1d) 0 0xfff00000 0x100000
- MBUS_ID(0x09, 0x09) 0 0xf1100000 0x10000
- MBUS_ID(0x09, 0x05) 0 0xf1110000 0x10000>;
+ MBUS_ID(0x09, 0x19) 0 0xf1100000 0x10000
+ MBUS_ID(0x09, 0x15) 0 0xf1110000 0x10000>;
internal-regs {
diff --git a/arch/arm/boot/dts/sama5d2.dtsi b/arch/arm/boot/dts/sama5d2.dtsi
index 2827e7ab5ebc..5dd2734e67ba 100644
--- a/arch/arm/boot/dts/sama5d2.dtsi
+++ b/arch/arm/boot/dts/sama5d2.dtsi
@@ -232,7 +232,7 @@
};
usb1: ohci@00400000 {
- compatible = "atmel,at91rm9200-ohci", "usb-ohci";
+ compatible = "atmel,sama5d2-ohci", "usb-ohci";
reg = <0x00400000 0x100000>;
interrupts = <41 IRQ_TYPE_LEVEL_HIGH 2>;
clocks = <&uhphs_clk>, <&uhphs_clk>, <&uhpck>;
diff --git a/arch/arm/boot/dts/sun4i-a10.dtsi b/arch/arm/boot/dts/sun4i-a10.dtsi
index a03e56fb5dbc..ca58eb279d55 100644
--- a/arch/arm/boot/dts/sun4i-a10.dtsi
+++ b/arch/arm/boot/dts/sun4i-a10.dtsi
@@ -65,8 +65,9 @@
compatible = "allwinner,simple-framebuffer",
"simple-framebuffer";
allwinner,pipeline = "de_be0-lcd0-hdmi";
- clocks = <&pll5 1>, <&ahb_gates 36>, <&ahb_gates 43>,
- <&ahb_gates 44>, <&dram_gates 26>;
+ clocks = <&pll3>, <&pll5 1>, <&ahb_gates 36>,
+ <&ahb_gates 43>, <&ahb_gates 44>,
+ <&dram_gates 26>;
status = "disabled";
};
@@ -74,8 +75,9 @@
compatible = "allwinner,simple-framebuffer",
"simple-framebuffer";
allwinner,pipeline = "de_fe0-de_be0-lcd0-hdmi";
- clocks = <&pll5 1>, <&ahb_gates 36>, <&ahb_gates 43>,
- <&ahb_gates 44>, <&ahb_gates 46>,
+ clocks = <&pll3>, <&pll5 1>, <&ahb_gates 36>,
+ <&ahb_gates 43>, <&ahb_gates 44>,
+ <&ahb_gates 46>,
<&dram_gates 25>, <&dram_gates 26>;
status = "disabled";
};
@@ -84,9 +86,9 @@
compatible = "allwinner,simple-framebuffer",
"simple-framebuffer";
allwinner,pipeline = "de_fe0-de_be0-lcd0";
- clocks = <&pll5 1>, <&ahb_gates 36>, <&ahb_gates 44>,
- <&ahb_gates 46>, <&dram_gates 25>,
- <&dram_gates 26>;
+ clocks = <&pll3>, <&pll5 1>, <&ahb_gates 36>,
+ <&ahb_gates 44>, <&ahb_gates 46>,
+ <&dram_gates 25>, <&dram_gates 26>;
status = "disabled";
};
@@ -94,8 +96,9 @@
compatible = "allwinner,simple-framebuffer",
"simple-framebuffer";
allwinner,pipeline = "de_fe0-de_be0-lcd0-tve0";
- clocks = <&pll5 1>, <&ahb_gates 34>, <&ahb_gates 36>,
- <&ahb_gates 44>, <&ahb_gates 46>,
+ clocks = <&pll3>, <&pll5 1>, <&ahb_gates 34>,
+ <&ahb_gates 36>, <&ahb_gates 44>,
+ <&ahb_gates 46>,
<&dram_gates 5>, <&dram_gates 25>, <&dram_gates 26>;
status = "disabled";
};
diff --git a/arch/arm/boot/dts/sun5i-a10s.dtsi b/arch/arm/boot/dts/sun5i-a10s.dtsi
index bddd0de88af6..367f33012493 100644
--- a/arch/arm/boot/dts/sun5i-a10s.dtsi
+++ b/arch/arm/boot/dts/sun5i-a10s.dtsi
@@ -65,8 +65,8 @@
compatible = "allwinner,simple-framebuffer",
"simple-framebuffer";
allwinner,pipeline = "de_be0-lcd0-hdmi";
- clocks = <&pll5 1>, <&ahb_gates 36>, <&ahb_gates 43>,
- <&ahb_gates 44>;
+ clocks = <&pll3>, <&pll5 1>, <&ahb_gates 36>,
+ <&ahb_gates 43>, <&ahb_gates 44>;
status = "disabled";
};
@@ -74,7 +74,8 @@
compatible = "allwinner,simple-framebuffer",
"simple-framebuffer";
allwinner,pipeline = "de_be0-lcd0";
- clocks = <&pll5 1>, <&ahb_gates 36>, <&ahb_gates 44>;
+ clocks = <&pll3>, <&pll5 1>, <&ahb_gates 36>,
+ <&ahb_gates 44>;
status = "disabled";
};
@@ -82,8 +83,8 @@
compatible = "allwinner,simple-framebuffer",
"simple-framebuffer";
allwinner,pipeline = "de_be0-lcd0-tve0";
- clocks = <&pll5 1>, <&ahb_gates 34>, <&ahb_gates 36>,
- <&ahb_gates 44>;
+ clocks = <&pll3>, <&pll5 1>, <&ahb_gates 34>,
+ <&ahb_gates 36>, <&ahb_gates 44>;
status = "disabled";
};
};
diff --git a/arch/arm/boot/dts/sun5i-r8-chip.dts b/arch/arm/boot/dts/sun5i-r8-chip.dts
index a8d8b4582397..f694482bdeb6 100644
--- a/arch/arm/boot/dts/sun5i-r8-chip.dts
+++ b/arch/arm/boot/dts/sun5i-r8-chip.dts
@@ -52,7 +52,7 @@
/ {
model = "NextThing C.H.I.P.";
- compatible = "nextthing,chip", "allwinner,sun5i-r8";
+ compatible = "nextthing,chip", "allwinner,sun5i-r8", "allwinner,sun5i-a13";
aliases {
i2c0 = &i2c0;
diff --git a/arch/arm/boot/dts/sun7i-a20.dtsi b/arch/arm/boot/dts/sun7i-a20.dtsi
index febdf4c72fb0..2c34bbbb9570 100644
--- a/arch/arm/boot/dts/sun7i-a20.dtsi
+++ b/arch/arm/boot/dts/sun7i-a20.dtsi
@@ -67,8 +67,9 @@
compatible = "allwinner,simple-framebuffer",
"simple-framebuffer";
allwinner,pipeline = "de_be0-lcd0-hdmi";
- clocks = <&pll5 1>, <&ahb_gates 36>, <&ahb_gates 43>,
- <&ahb_gates 44>, <&dram_gates 26>;
+ clocks = <&pll3>, <&pll5 1>, <&ahb_gates 36>,
+ <&ahb_gates 43>, <&ahb_gates 44>,
+ <&dram_gates 26>;
status = "disabled";
};
@@ -76,8 +77,8 @@
compatible = "allwinner,simple-framebuffer",
"simple-framebuffer";
allwinner,pipeline = "de_be0-lcd0";
- clocks = <&pll5 1>, <&ahb_gates 36>, <&ahb_gates 44>,
- <&dram_gates 26>;
+ clocks = <&pll3>, <&pll5 1>, <&ahb_gates 36>,
+ <&ahb_gates 44>, <&dram_gates 26>;
status = "disabled";
};
@@ -85,7 +86,7 @@
compatible = "allwinner,simple-framebuffer",
"simple-framebuffer";
allwinner,pipeline = "de_be0-lcd0-tve0";
- clocks = <&pll5 1>,
+ clocks = <&pll3>, <&pll5 1>,
<&ahb_gates 34>, <&ahb_gates 36>, <&ahb_gates 44>,
<&dram_gates 5>, <&dram_gates 26>;
status = "disabled";
@@ -231,6 +232,7 @@
pll3x2: pll3x2_clk {
#clock-cells = <0>;
compatible = "fixed-factor-clock";
+ clocks = <&pll3>;
clock-div = <1>;
clock-mult = <2>;
clock-output-names = "pll3-2x";
@@ -272,6 +274,7 @@
pll7x2: pll7x2_clk {
#clock-cells = <0>;
compatible = "fixed-factor-clock";
+ clocks = <&pll7>;
clock-div = <1>;
clock-mult = <2>;
clock-output-names = "pll7-2x";
diff --git a/arch/arm/boot/dts/tegra30-beaver.dts b/arch/arm/boot/dts/tegra30-beaver.dts
index 1eca3b28ac64..b6da15d823a6 100644
--- a/arch/arm/boot/dts/tegra30-beaver.dts
+++ b/arch/arm/boot/dts/tegra30-beaver.dts
@@ -1843,7 +1843,7 @@
ldo5_reg: ldo5 {
regulator-name = "vddio_sdmmc,avdd_vdac";
- regulator-min-microvolt = <3300000>;
+ regulator-min-microvolt = <1800000>;
regulator-max-microvolt = <3300000>;
regulator-always-on;
};
@@ -1914,6 +1914,7 @@
sdhci@78000000 {
status = "okay";
+ vqmmc-supply = <&ldo5_reg>;
cd-gpios = <&gpio TEGRA_GPIO(I, 5) GPIO_ACTIVE_LOW>;
wp-gpios = <&gpio TEGRA_GPIO(T, 3) GPIO_ACTIVE_HIGH>;
power-gpios = <&gpio TEGRA_GPIO(D, 7) GPIO_ACTIVE_HIGH>;
diff --git a/arch/arm/include/asm/atomic.h b/arch/arm/include/asm/atomic.h
index 9e10c4567eb4..66d0e215a773 100644
--- a/arch/arm/include/asm/atomic.h
+++ b/arch/arm/include/asm/atomic.h
@@ -77,8 +77,36 @@ static inline int atomic_##op##_return_relaxed(int i, atomic_t *v) \
return result; \
}
+#define ATOMIC_FETCH_OP(op, c_op, asm_op) \
+static inline int atomic_fetch_##op##_relaxed(int i, atomic_t *v) \
+{ \
+ unsigned long tmp; \
+ int result, val; \
+ \
+ prefetchw(&v->counter); \
+ \
+ __asm__ __volatile__("@ atomic_fetch_" #op "\n" \
+"1: ldrex %0, [%4]\n" \
+" " #asm_op " %1, %0, %5\n" \
+" strex %2, %1, [%4]\n" \
+" teq %2, #0\n" \
+" bne 1b" \
+ : "=&r" (result), "=&r" (val), "=&r" (tmp), "+Qo" (v->counter) \
+ : "r" (&v->counter), "Ir" (i) \
+ : "cc"); \
+ \
+ return result; \
+}
+
#define atomic_add_return_relaxed atomic_add_return_relaxed
#define atomic_sub_return_relaxed atomic_sub_return_relaxed
+#define atomic_fetch_add_relaxed atomic_fetch_add_relaxed
+#define atomic_fetch_sub_relaxed atomic_fetch_sub_relaxed
+
+#define atomic_fetch_and_relaxed atomic_fetch_and_relaxed
+#define atomic_fetch_andnot_relaxed atomic_fetch_andnot_relaxed
+#define atomic_fetch_or_relaxed atomic_fetch_or_relaxed
+#define atomic_fetch_xor_relaxed atomic_fetch_xor_relaxed
static inline int atomic_cmpxchg_relaxed(atomic_t *ptr, int old, int new)
{
@@ -159,6 +187,20 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \
return val; \
}
+#define ATOMIC_FETCH_OP(op, c_op, asm_op) \
+static inline int atomic_fetch_##op(int i, atomic_t *v) \
+{ \
+ unsigned long flags; \
+ int val; \
+ \
+ raw_local_irq_save(flags); \
+ val = v->counter; \
+ v->counter c_op i; \
+ raw_local_irq_restore(flags); \
+ \
+ return val; \
+}
+
static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
{
int ret;
@@ -187,19 +229,26 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u)
#define ATOMIC_OPS(op, c_op, asm_op) \
ATOMIC_OP(op, c_op, asm_op) \
- ATOMIC_OP_RETURN(op, c_op, asm_op)
+ ATOMIC_OP_RETURN(op, c_op, asm_op) \
+ ATOMIC_FETCH_OP(op, c_op, asm_op)
ATOMIC_OPS(add, +=, add)
ATOMIC_OPS(sub, -=, sub)
#define atomic_andnot atomic_andnot
-ATOMIC_OP(and, &=, and)
-ATOMIC_OP(andnot, &= ~, bic)
-ATOMIC_OP(or, |=, orr)
-ATOMIC_OP(xor, ^=, eor)
+#undef ATOMIC_OPS
+#define ATOMIC_OPS(op, c_op, asm_op) \
+ ATOMIC_OP(op, c_op, asm_op) \
+ ATOMIC_FETCH_OP(op, c_op, asm_op)
+
+ATOMIC_OPS(and, &=, and)
+ATOMIC_OPS(andnot, &= ~, bic)
+ATOMIC_OPS(or, |=, orr)
+ATOMIC_OPS(xor, ^=, eor)
#undef ATOMIC_OPS
+#undef ATOMIC_FETCH_OP
#undef ATOMIC_OP_RETURN
#undef ATOMIC_OP
@@ -317,24 +366,61 @@ atomic64_##op##_return_relaxed(long long i, atomic64_t *v) \
return result; \
}
+#define ATOMIC64_FETCH_OP(op, op1, op2) \
+static inline long long \
+atomic64_fetch_##op##_relaxed(long long i, atomic64_t *v) \
+{ \
+ long long result, val; \
+ unsigned long tmp; \
+ \
+ prefetchw(&v->counter); \
+ \
+ __asm__ __volatile__("@ atomic64_fetch_" #op "\n" \
+"1: ldrexd %0, %H0, [%4]\n" \
+" " #op1 " %Q1, %Q0, %Q5\n" \
+" " #op2 " %R1, %R0, %R5\n" \
+" strexd %2, %1, %H1, [%4]\n" \
+" teq %2, #0\n" \
+" bne 1b" \
+ : "=&r" (result), "=&r" (val), "=&r" (tmp), "+Qo" (v->counter) \
+ : "r" (&v->counter), "r" (i) \
+ : "cc"); \
+ \
+ return result; \
+}
+
#define ATOMIC64_OPS(op, op1, op2) \
ATOMIC64_OP(op, op1, op2) \
- ATOMIC64_OP_RETURN(op, op1, op2)
+ ATOMIC64_OP_RETURN(op, op1, op2) \
+ ATOMIC64_FETCH_OP(op, op1, op2)
ATOMIC64_OPS(add, adds, adc)
ATOMIC64_OPS(sub, subs, sbc)
#define atomic64_add_return_relaxed atomic64_add_return_relaxed
#define atomic64_sub_return_relaxed atomic64_sub_return_relaxed
+#define atomic64_fetch_add_relaxed atomic64_fetch_add_relaxed
+#define atomic64_fetch_sub_relaxed atomic64_fetch_sub_relaxed
+
+#undef ATOMIC64_OPS
+#define ATOMIC64_OPS(op, op1, op2) \
+ ATOMIC64_OP(op, op1, op2) \
+ ATOMIC64_FETCH_OP(op, op1, op2)
#define atomic64_andnot atomic64_andnot
-ATOMIC64_OP(and, and, and)
-ATOMIC64_OP(andnot, bic, bic)
-ATOMIC64_OP(or, orr, orr)
-ATOMIC64_OP(xor, eor, eor)
+ATOMIC64_OPS(and, and, and)
+ATOMIC64_OPS(andnot, bic, bic)
+ATOMIC64_OPS(or, orr, orr)
+ATOMIC64_OPS(xor, eor, eor)
+
+#define atomic64_fetch_and_relaxed atomic64_fetch_and_relaxed
+#define atomic64_fetch_andnot_relaxed atomic64_fetch_andnot_relaxed
+#define atomic64_fetch_or_relaxed atomic64_fetch_or_relaxed
+#define atomic64_fetch_xor_relaxed atomic64_fetch_xor_relaxed
#undef ATOMIC64_OPS
+#undef ATOMIC64_FETCH_OP
#undef ATOMIC64_OP_RETURN
#undef ATOMIC64_OP
diff --git a/arch/arm/include/asm/efi.h b/arch/arm/include/asm/efi.h
index a708fa1f0905..766bf9b78160 100644
--- a/arch/arm/include/asm/efi.h
+++ b/arch/arm/include/asm/efi.h
@@ -28,10 +28,10 @@ int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md);
#define arch_efi_call_virt_setup() efi_virtmap_load()
#define arch_efi_call_virt_teardown() efi_virtmap_unload()
-#define arch_efi_call_virt(f, args...) \
+#define arch_efi_call_virt(p, f, args...) \
({ \
efi_##f##_t *__f; \
- __f = efi.systab->runtime->f; \
+ __f = p->f; \
__f(args); \
})
diff --git a/arch/arm/include/asm/spinlock.h b/arch/arm/include/asm/spinlock.h
index 0fa418463f49..4bec45442072 100644
--- a/arch/arm/include/asm/spinlock.h
+++ b/arch/arm/include/asm/spinlock.h
@@ -6,6 +6,8 @@
#endif
#include <linux/prefetch.h>
+#include <asm/barrier.h>
+#include <asm/processor.h>
/*
* sev and wfe are ARMv6K extensions. Uniprocessor ARMv6 may not have the K
@@ -50,8 +52,21 @@ static inline void dsb_sev(void)
* memory.
*/
-#define arch_spin_unlock_wait(lock) \
- do { while (arch_spin_is_locked(lock)) cpu_relax(); } while (0)
+static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
+{
+ u16 owner = READ_ONCE(lock->tickets.owner);
+
+ for (;;) {
+ arch_spinlock_t tmp = READ_ONCE(*lock);
+
+ if (tmp.tickets.owner == tmp.tickets.next ||
+ tmp.tickets.owner != owner)
+ break;
+
+ wfe();
+ }
+ smp_acquire__after_ctrl_dep();
+}
#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h
index df3f60cb1168..a2b3eb313a25 100644
--- a/arch/arm/include/uapi/asm/kvm.h
+++ b/arch/arm/include/uapi/asm/kvm.h
@@ -139,8 +139,8 @@ struct kvm_arch_memory_slot {
#define ARM_CP15_REG64(...) __ARM_CP15_REG64(__VA_ARGS__)
#define KVM_REG_ARM_TIMER_CTL ARM_CP15_REG32(0, 14, 3, 1)
-#define KVM_REG_ARM_TIMER_CNT ARM_CP15_REG64(1, 14)
-#define KVM_REG_ARM_TIMER_CVAL ARM_CP15_REG64(3, 14)
+#define KVM_REG_ARM_TIMER_CNT ARM_CP15_REG64(1, 14)
+#define KVM_REG_ARM_TIMER_CVAL ARM_CP15_REG64(3, 14)
/* Normal registers are mapped as coprocessor 16. */
#define KVM_REG_ARM_CORE (0x0010 << KVM_REG_ARM_COPROC_SHIFT)
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 893941ec98dc..f1bde7c4e736 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -263,6 +263,7 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
kvm_timer_vcpu_terminate(vcpu);
kvm_vgic_vcpu_destroy(vcpu);
kvm_pmu_vcpu_destroy(vcpu);
+ kvm_vcpu_uninit(vcpu);
kmem_cache_free(kvm_vcpu_cache, vcpu);
}
diff --git a/arch/arm/mach-mvebu/Makefile b/arch/arm/mach-mvebu/Makefile
index ecf9e0c3b107..e53c6cfcab51 100644
--- a/arch/arm/mach-mvebu/Makefile
+++ b/arch/arm/mach-mvebu/Makefile
@@ -7,9 +7,15 @@ CFLAGS_pmsu.o := -march=armv7-a
obj-$(CONFIG_MACH_MVEBU_ANY) += system-controller.o mvebu-soc-id.o
ifeq ($(CONFIG_MACH_MVEBU_V7),y)
-obj-y += cpu-reset.o board-v7.o coherency.o coherency_ll.o pmsu.o pmsu_ll.o pm.o pm-board.o
+obj-y += cpu-reset.o board-v7.o coherency.o coherency_ll.o pmsu.o pmsu_ll.o
+
+obj-$(CONFIG_PM) += pm.o pm-board.o
obj-$(CONFIG_SMP) += platsmp.o headsmp.o platsmp-a9.o headsmp-a9.o
endif
obj-$(CONFIG_MACH_DOVE) += dove.o
-obj-$(CONFIG_MACH_KIRKWOOD) += kirkwood.o kirkwood-pm.o
+
+ifeq ($(CONFIG_MACH_KIRKWOOD),y)
+obj-y += kirkwood.o
+obj-$(CONFIG_PM) += kirkwood-pm.o
+endif
diff --git a/arch/arm/mach-mvebu/coherency.c b/arch/arm/mach-mvebu/coherency.c
index 7e989d61159c..e80f0dde2189 100644
--- a/arch/arm/mach-mvebu/coherency.c
+++ b/arch/arm/mach-mvebu/coherency.c
@@ -162,22 +162,16 @@ exit:
}
/*
- * This ioremap hook is used on Armada 375/38x to ensure that PCIe
- * memory areas are mapped as MT_UNCACHED instead of MT_DEVICE. This
- * is needed as a workaround for a deadlock issue between the PCIe
- * interface and the cache controller.
+ * This ioremap hook is used on Armada 375/38x to ensure that all MMIO
+ * areas are mapped as MT_UNCACHED instead of MT_DEVICE. This is
+ * needed for the HW I/O coherency mechanism to work properly without
+ * deadlock.
*/
static void __iomem *
-armada_pcie_wa_ioremap_caller(phys_addr_t phys_addr, size_t size,
- unsigned int mtype, void *caller)
+armada_wa_ioremap_caller(phys_addr_t phys_addr, size_t size,
+ unsigned int mtype, void *caller)
{
- struct resource pcie_mem;
-
- mvebu_mbus_get_pcie_mem_aperture(&pcie_mem);
-
- if (pcie_mem.start <= phys_addr && (phys_addr + size) <= pcie_mem.end)
- mtype = MT_UNCACHED;
-
+ mtype = MT_UNCACHED;
return __arm_ioremap_caller(phys_addr, size, mtype, caller);
}
@@ -186,7 +180,8 @@ static void __init armada_375_380_coherency_init(struct device_node *np)
struct device_node *cache_dn;
coherency_cpu_base = of_iomap(np, 0);
- arch_ioremap_caller = armada_pcie_wa_ioremap_caller;
+ arch_ioremap_caller = armada_wa_ioremap_caller;
+ pci_ioremap_set_mem_type(MT_UNCACHED);
/*
* We should switch the PL310 to I/O coherency mode only if
diff --git a/arch/arm64/include/asm/atomic.h b/arch/arm64/include/asm/atomic.h
index f3a3586a421c..c0235e0ff849 100644
--- a/arch/arm64/include/asm/atomic.h
+++ b/arch/arm64/include/asm/atomic.h
@@ -76,6 +76,36 @@
#define atomic_dec_return_release(v) atomic_sub_return_release(1, (v))
#define atomic_dec_return(v) atomic_sub_return(1, (v))
+#define atomic_fetch_add_relaxed atomic_fetch_add_relaxed
+#define atomic_fetch_add_acquire atomic_fetch_add_acquire
+#define atomic_fetch_add_release atomic_fetch_add_release
+#define atomic_fetch_add atomic_fetch_add
+
+#define atomic_fetch_sub_relaxed atomic_fetch_sub_relaxed
+#define atomic_fetch_sub_acquire atomic_fetch_sub_acquire
+#define atomic_fetch_sub_release atomic_fetch_sub_release
+#define atomic_fetch_sub atomic_fetch_sub
+
+#define atomic_fetch_and_relaxed atomic_fetch_and_relaxed
+#define atomic_fetch_and_acquire atomic_fetch_and_acquire
+#define atomic_fetch_and_release atomic_fetch_and_release
+#define atomic_fetch_and atomic_fetch_and
+
+#define atomic_fetch_andnot_relaxed atomic_fetch_andnot_relaxed
+#define atomic_fetch_andnot_acquire atomic_fetch_andnot_acquire
+#define atomic_fetch_andnot_release atomic_fetch_andnot_release
+#define atomic_fetch_andnot atomic_fetch_andnot
+
+#define atomic_fetch_or_relaxed atomic_fetch_or_relaxed
+#define atomic_fetch_or_acquire atomic_fetch_or_acquire
+#define atomic_fetch_or_release atomic_fetch_or_release
+#define atomic_fetch_or atomic_fetch_or
+
+#define atomic_fetch_xor_relaxed atomic_fetch_xor_relaxed
+#define atomic_fetch_xor_acquire atomic_fetch_xor_acquire
+#define atomic_fetch_xor_release atomic_fetch_xor_release
+#define atomic_fetch_xor atomic_fetch_xor
+
#define atomic_xchg_relaxed(v, new) xchg_relaxed(&((v)->counter), (new))
#define atomic_xchg_acquire(v, new) xchg_acquire(&((v)->counter), (new))
#define atomic_xchg_release(v, new) xchg_release(&((v)->counter), (new))
@@ -125,6 +155,36 @@
#define atomic64_dec_return_release(v) atomic64_sub_return_release(1, (v))
#define atomic64_dec_return(v) atomic64_sub_return(1, (v))
+#define atomic64_fetch_add_relaxed atomic64_fetch_add_relaxed
+#define atomic64_fetch_add_acquire atomic64_fetch_add_acquire
+#define atomic64_fetch_add_release atomic64_fetch_add_release
+#define atomic64_fetch_add atomic64_fetch_add
+
+#define atomic64_fetch_sub_relaxed atomic64_fetch_sub_relaxed
+#define atomic64_fetch_sub_acquire atomic64_fetch_sub_acquire
+#define atomic64_fetch_sub_release atomic64_fetch_sub_release
+#define atomic64_fetch_sub atomic64_fetch_sub
+
+#define atomic64_fetch_and_relaxed atomic64_fetch_and_relaxed
+#define atomic64_fetch_and_acquire atomic64_fetch_and_acquire
+#define atomic64_fetch_and_release atomic64_fetch_and_release
+#define atomic64_fetch_and atomic64_fetch_and
+
+#define atomic64_fetch_andnot_relaxed atomic64_fetch_andnot_relaxed
+#define atomic64_fetch_andnot_acquire atomic64_fetch_andnot_acquire
+#define atomic64_fetch_andnot_release atomic64_fetch_andnot_release
+#define atomic64_fetch_andnot atomic64_fetch_andnot
+
+#define atomic64_fetch_or_relaxed atomic64_fetch_or_relaxed
+#define atomic64_fetch_or_acquire atomic64_fetch_or_acquire
+#define atomic64_fetch_or_release atomic64_fetch_or_release
+#define atomic64_fetch_or atomic64_fetch_or
+
+#define atomic64_fetch_xor_relaxed atomic64_fetch_xor_relaxed
+#define atomic64_fetch_xor_acquire atomic64_fetch_xor_acquire
+#define atomic64_fetch_xor_release atomic64_fetch_xor_release
+#define atomic64_fetch_xor atomic64_fetch_xor
+
#define atomic64_xchg_relaxed atomic_xchg_relaxed
#define atomic64_xchg_acquire atomic_xchg_acquire
#define atomic64_xchg_release atomic_xchg_release
diff --git a/arch/arm64/include/asm/atomic_ll_sc.h b/arch/arm64/include/asm/atomic_ll_sc.h
index f61c84f6ba02..f819fdcff1ac 100644
--- a/arch/arm64/include/asm/atomic_ll_sc.h
+++ b/arch/arm64/include/asm/atomic_ll_sc.h
@@ -77,26 +77,57 @@ __LL_SC_PREFIX(atomic_##op##_return##name(int i, atomic_t *v)) \
} \
__LL_SC_EXPORT(atomic_##op##_return##name);
+#define ATOMIC_FETCH_OP(name, mb, acq, rel, cl, op, asm_op) \
+__LL_SC_INLINE int \
+__LL_SC_PREFIX(atomic_fetch_##op##name(int i, atomic_t *v)) \
+{ \
+ unsigned long tmp; \
+ int val, result; \
+ \
+ asm volatile("// atomic_fetch_" #op #name "\n" \
+" prfm pstl1strm, %3\n" \
+"1: ld" #acq "xr %w0, %3\n" \
+" " #asm_op " %w1, %w0, %w4\n" \
+" st" #rel "xr %w2, %w1, %3\n" \
+" cbnz %w2, 1b\n" \
+" " #mb \
+ : "=&r" (result), "=&r" (val), "=&r" (tmp), "+Q" (v->counter) \
+ : "Ir" (i) \
+ : cl); \
+ \
+ return result; \
+} \
+__LL_SC_EXPORT(atomic_fetch_##op##name);
+
#define ATOMIC_OPS(...) \
ATOMIC_OP(__VA_ARGS__) \
- ATOMIC_OP_RETURN( , dmb ish, , l, "memory", __VA_ARGS__)
-
-#define ATOMIC_OPS_RLX(...) \
- ATOMIC_OPS(__VA_ARGS__) \
+ ATOMIC_OP_RETURN( , dmb ish, , l, "memory", __VA_ARGS__)\
ATOMIC_OP_RETURN(_relaxed, , , , , __VA_ARGS__)\
ATOMIC_OP_RETURN(_acquire, , a, , "memory", __VA_ARGS__)\
- ATOMIC_OP_RETURN(_release, , , l, "memory", __VA_ARGS__)
+ ATOMIC_OP_RETURN(_release, , , l, "memory", __VA_ARGS__)\
+ ATOMIC_FETCH_OP ( , dmb ish, , l, "memory", __VA_ARGS__)\
+ ATOMIC_FETCH_OP (_relaxed, , , , , __VA_ARGS__)\
+ ATOMIC_FETCH_OP (_acquire, , a, , "memory", __VA_ARGS__)\
+ ATOMIC_FETCH_OP (_release, , , l, "memory", __VA_ARGS__)
-ATOMIC_OPS_RLX(add, add)
-ATOMIC_OPS_RLX(sub, sub)
+ATOMIC_OPS(add, add)
+ATOMIC_OPS(sub, sub)
+
+#undef ATOMIC_OPS
+#define ATOMIC_OPS(...) \
+ ATOMIC_OP(__VA_ARGS__) \
+ ATOMIC_FETCH_OP ( , dmb ish, , l, "memory", __VA_ARGS__)\
+ ATOMIC_FETCH_OP (_relaxed, , , , , __VA_ARGS__)\
+ ATOMIC_FETCH_OP (_acquire, , a, , "memory", __VA_ARGS__)\
+ ATOMIC_FETCH_OP (_release, , , l, "memory", __VA_ARGS__)
-ATOMIC_OP(and, and)
-ATOMIC_OP(andnot, bic)
-ATOMIC_OP(or, orr)
-ATOMIC_OP(xor, eor)
+ATOMIC_OPS(and, and)
+ATOMIC_OPS(andnot, bic)
+ATOMIC_OPS(or, orr)
+ATOMIC_OPS(xor, eor)
-#undef ATOMIC_OPS_RLX
#undef ATOMIC_OPS
+#undef ATOMIC_FETCH_OP
#undef ATOMIC_OP_RETURN
#undef ATOMIC_OP
@@ -140,26 +171,57 @@ __LL_SC_PREFIX(atomic64_##op##_return##name(long i, atomic64_t *v)) \
} \
__LL_SC_EXPORT(atomic64_##op##_return##name);
+#define ATOMIC64_FETCH_OP(name, mb, acq, rel, cl, op, asm_op) \
+__LL_SC_INLINE long \
+__LL_SC_PREFIX(atomic64_fetch_##op##name(long i, atomic64_t *v)) \
+{ \
+ long result, val; \
+ unsigned long tmp; \
+ \
+ asm volatile("// atomic64_fetch_" #op #name "\n" \
+" prfm pstl1strm, %3\n" \
+"1: ld" #acq "xr %0, %3\n" \
+" " #asm_op " %1, %0, %4\n" \
+" st" #rel "xr %w2, %1, %3\n" \
+" cbnz %w2, 1b\n" \
+" " #mb \
+ : "=&r" (result), "=&r" (val), "=&r" (tmp), "+Q" (v->counter) \
+ : "Ir" (i) \
+ : cl); \
+ \
+ return result; \
+} \
+__LL_SC_EXPORT(atomic64_fetch_##op##name);
+
#define ATOMIC64_OPS(...) \
ATOMIC64_OP(__VA_ARGS__) \
- ATOMIC64_OP_RETURN(, dmb ish, , l, "memory", __VA_ARGS__)
-
-#define ATOMIC64_OPS_RLX(...) \
- ATOMIC64_OPS(__VA_ARGS__) \
+ ATOMIC64_OP_RETURN(, dmb ish, , l, "memory", __VA_ARGS__) \
ATOMIC64_OP_RETURN(_relaxed,, , , , __VA_ARGS__) \
ATOMIC64_OP_RETURN(_acquire,, a, , "memory", __VA_ARGS__) \
- ATOMIC64_OP_RETURN(_release,, , l, "memory", __VA_ARGS__)
+ ATOMIC64_OP_RETURN(_release,, , l, "memory", __VA_ARGS__) \
+ ATOMIC64_FETCH_OP (, dmb ish, , l, "memory", __VA_ARGS__) \
+ ATOMIC64_FETCH_OP (_relaxed,, , , , __VA_ARGS__) \
+ ATOMIC64_FETCH_OP (_acquire,, a, , "memory", __VA_ARGS__) \
+ ATOMIC64_FETCH_OP (_release,, , l, "memory", __VA_ARGS__)
-ATOMIC64_OPS_RLX(add, add)
-ATOMIC64_OPS_RLX(sub, sub)
+ATOMIC64_OPS(add, add)
+ATOMIC64_OPS(sub, sub)
+
+#undef ATOMIC64_OPS
+#define ATOMIC64_OPS(...) \
+ ATOMIC64_OP(__VA_ARGS__) \
+ ATOMIC64_FETCH_OP (, dmb ish, , l, "memory", __VA_ARGS__) \
+ ATOMIC64_FETCH_OP (_relaxed,, , , , __VA_ARGS__) \
+ ATOMIC64_FETCH_OP (_acquire,, a, , "memory", __VA_ARGS__) \
+ ATOMIC64_FETCH_OP (_release,, , l, "memory", __VA_ARGS__)
-ATOMIC64_OP(and, and)
-ATOMIC64_OP(andnot, bic)
-ATOMIC64_OP(or, orr)
-ATOMIC64_OP(xor, eor)
+ATOMIC64_OPS(and, and)
+ATOMIC64_OPS(andnot, bic)
+ATOMIC64_OPS(or, orr)
+ATOMIC64_OPS(xor, eor)
-#undef ATOMIC64_OPS_RLX
#undef ATOMIC64_OPS
+#undef ATOMIC64_FETCH_OP
#undef ATOMIC64_OP_RETURN
#undef ATOMIC64_OP
diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h
index 39c1d340fec5..b5890be8f257 100644
--- a/arch/arm64/include/asm/atomic_lse.h
+++ b/arch/arm64/include/asm/atomic_lse.h
@@ -26,54 +26,57 @@
#endif
#define __LL_SC_ATOMIC(op) __LL_SC_CALL(atomic_##op)
-
-static inline void atomic_andnot(int i, atomic_t *v)
-{
- register int w0 asm ("w0") = i;
- register atomic_t *x1 asm ("x1") = v;
-
- asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC(andnot),
- " stclr %w[i], %[v]\n")
- : [i] "+r" (w0), [v] "+Q" (v->counter)
- : "r" (x1)
- : __LL_SC_CLOBBERS);
+#define ATOMIC_OP(op, asm_op) \
+static inline void atomic_##op(int i, atomic_t *v) \
+{ \
+ register int w0 asm ("w0") = i; \
+ register atomic_t *x1 asm ("x1") = v; \
+ \
+ asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC(op), \
+" " #asm_op " %w[i], %[v]\n") \
+ : [i] "+r" (w0), [v] "+Q" (v->counter) \
+ : "r" (x1) \
+ : __LL_SC_CLOBBERS); \
}
-static inline void atomic_or(int i, atomic_t *v)
-{
- register int w0 asm ("w0") = i;
- register atomic_t *x1 asm ("x1") = v;
+ATOMIC_OP(andnot, stclr)
+ATOMIC_OP(or, stset)
+ATOMIC_OP(xor, steor)
+ATOMIC_OP(add, stadd)
- asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC(or),
- " stset %w[i], %[v]\n")
- : [i] "+r" (w0), [v] "+Q" (v->counter)
- : "r" (x1)
- : __LL_SC_CLOBBERS);
-}
+#undef ATOMIC_OP
-static inline void atomic_xor(int i, atomic_t *v)
-{
- register int w0 asm ("w0") = i;
- register atomic_t *x1 asm ("x1") = v;
-
- asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC(xor),
- " steor %w[i], %[v]\n")
- : [i] "+r" (w0), [v] "+Q" (v->counter)
- : "r" (x1)
- : __LL_SC_CLOBBERS);
+#define ATOMIC_FETCH_OP(name, mb, op, asm_op, cl...) \
+static inline int atomic_fetch_##op##name(int i, atomic_t *v) \
+{ \
+ register int w0 asm ("w0") = i; \
+ register atomic_t *x1 asm ("x1") = v; \
+ \
+ asm volatile(ARM64_LSE_ATOMIC_INSN( \
+ /* LL/SC */ \
+ __LL_SC_ATOMIC(fetch_##op##name), \
+ /* LSE atomics */ \
+" " #asm_op #mb " %w[i], %w[i], %[v]") \
+ : [i] "+r" (w0), [v] "+Q" (v->counter) \
+ : "r" (x1) \
+ : __LL_SC_CLOBBERS, ##cl); \
+ \
+ return w0; \
}
-static inline void atomic_add(int i, atomic_t *v)
-{
- register int w0 asm ("w0") = i;
- register atomic_t *x1 asm ("x1") = v;
+#define ATOMIC_FETCH_OPS(op, asm_op) \
+ ATOMIC_FETCH_OP(_relaxed, , op, asm_op) \
+ ATOMIC_FETCH_OP(_acquire, a, op, asm_op, "memory") \
+ ATOMIC_FETCH_OP(_release, l, op, asm_op, "memory") \
+ ATOMIC_FETCH_OP( , al, op, asm_op, "memory")
- asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC(add),
- " stadd %w[i], %[v]\n")
- : [i] "+r" (w0), [v] "+Q" (v->counter)
- : "r" (x1)
- : __LL_SC_CLOBBERS);
-}
+ATOMIC_FETCH_OPS(andnot, ldclr)
+ATOMIC_FETCH_OPS(or, ldset)
+ATOMIC_FETCH_OPS(xor, ldeor)
+ATOMIC_FETCH_OPS(add, ldadd)
+
+#undef ATOMIC_FETCH_OP
+#undef ATOMIC_FETCH_OPS
#define ATOMIC_OP_ADD_RETURN(name, mb, cl...) \
static inline int atomic_add_return##name(int i, atomic_t *v) \
@@ -119,6 +122,33 @@ static inline void atomic_and(int i, atomic_t *v)
: __LL_SC_CLOBBERS);
}
+#define ATOMIC_FETCH_OP_AND(name, mb, cl...) \
+static inline int atomic_fetch_and##name(int i, atomic_t *v) \
+{ \
+ register int w0 asm ("w0") = i; \
+ register atomic_t *x1 asm ("x1") = v; \
+ \
+ asm volatile(ARM64_LSE_ATOMIC_INSN( \
+ /* LL/SC */ \
+ " nop\n" \
+ __LL_SC_ATOMIC(fetch_and##name), \
+ /* LSE atomics */ \
+ " mvn %w[i], %w[i]\n" \
+ " ldclr" #mb " %w[i], %w[i], %[v]") \
+ : [i] "+r" (w0), [v] "+Q" (v->counter) \
+ : "r" (x1) \
+ : __LL_SC_CLOBBERS, ##cl); \
+ \
+ return w0; \
+}
+
+ATOMIC_FETCH_OP_AND(_relaxed, )
+ATOMIC_FETCH_OP_AND(_acquire, a, "memory")
+ATOMIC_FETCH_OP_AND(_release, l, "memory")
+ATOMIC_FETCH_OP_AND( , al, "memory")
+
+#undef ATOMIC_FETCH_OP_AND
+
static inline void atomic_sub(int i, atomic_t *v)
{
register int w0 asm ("w0") = i;
@@ -164,57 +194,87 @@ ATOMIC_OP_SUB_RETURN(_release, l, "memory")
ATOMIC_OP_SUB_RETURN( , al, "memory")
#undef ATOMIC_OP_SUB_RETURN
-#undef __LL_SC_ATOMIC
-
-#define __LL_SC_ATOMIC64(op) __LL_SC_CALL(atomic64_##op)
-
-static inline void atomic64_andnot(long i, atomic64_t *v)
-{
- register long x0 asm ("x0") = i;
- register atomic64_t *x1 asm ("x1") = v;
- asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC64(andnot),
- " stclr %[i], %[v]\n")
- : [i] "+r" (x0), [v] "+Q" (v->counter)
- : "r" (x1)
- : __LL_SC_CLOBBERS);
+#define ATOMIC_FETCH_OP_SUB(name, mb, cl...) \
+static inline int atomic_fetch_sub##name(int i, atomic_t *v) \
+{ \
+ register int w0 asm ("w0") = i; \
+ register atomic_t *x1 asm ("x1") = v; \
+ \
+ asm volatile(ARM64_LSE_ATOMIC_INSN( \
+ /* LL/SC */ \
+ " nop\n" \
+ __LL_SC_ATOMIC(fetch_sub##name), \
+ /* LSE atomics */ \
+ " neg %w[i], %w[i]\n" \
+ " ldadd" #mb " %w[i], %w[i], %[v]") \
+ : [i] "+r" (w0), [v] "+Q" (v->counter) \
+ : "r" (x1) \
+ : __LL_SC_CLOBBERS, ##cl); \
+ \
+ return w0; \
}
-static inline void atomic64_or(long i, atomic64_t *v)
-{
- register long x0 asm ("x0") = i;
- register atomic64_t *x1 asm ("x1") = v;
+ATOMIC_FETCH_OP_SUB(_relaxed, )
+ATOMIC_FETCH_OP_SUB(_acquire, a, "memory")
+ATOMIC_FETCH_OP_SUB(_release, l, "memory")
+ATOMIC_FETCH_OP_SUB( , al, "memory")
- asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC64(or),
- " stset %[i], %[v]\n")
- : [i] "+r" (x0), [v] "+Q" (v->counter)
- : "r" (x1)
- : __LL_SC_CLOBBERS);
+#undef ATOMIC_FETCH_OP_SUB
+#undef __LL_SC_ATOMIC
+
+#define __LL_SC_ATOMIC64(op) __LL_SC_CALL(atomic64_##op)
+#define ATOMIC64_OP(op, asm_op) \
+static inline void atomic64_##op(long i, atomic64_t *v) \
+{ \
+ register long x0 asm ("x0") = i; \
+ register atomic64_t *x1 asm ("x1") = v; \
+ \
+ asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC64(op), \
+" " #asm_op " %[i], %[v]\n") \
+ : [i] "+r" (x0), [v] "+Q" (v->counter) \
+ : "r" (x1) \
+ : __LL_SC_CLOBBERS); \
}
-static inline void atomic64_xor(long i, atomic64_t *v)
-{
- register long x0 asm ("x0") = i;
- register atomic64_t *x1 asm ("x1") = v;
+ATOMIC64_OP(andnot, stclr)
+ATOMIC64_OP(or, stset)
+ATOMIC64_OP(xor, steor)
+ATOMIC64_OP(add, stadd)
- asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC64(xor),
- " steor %[i], %[v]\n")
- : [i] "+r" (x0), [v] "+Q" (v->counter)
- : "r" (x1)
- : __LL_SC_CLOBBERS);
+#undef ATOMIC64_OP
+
+#define ATOMIC64_FETCH_OP(name, mb, op, asm_op, cl...) \
+static inline long atomic64_fetch_##op##name(long i, atomic64_t *v) \
+{ \
+ register long x0 asm ("x0") = i; \
+ register atomic64_t *x1 asm ("x1") = v; \
+ \
+ asm volatile(ARM64_LSE_ATOMIC_INSN( \
+ /* LL/SC */ \
+ __LL_SC_ATOMIC64(fetch_##op##name), \
+ /* LSE atomics */ \
+" " #asm_op #mb " %[i], %[i], %[v]") \
+ : [i] "+r" (x0), [v] "+Q" (v->counter) \
+ : "r" (x1) \
+ : __LL_SC_CLOBBERS, ##cl); \
+ \
+ return x0; \
}
-static inline void atomic64_add(long i, atomic64_t *v)
-{
- register long x0 asm ("x0") = i;
- register atomic64_t *x1 asm ("x1") = v;
+#define ATOMIC64_FETCH_OPS(op, asm_op) \
+ ATOMIC64_FETCH_OP(_relaxed, , op, asm_op) \
+ ATOMIC64_FETCH_OP(_acquire, a, op, asm_op, "memory") \
+ ATOMIC64_FETCH_OP(_release, l, op, asm_op, "memory") \
+ ATOMIC64_FETCH_OP( , al, op, asm_op, "memory")
- asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC64(add),
- " stadd %[i], %[v]\n")
- : [i] "+r" (x0), [v] "+Q" (v->counter)
- : "r" (x1)
- : __LL_SC_CLOBBERS);
-}
+ATOMIC64_FETCH_OPS(andnot, ldclr)
+ATOMIC64_FETCH_OPS(or, ldset)
+ATOMIC64_FETCH_OPS(xor, ldeor)
+ATOMIC64_FETCH_OPS(add, ldadd)
+
+#undef ATOMIC64_FETCH_OP
+#undef ATOMIC64_FETCH_OPS
#define ATOMIC64_OP_ADD_RETURN(name, mb, cl...) \
static inline long atomic64_add_return##name(long i, atomic64_t *v) \
@@ -260,6 +320,33 @@ static inline void atomic64_and(long i, atomic64_t *v)
: __LL_SC_CLOBBERS);
}
+#define ATOMIC64_FETCH_OP_AND(name, mb, cl...) \
+static inline long atomic64_fetch_and##name(long i, atomic64_t *v) \
+{ \
+ register long x0 asm ("w0") = i; \
+ register atomic64_t *x1 asm ("x1") = v; \
+ \
+ asm volatile(ARM64_LSE_ATOMIC_INSN( \
+ /* LL/SC */ \
+ " nop\n" \
+ __LL_SC_ATOMIC64(fetch_and##name), \
+ /* LSE atomics */ \
+ " mvn %[i], %[i]\n" \
+ " ldclr" #mb " %[i], %[i], %[v]") \
+ : [i] "+r" (x0), [v] "+Q" (v->counter) \
+ : "r" (x1) \
+ : __LL_SC_CLOBBERS, ##cl); \
+ \
+ return x0; \
+}
+
+ATOMIC64_FETCH_OP_AND(_relaxed, )
+ATOMIC64_FETCH_OP_AND(_acquire, a, "memory")
+ATOMIC64_FETCH_OP_AND(_release, l, "memory")
+ATOMIC64_FETCH_OP_AND( , al, "memory")
+
+#undef ATOMIC64_FETCH_OP_AND
+
static inline void atomic64_sub(long i, atomic64_t *v)
{
register long x0 asm ("x0") = i;
@@ -306,6 +393,33 @@ ATOMIC64_OP_SUB_RETURN( , al, "memory")
#undef ATOMIC64_OP_SUB_RETURN
+#define ATOMIC64_FETCH_OP_SUB(name, mb, cl...) \
+static inline long atomic64_fetch_sub##name(long i, atomic64_t *v) \
+{ \
+ register long x0 asm ("w0") = i; \
+ register atomic64_t *x1 asm ("x1") = v; \
+ \
+ asm volatile(ARM64_LSE_ATOMIC_INSN( \
+ /* LL/SC */ \
+ " nop\n" \
+ __LL_SC_ATOMIC64(fetch_sub##name), \
+ /* LSE atomics */ \
+ " neg %[i], %[i]\n" \
+ " ldadd" #mb " %[i], %[i], %[v]") \
+ : [i] "+r" (x0), [v] "+Q" (v->counter) \
+ : "r" (x1) \
+ : __LL_SC_CLOBBERS, ##cl); \
+ \
+ return x0; \
+}
+
+ATOMIC64_FETCH_OP_SUB(_relaxed, )
+ATOMIC64_FETCH_OP_SUB(_acquire, a, "memory")
+ATOMIC64_FETCH_OP_SUB(_release, l, "memory")
+ATOMIC64_FETCH_OP_SUB( , al, "memory")
+
+#undef ATOMIC64_FETCH_OP_SUB
+
static inline long atomic64_dec_if_positive(atomic64_t *v)
{
register long x0 asm ("x0") = (long)v;
diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h
index dae5c49618db..4eea7f618dce 100644
--- a/arch/arm64/include/asm/barrier.h
+++ b/arch/arm64/include/asm/barrier.h
@@ -91,6 +91,19 @@ do { \
__u.__val; \
})
+#define smp_cond_load_acquire(ptr, cond_expr) \
+({ \
+ typeof(ptr) __PTR = (ptr); \
+ typeof(*ptr) VAL; \
+ for (;;) { \
+ VAL = smp_load_acquire(__PTR); \
+ if (cond_expr) \
+ break; \
+ __cmpwait_relaxed(__PTR, VAL); \
+ } \
+ VAL; \
+})
+
#include <asm-generic/barrier.h>
#endif /* __ASSEMBLY__ */
diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h
index 510c7b404454..bd86a79491bc 100644
--- a/arch/arm64/include/asm/cmpxchg.h
+++ b/arch/arm64/include/asm/cmpxchg.h
@@ -224,4 +224,55 @@ __CMPXCHG_GEN(_mb)
__ret; \
})
+#define __CMPWAIT_CASE(w, sz, name) \
+static inline void __cmpwait_case_##name(volatile void *ptr, \
+ unsigned long val) \
+{ \
+ unsigned long tmp; \
+ \
+ asm volatile( \
+ " ldxr" #sz "\t%" #w "[tmp], %[v]\n" \
+ " eor %" #w "[tmp], %" #w "[tmp], %" #w "[val]\n" \
+ " cbnz %" #w "[tmp], 1f\n" \
+ " wfe\n" \
+ "1:" \
+ : [tmp] "=&r" (tmp), [v] "+Q" (*(unsigned long *)ptr) \
+ : [val] "r" (val)); \
+}
+
+__CMPWAIT_CASE(w, b, 1);
+__CMPWAIT_CASE(w, h, 2);
+__CMPWAIT_CASE(w, , 4);
+__CMPWAIT_CASE( , , 8);
+
+#undef __CMPWAIT_CASE
+
+#define __CMPWAIT_GEN(sfx) \
+static inline void __cmpwait##sfx(volatile void *ptr, \
+ unsigned long val, \
+ int size) \
+{ \
+ switch (size) { \
+ case 1: \
+ return __cmpwait_case##sfx##_1(ptr, (u8)val); \
+ case 2: \
+ return __cmpwait_case##sfx##_2(ptr, (u16)val); \
+ case 4: \
+ return __cmpwait_case##sfx##_4(ptr, val); \
+ case 8: \
+ return __cmpwait_case##sfx##_8(ptr, val); \
+ default: \
+ BUILD_BUG(); \
+ } \
+ \
+ unreachable(); \
+}
+
+__CMPWAIT_GEN()
+
+#undef __CMPWAIT_GEN
+
+#define __cmpwait_relaxed(ptr, val) \
+ __cmpwait((ptr), (unsigned long)(val), sizeof(*(ptr)))
+
#endif /* __ASM_CMPXCHG_H */
diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
index 87e1985f3be8..9d9fd4b9a72e 100644
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@@ -80,12 +80,14 @@
#define APM_CPU_PART_POTENZA 0x000
#define CAVIUM_CPU_PART_THUNDERX 0x0A1
+#define CAVIUM_CPU_PART_THUNDERX_81XX 0x0A2
#define BRCM_CPU_PART_VULCAN 0x516
#define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53)
#define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57)
#define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX)
+#define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX)
#ifndef __ASSEMBLY__
diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h
index 622db3c6474e..bd887663689b 100644
--- a/arch/arm64/include/asm/efi.h
+++ b/arch/arm64/include/asm/efi.h
@@ -23,10 +23,10 @@ int efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md);
efi_virtmap_load(); \
})
-#define arch_efi_call_virt(f, args...) \
+#define arch_efi_call_virt(p, f, args...) \
({ \
efi_##f##_t *__f; \
- __f = efi.systab->runtime->f; \
+ __f = p->f; \
__f(args); \
})
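With the extra parameter, callers name the runtime services table explicitly instead of the macro reaching for efi.systab->runtime itself. Roughly, at a call site (illustrative, not taken from this patch):

	/* before */ status = arch_efi_call_virt(get_time, tm, tc);
	/* after  */ status = arch_efi_call_virt(efi.systab->runtime, get_time, tm, tc);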
diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h
index a307eb6e7fa8..7f94755089e2 100644
--- a/arch/arm64/include/asm/ptrace.h
+++ b/arch/arm64/include/asm/ptrace.h
@@ -117,6 +117,8 @@ struct pt_regs {
};
u64 orig_x0;
u64 syscallno;
+ u64 orig_addr_limit;
+ u64 unused; // maintain 16 byte alignment
};
#define arch_has_single_step() (1)
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index f8e5d47f0880..2f4ba774488a 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -60,6 +60,7 @@ int main(void)
DEFINE(S_PC, offsetof(struct pt_regs, pc));
DEFINE(S_ORIG_X0, offsetof(struct pt_regs, orig_x0));
DEFINE(S_SYSCALLNO, offsetof(struct pt_regs, syscallno));
+ DEFINE(S_ORIG_ADDR_LIMIT, offsetof(struct pt_regs, orig_addr_limit));
DEFINE(S_FRAME_SIZE, sizeof(struct pt_regs));
BLANK();
DEFINE(MM_CONTEXT_ID, offsetof(struct mm_struct, context.id.counter));
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index d42789499f17..af716b65110d 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -98,6 +98,12 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
MIDR_RANGE(MIDR_THUNDERX, 0x00,
(1 << MIDR_VARIANT_SHIFT) | 1),
},
+ {
+ /* Cavium ThunderX, T81 pass 1.0 */
+ .desc = "Cavium erratum 27456",
+ .capability = ARM64_WORKAROUND_CAVIUM_27456,
+ MIDR_RANGE(MIDR_THUNDERX_81XX, 0x00, 0x00),
+ },
#endif
{
}
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 12e8d2bcb3f9..6c3b7345a6c4 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -28,6 +28,7 @@
#include <asm/errno.h>
#include <asm/esr.h>
#include <asm/irq.h>
+#include <asm/memory.h>
#include <asm/thread_info.h>
#include <asm/unistd.h>
@@ -97,7 +98,14 @@
mov x29, xzr // fp pointed to user-space
.else
add x21, sp, #S_FRAME_SIZE
- .endif
+ get_thread_info tsk
+ /* Save the task's original addr_limit and set USER_DS (TASK_SIZE_64) */
+ ldr x20, [tsk, #TI_ADDR_LIMIT]
+ str x20, [sp, #S_ORIG_ADDR_LIMIT]
+ mov x20, #TASK_SIZE_64
+ str x20, [tsk, #TI_ADDR_LIMIT]
+ ALTERNATIVE(nop, SET_PSTATE_UAO(0), ARM64_HAS_UAO, CONFIG_ARM64_UAO)
+ .endif /* \el == 0 */
mrs x22, elr_el1
mrs x23, spsr_el1
stp lr, x21, [sp, #S_LR]
@@ -128,6 +136,14 @@
.endm
.macro kernel_exit, el
+ .if \el != 0
+ /* Restore the task's original addr_limit. */
+ ldr x20, [sp, #S_ORIG_ADDR_LIMIT]
+ str x20, [tsk, #TI_ADDR_LIMIT]
+
+ /* No need to restore UAO, it will be restored from SPSR_EL1 */
+ .endif
+
ldp x21, x22, [sp, #S_PC] // load ELR, SPSR
.if \el == 0
ct_user_enter
@@ -406,7 +422,6 @@ el1_irq:
bl trace_hardirqs_off
#endif
- get_thread_info tsk
irq_handler
#ifdef CONFIG_PREEMPT
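In C terms, the kernel_entry/kernel_exit additions for exceptions taken from EL1 amount to the following (paraphrased; the authoritative code is the assembly above), and this saved copy is what lets do_page_fault() in the next hunk test regs->orig_addr_limit instead of get_fs():

	/* on entry from EL1 */
	regs->orig_addr_limit = current_thread_info()->addr_limit;
	current_thread_info()->addr_limit = TASK_SIZE_64;	/* i.e. USER_DS */
	/* ... handle the exception ... */
	/* on exit back to EL1 */
	current_thread_info()->addr_limit = regs->orig_addr_limit;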
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 013e2cbe7924..b1166d1e5955 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -280,7 +280,8 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
}
if (permission_fault(esr) && (addr < USER_DS)) {
- if (get_fs() == KERNEL_DS)
+ /* regs->orig_addr_limit may be 0 if we entered from EL0 */
+ if (regs->orig_addr_limit == KERNEL_DS)
die("Accessing user space memory with fs=KERNEL_DS", regs, esr);
if (!search_exception_tables(regs->pc))
diff --git a/arch/avr32/include/asm/atomic.h b/arch/avr32/include/asm/atomic.h
index d74fd8ce980a..3d5ce38a6f0b 100644
--- a/arch/avr32/include/asm/atomic.h
+++ b/arch/avr32/include/asm/atomic.h
@@ -41,21 +41,49 @@ static inline int __atomic_##op##_return(int i, atomic_t *v) \
return result; \
}
+#define ATOMIC_FETCH_OP(op, asm_op, asm_con) \
+static inline int __atomic_fetch_##op(int i, atomic_t *v) \
+{ \
+ int result, val; \
+ \
+ asm volatile( \
+ "/* atomic_fetch_" #op " */\n" \
+ "1: ssrf 5\n" \
+ " ld.w %0, %3\n" \
+ " mov %1, %0\n" \
+ " " #asm_op " %1, %4\n" \
+ " stcond %2, %1\n" \
+ " brne 1b" \
+ : "=&r" (result), "=&r" (val), "=o" (v->counter) \
+ : "m" (v->counter), #asm_con (i) \
+ : "cc"); \
+ \
+ return result; \
+}
+
ATOMIC_OP_RETURN(sub, sub, rKs21)
ATOMIC_OP_RETURN(add, add, r)
+ATOMIC_FETCH_OP (sub, sub, rKs21)
+ATOMIC_FETCH_OP (add, add, r)
-#define ATOMIC_OP(op, asm_op) \
+#define ATOMIC_OPS(op, asm_op) \
ATOMIC_OP_RETURN(op, asm_op, r) \
static inline void atomic_##op(int i, atomic_t *v) \
{ \
(void)__atomic_##op##_return(i, v); \
+} \
+ATOMIC_FETCH_OP(op, asm_op, r) \
+static inline int atomic_fetch_##op(int i, atomic_t *v) \
+{ \
+ return __atomic_fetch_##op(i, v); \
}
-ATOMIC_OP(and, and)
-ATOMIC_OP(or, or)
-ATOMIC_OP(xor, eor)
+ATOMIC_OPS(and, and)
+ATOMIC_OPS(or, or)
+ATOMIC_OPS(xor, eor)
-#undef ATOMIC_OP
+#undef ATOMIC_OPS
+#undef ATOMIC_FETCH_OP
#undef ATOMIC_OP_RETURN
/*
@@ -87,6 +115,14 @@ static inline int atomic_add_return(int i, atomic_t *v)
return __atomic_add_return(i, v);
}
+static inline int atomic_fetch_add(int i, atomic_t *v)
+{
+ if (IS_21BIT_CONST(i))
+ return __atomic_fetch_sub(-i, v);
+
+ return __atomic_fetch_add(i, v);
+}
+
/*
* atomic_sub_return - subtract the atomic variable
* @i: integer value to subtract
@@ -102,6 +138,14 @@ static inline int atomic_sub_return(int i, atomic_t *v)
return __atomic_add_return(-i, v);
}
+static inline int atomic_fetch_sub(int i, atomic_t *v)
+{
+ if (IS_21BIT_CONST(i))
+ return __atomic_fetch_sub(i, v);
+
+ return __atomic_fetch_add(-i, v);
+}
+
/*
* __atomic_add_unless - add unless the number is a given value
* @v: pointer of type atomic_t
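The asymmetry above comes from the AVR32 constraints: only the sub form accepts a 21-bit signed immediate (rKs21), so an add of a suitable build-time constant is routed through the sub helper with the sign flipped, and vice versa; either way the caller still receives the pre-update value. Illustratively:

	old = atomic_fetch_add(4, &v);	/* 21-bit constant: becomes __atomic_fetch_sub(-4, &v) */
	old = atomic_fetch_add(n, &v);	/* non-constant: becomes __atomic_fetch_add(n, &v) */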
diff --git a/arch/blackfin/include/asm/atomic.h b/arch/blackfin/include/asm/atomic.h
index 1c1c42330c99..63c7deceeeb6 100644
--- a/arch/blackfin/include/asm/atomic.h
+++ b/arch/blackfin/include/asm/atomic.h
@@ -17,6 +17,7 @@
asmlinkage int __raw_uncached_fetch_asm(const volatile int *ptr);
asmlinkage int __raw_atomic_add_asm(volatile int *ptr, int value);
+asmlinkage int __raw_atomic_xadd_asm(volatile int *ptr, int value);
asmlinkage int __raw_atomic_and_asm(volatile int *ptr, int value);
asmlinkage int __raw_atomic_or_asm(volatile int *ptr, int value);
@@ -28,10 +29,17 @@ asmlinkage int __raw_atomic_test_asm(const volatile int *ptr, int value);
#define atomic_add_return(i, v) __raw_atomic_add_asm(&(v)->counter, i)
#define atomic_sub_return(i, v) __raw_atomic_add_asm(&(v)->counter, -(i))
+#define atomic_fetch_add(i, v) __raw_atomic_xadd_asm(&(v)->counter, i)
+#define atomic_fetch_sub(i, v) __raw_atomic_xadd_asm(&(v)->counter, -(i))
+
#define atomic_or(i, v) (void)__raw_atomic_or_asm(&(v)->counter, i)
#define atomic_and(i, v) (void)__raw_atomic_and_asm(&(v)->counter, i)
#define atomic_xor(i, v) (void)__raw_atomic_xor_asm(&(v)->counter, i)
+#define atomic_fetch_or(i, v) __raw_atomic_or_asm(&(v)->counter, i)
+#define atomic_fetch_and(i, v) __raw_atomic_and_asm(&(v)->counter, i)
+#define atomic_fetch_xor(i, v) __raw_atomic_xor_asm(&(v)->counter, i)
+
#endif
#include <asm-generic/atomic.h>
diff --git a/arch/blackfin/include/asm/spinlock.h b/arch/blackfin/include/asm/spinlock.h
index 490c7caa02d9..c58f4a83ed6f 100644
--- a/arch/blackfin/include/asm/spinlock.h
+++ b/arch/blackfin/include/asm/spinlock.h
@@ -12,6 +12,8 @@
#else
#include <linux/atomic.h>
+#include <asm/processor.h>
+#include <asm/barrier.h>
asmlinkage int __raw_spin_is_locked_asm(volatile int *ptr);
asmlinkage void __raw_spin_lock_asm(volatile int *ptr);
@@ -48,8 +50,7 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock)
static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
{
- while (arch_spin_is_locked(lock))
- cpu_relax();
+ smp_cond_load_acquire(&lock->lock, !VAL);
}
static inline int arch_read_can_lock(arch_rwlock_t *rw)
diff --git a/arch/blackfin/kernel/bfin_ksyms.c b/arch/blackfin/kernel/bfin_ksyms.c
index a401c27b69b4..68096e8f787f 100644
--- a/arch/blackfin/kernel/bfin_ksyms.c
+++ b/arch/blackfin/kernel/bfin_ksyms.c
@@ -84,6 +84,7 @@ EXPORT_SYMBOL(insl_16);
#ifdef CONFIG_SMP
EXPORT_SYMBOL(__raw_atomic_add_asm);
+EXPORT_SYMBOL(__raw_atomic_xadd_asm);
EXPORT_SYMBOL(__raw_atomic_and_asm);
EXPORT_SYMBOL(__raw_atomic_or_asm);
EXPORT_SYMBOL(__raw_atomic_xor_asm);
diff --git a/arch/blackfin/mach-bf561/atomic.S b/arch/blackfin/mach-bf561/atomic.S
index 26fccb5568b9..1e2989c5d6b2 100644
--- a/arch/blackfin/mach-bf561/atomic.S
+++ b/arch/blackfin/mach-bf561/atomic.S
@@ -607,6 +607,28 @@ ENDPROC(___raw_atomic_add_asm)
/*
* r0 = ptr
+ * r1 = value
+ *
+ * ADD a signed value to a 32bit word and return the old value atomically.
+ * Clobbers: r3:0, p1:0
+ */
+ENTRY(___raw_atomic_xadd_asm)
+ p1 = r0;
+ r3 = r1;
+ [--sp] = rets;
+ call _get_core_lock;
+ r3 = [p1];
+ r2 = r3 + r2;
+ [p1] = r2;
+ r1 = p1;
+ call _put_core_lock;
+ r0 = r3;
+ rets = [sp++];
+ rts;
+ENDPROC(___raw_atomic_add_asm)
+
+/*
+ * r0 = ptr
* r1 = mask
*
* AND the mask bits from a 32bit word and return the old 32bit value
@@ -618,10 +640,9 @@ ENTRY(___raw_atomic_and_asm)
r3 = r1;
[--sp] = rets;
call _get_core_lock;
- r2 = [p1];
- r3 = r2 & r3;
- [p1] = r3;
- r3 = r2;
+ r3 = [p1];
+ r2 = r2 & r3;
+ [p1] = r2;
r1 = p1;
call _put_core_lock;
r0 = r3;
@@ -642,10 +663,9 @@ ENTRY(___raw_atomic_or_asm)
r3 = r1;
[--sp] = rets;
call _get_core_lock;
- r2 = [p1];
- r3 = r2 | r3;
- [p1] = r3;
- r3 = r2;
+ r3 = [p1];
+ r2 = r2 | r3;
+ [p1] = r2;
r1 = p1;
call _put_core_lock;
r0 = r3;
@@ -666,10 +686,9 @@ ENTRY(___raw_atomic_xor_asm)
r3 = r1;
[--sp] = rets;
call _get_core_lock;
- r2 = [p1];
- r3 = r2 ^ r3;
- [p1] = r3;
- r3 = r2;
+ r3 = [p1];
+ r2 = r2 ^ r3;
+ [p1] = r2;
r1 = p1;
call _put_core_lock;
r0 = r3;
diff --git a/arch/frv/include/asm/atomic.h b/arch/frv/include/asm/atomic.h
index 64f02d451aa8..1c2a5e264fc7 100644
--- a/arch/frv/include/asm/atomic.h
+++ b/arch/frv/include/asm/atomic.h
@@ -60,16 +60,6 @@ static inline int atomic_add_negative(int i, atomic_t *v)
return atomic_add_return(i, v) < 0;
}
-static inline void atomic_add(int i, atomic_t *v)
-{
- atomic_add_return(i, v);
-}
-
-static inline void atomic_sub(int i, atomic_t *v)
-{
- atomic_sub_return(i, v);
-}
-
static inline void atomic_inc(atomic_t *v)
{
atomic_inc_return(v);
@@ -136,16 +126,6 @@ static inline long long atomic64_add_negative(long long i, atomic64_t *v)
return atomic64_add_return(i, v) < 0;
}
-static inline void atomic64_add(long long i, atomic64_t *v)
-{
- atomic64_add_return(i, v);
-}
-
-static inline void atomic64_sub(long long i, atomic64_t *v)
-{
- atomic64_sub_return(i, v);
-}
-
static inline void atomic64_inc(atomic64_t *v)
{
atomic64_inc_return(v);
@@ -182,11 +162,19 @@ static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u)
}
#define ATOMIC_OP(op) \
+static inline int atomic_fetch_##op(int i, atomic_t *v) \
+{ \
+ return __atomic32_fetch_##op(i, &v->counter); \
+} \
static inline void atomic_##op(int i, atomic_t *v) \
{ \
(void)__atomic32_fetch_##op(i, &v->counter); \
} \
\
+static inline long long atomic64_fetch_##op(long long i, atomic64_t *v) \
+{ \
+ return __atomic64_fetch_##op(i, &v->counter); \
+} \
static inline void atomic64_##op(long long i, atomic64_t *v) \
{ \
(void)__atomic64_fetch_##op(i, &v->counter); \
@@ -195,6 +183,8 @@ static inline void atomic64_##op(long long i, atomic64_t *v) \
ATOMIC_OP(or)
ATOMIC_OP(and)
ATOMIC_OP(xor)
+ATOMIC_OP(add)
+ATOMIC_OP(sub)
#undef ATOMIC_OP
diff --git a/arch/frv/include/asm/atomic_defs.h b/arch/frv/include/asm/atomic_defs.h
index 36e126d2f801..d4912c88b829 100644
--- a/arch/frv/include/asm/atomic_defs.h
+++ b/arch/frv/include/asm/atomic_defs.h
@@ -162,6 +162,8 @@ ATOMIC_EXPORT(__atomic64_fetch_##op);
ATOMIC_FETCH_OP(or)
ATOMIC_FETCH_OP(and)
ATOMIC_FETCH_OP(xor)
+ATOMIC_FETCH_OP(add)
+ATOMIC_FETCH_OP(sub)
ATOMIC_OP_RETURN(add)
ATOMIC_OP_RETURN(sub)
diff --git a/arch/frv/include/asm/serial.h b/arch/frv/include/asm/serial.h
index bce0d0d07e60..614c6d76789a 100644
--- a/arch/frv/include/asm/serial.h
+++ b/arch/frv/include/asm/serial.h
@@ -12,7 +12,3 @@
* the base baud is derived from the clock speed and so is variable
*/
#define BASE_BAUD 0
-
-#define STD_COM_FLAGS UPF_BOOT_AUTOCONF
-
-#define SERIAL_PORT_DFNS
diff --git a/arch/h8300/include/asm/atomic.h b/arch/h8300/include/asm/atomic.h
index 4435a445ae7e..349a47a918db 100644
--- a/arch/h8300/include/asm/atomic.h
+++ b/arch/h8300/include/asm/atomic.h
@@ -28,6 +28,19 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \
return ret; \
}
+#define ATOMIC_FETCH_OP(op, c_op) \
+static inline int atomic_fetch_##op(int i, atomic_t *v) \
+{ \
+ h8300flags flags; \
+ int ret; \
+ \
+ flags = arch_local_irq_save(); \
+ ret = v->counter; \
+ v->counter c_op i; \
+ arch_local_irq_restore(flags); \
+ return ret; \
+}
+
#define ATOMIC_OP(op, c_op) \
static inline void atomic_##op(int i, atomic_t *v) \
{ \
@@ -41,17 +54,21 @@ static inline void atomic_##op(int i, atomic_t *v) \
ATOMIC_OP_RETURN(add, +=)
ATOMIC_OP_RETURN(sub, -=)
-ATOMIC_OP(and, &=)
-ATOMIC_OP(or, |=)
-ATOMIC_OP(xor, ^=)
+#define ATOMIC_OPS(op, c_op) \
+ ATOMIC_OP(op, c_op) \
+ ATOMIC_FETCH_OP(op, c_op)
+
+ATOMIC_OPS(and, &=)
+ATOMIC_OPS(or, |=)
+ATOMIC_OPS(xor, ^=)
+ATOMIC_OPS(add, +=)
+ATOMIC_OPS(sub, -=)
+#undef ATOMIC_OPS
#undef ATOMIC_OP_RETURN
#undef ATOMIC_OP
-#define atomic_add(i, v) (void)atomic_add_return(i, v)
#define atomic_add_negative(a, v) (atomic_add_return((a), (v)) < 0)
-
-#define atomic_sub(i, v) (void)atomic_sub_return(i, v)
#define atomic_sub_and_test(i, v) (atomic_sub_return(i, v) == 0)
#define atomic_inc_return(v) atomic_add_return(1, v)
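h8300, like the other lock-based or UP-only ports converted in this series, implements the new fetch_ ops by masking interrupts around a plain read-modify-write. The same shape with the macro expanded for op = add, as a sketch:

	/* Sketch of the ATOMIC_FETCH_OP(add, +=) expansion above. */
	static inline int atomic_fetch_add_sketch(int i, atomic_t *v)
	{
		h8300flags flags;
		int old;

		flags = arch_local_irq_save();
		old = v->counter;	/* capture the value before the update */
		v->counter += i;
		arch_local_irq_restore(flags);

		return old;
	}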
diff --git a/arch/hexagon/include/asm/atomic.h b/arch/hexagon/include/asm/atomic.h
index 55696c4100d4..a62ba368b27d 100644
--- a/arch/hexagon/include/asm/atomic.h
+++ b/arch/hexagon/include/asm/atomic.h
@@ -110,7 +110,7 @@ static inline void atomic_##op(int i, atomic_t *v) \
); \
} \
-#define ATOMIC_OP_RETURN(op) \
+#define ATOMIC_OP_RETURN(op) \
static inline int atomic_##op##_return(int i, atomic_t *v) \
{ \
int output; \
@@ -127,16 +127,37 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \
return output; \
}
-#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op)
+#define ATOMIC_FETCH_OP(op) \
+static inline int atomic_fetch_##op(int i, atomic_t *v) \
+{ \
+ int output, val; \
+ \
+ __asm__ __volatile__ ( \
+ "1: %0 = memw_locked(%2);\n" \
+ " %1 = "#op "(%0,%3);\n" \
+ " memw_locked(%2,P3)=%1;\n" \
+ " if !P3 jump 1b;\n" \
+ : "=&r" (output), "=&r" (val) \
+ : "r" (&v->counter), "r" (i) \
+ : "memory", "p3" \
+ ); \
+ return output; \
+}
+
+#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) ATOMIC_FETCH_OP(op)
ATOMIC_OPS(add)
ATOMIC_OPS(sub)
-ATOMIC_OP(and)
-ATOMIC_OP(or)
-ATOMIC_OP(xor)
+#undef ATOMIC_OPS
+#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op)
+
+ATOMIC_OPS(and)
+ATOMIC_OPS(or)
+ATOMIC_OPS(xor)
#undef ATOMIC_OPS
+#undef ATOMIC_FETCH_OP
#undef ATOMIC_OP_RETURN
#undef ATOMIC_OP
diff --git a/arch/hexagon/include/asm/spinlock.h b/arch/hexagon/include/asm/spinlock.h
index 12ca4ebc0338..a1c55788c5d6 100644
--- a/arch/hexagon/include/asm/spinlock.h
+++ b/arch/hexagon/include/asm/spinlock.h
@@ -23,6 +23,8 @@
#define _ASM_SPINLOCK_H
#include <asm/irqflags.h>
+#include <asm/barrier.h>
+#include <asm/processor.h>
/*
* This file is pulled in for SMP builds.
@@ -176,8 +178,12 @@ static inline unsigned int arch_spin_trylock(arch_spinlock_t *lock)
* SMP spinlocks are intended to allow only a single CPU at the lock
*/
#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
-#define arch_spin_unlock_wait(lock) \
- do {while (arch_spin_is_locked(lock)) cpu_relax(); } while (0)
+
+static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
+{
+ smp_cond_load_acquire(&lock->lock, !VAL);
+}
+
#define arch_spin_is_locked(x) ((x)->lock != 0)
#define arch_read_lock_flags(lock, flags) arch_read_lock(lock)
diff --git a/arch/ia64/include/asm/atomic.h b/arch/ia64/include/asm/atomic.h
index 8dfb5f6f6c35..f565ad376142 100644
--- a/arch/ia64/include/asm/atomic.h
+++ b/arch/ia64/include/asm/atomic.h
@@ -42,8 +42,27 @@ ia64_atomic_##op (int i, atomic_t *v) \
return new; \
}
-ATOMIC_OP(add, +)
-ATOMIC_OP(sub, -)
+#define ATOMIC_FETCH_OP(op, c_op) \
+static __inline__ int \
+ia64_atomic_fetch_##op (int i, atomic_t *v) \
+{ \
+ __s32 old, new; \
+ CMPXCHG_BUGCHECK_DECL \
+ \
+ do { \
+ CMPXCHG_BUGCHECK(v); \
+ old = atomic_read(v); \
+ new = old c_op i; \
+ } while (ia64_cmpxchg(acq, v, old, new, sizeof(atomic_t)) != old); \
+ return old; \
+}
+
+#define ATOMIC_OPS(op, c_op) \
+ ATOMIC_OP(op, c_op) \
+ ATOMIC_FETCH_OP(op, c_op)
+
+ATOMIC_OPS(add, +)
+ATOMIC_OPS(sub, -)
#define atomic_add_return(i,v) \
({ \
@@ -69,14 +88,44 @@ ATOMIC_OP(sub, -)
: ia64_atomic_sub(__ia64_asr_i, v); \
})
-ATOMIC_OP(and, &)
-ATOMIC_OP(or, |)
-ATOMIC_OP(xor, ^)
+#define atomic_fetch_add(i,v) \
+({ \
+ int __ia64_aar_i = (i); \
+ (__builtin_constant_p(i) \
+ && ( (__ia64_aar_i == 1) || (__ia64_aar_i == 4) \
+ || (__ia64_aar_i == 8) || (__ia64_aar_i == 16) \
+ || (__ia64_aar_i == -1) || (__ia64_aar_i == -4) \
+ || (__ia64_aar_i == -8) || (__ia64_aar_i == -16))) \
+ ? ia64_fetchadd(__ia64_aar_i, &(v)->counter, acq) \
+ : ia64_atomic_fetch_add(__ia64_aar_i, v); \
+})
+
+#define atomic_fetch_sub(i,v) \
+({ \
+ int __ia64_asr_i = (i); \
+ (__builtin_constant_p(i) \
+ && ( (__ia64_asr_i == 1) || (__ia64_asr_i == 4) \
+ || (__ia64_asr_i == 8) || (__ia64_asr_i == 16) \
+ || (__ia64_asr_i == -1) || (__ia64_asr_i == -4) \
+ || (__ia64_asr_i == -8) || (__ia64_asr_i == -16))) \
+ ? ia64_fetchadd(-__ia64_asr_i, &(v)->counter, acq) \
+ : ia64_atomic_fetch_sub(__ia64_asr_i, v); \
+})
+
+ATOMIC_FETCH_OP(and, &)
+ATOMIC_FETCH_OP(or, |)
+ATOMIC_FETCH_OP(xor, ^)
+
+#define atomic_and(i,v) (void)ia64_atomic_fetch_and(i,v)
+#define atomic_or(i,v) (void)ia64_atomic_fetch_or(i,v)
+#define atomic_xor(i,v) (void)ia64_atomic_fetch_xor(i,v)
-#define atomic_and(i,v) (void)ia64_atomic_and(i,v)
-#define atomic_or(i,v) (void)ia64_atomic_or(i,v)
-#define atomic_xor(i,v) (void)ia64_atomic_xor(i,v)
+#define atomic_fetch_and(i,v) ia64_atomic_fetch_and(i,v)
+#define atomic_fetch_or(i,v) ia64_atomic_fetch_or(i,v)
+#define atomic_fetch_xor(i,v) ia64_atomic_fetch_xor(i,v)
+#undef ATOMIC_OPS
+#undef ATOMIC_FETCH_OP
#undef ATOMIC_OP
#define ATOMIC64_OP(op, c_op) \
@@ -94,8 +143,27 @@ ia64_atomic64_##op (__s64 i, atomic64_t *v) \
return new; \
}
-ATOMIC64_OP(add, +)
-ATOMIC64_OP(sub, -)
+#define ATOMIC64_FETCH_OP(op, c_op) \
+static __inline__ long \
+ia64_atomic64_fetch_##op (__s64 i, atomic64_t *v) \
+{ \
+ __s64 old, new; \
+ CMPXCHG_BUGCHECK_DECL \
+ \
+ do { \
+ CMPXCHG_BUGCHECK(v); \
+ old = atomic64_read(v); \
+ new = old c_op i; \
+ } while (ia64_cmpxchg(acq, v, old, new, sizeof(atomic64_t)) != old); \
+ return old; \
+}
+
+#define ATOMIC64_OPS(op, c_op) \
+ ATOMIC64_OP(op, c_op) \
+ ATOMIC64_FETCH_OP(op, c_op)
+
+ATOMIC64_OPS(add, +)
+ATOMIC64_OPS(sub, -)
#define atomic64_add_return(i,v) \
({ \
@@ -121,14 +189,44 @@ ATOMIC64_OP(sub, -)
: ia64_atomic64_sub(__ia64_asr_i, v); \
})
-ATOMIC64_OP(and, &)
-ATOMIC64_OP(or, |)
-ATOMIC64_OP(xor, ^)
+#define atomic64_fetch_add(i,v) \
+({ \
+ long __ia64_aar_i = (i); \
+ (__builtin_constant_p(i) \
+ && ( (__ia64_aar_i == 1) || (__ia64_aar_i == 4) \
+ || (__ia64_aar_i == 8) || (__ia64_aar_i == 16) \
+ || (__ia64_aar_i == -1) || (__ia64_aar_i == -4) \
+ || (__ia64_aar_i == -8) || (__ia64_aar_i == -16))) \
+ ? ia64_fetchadd(__ia64_aar_i, &(v)->counter, acq) \
+ : ia64_atomic64_fetch_add(__ia64_aar_i, v); \
+})
+
+#define atomic64_fetch_sub(i,v) \
+({ \
+ long __ia64_asr_i = (i); \
+ (__builtin_constant_p(i) \
+ && ( (__ia64_asr_i == 1) || (__ia64_asr_i == 4) \
+ || (__ia64_asr_i == 8) || (__ia64_asr_i == 16) \
+ || (__ia64_asr_i == -1) || (__ia64_asr_i == -4) \
+ || (__ia64_asr_i == -8) || (__ia64_asr_i == -16))) \
+ ? ia64_fetchadd(-__ia64_asr_i, &(v)->counter, acq) \
+ : ia64_atomic64_fetch_sub(__ia64_asr_i, v); \
+})
+
+ATOMIC64_FETCH_OP(and, &)
+ATOMIC64_FETCH_OP(or, |)
+ATOMIC64_FETCH_OP(xor, ^)
+
+#define atomic64_and(i,v) (void)ia64_atomic64_fetch_and(i,v)
+#define atomic64_or(i,v) (void)ia64_atomic64_fetch_or(i,v)
+#define atomic64_xor(i,v) (void)ia64_atomic64_fetch_xor(i,v)
-#define atomic64_and(i,v) (void)ia64_atomic64_and(i,v)
-#define atomic64_or(i,v) (void)ia64_atomic64_or(i,v)
-#define atomic64_xor(i,v) (void)ia64_atomic64_xor(i,v)
+#define atomic64_fetch_and(i,v) ia64_atomic64_fetch_and(i,v)
+#define atomic64_fetch_or(i,v) ia64_atomic64_fetch_or(i,v)
+#define atomic64_fetch_xor(i,v) ia64_atomic64_fetch_xor(i,v)
+#undef ATOMIC64_OPS
+#undef ATOMIC64_FETCH_OP
#undef ATOMIC64_OP
#define atomic_cmpxchg(v, old, new) (cmpxchg(&((v)->counter), old, new))
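As with the existing add_return/sub_return wrappers, the fetch variants only use the hardware fetchadd when the operand is a compile-time constant the instruction can encode (plus or minus 1, 4, 8, 16); anything else takes the cmpxchg loop. For example:

	old = atomic_fetch_add(8, &v);	/* single fetchadd4.acq */
	old = atomic_fetch_add(5, &v);	/* falls back to the ia64_atomic_fetch_add() loop */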
diff --git a/arch/ia64/include/asm/mutex.h b/arch/ia64/include/asm/mutex.h
index f41e66d65e31..28cb819e0ff9 100644
--- a/arch/ia64/include/asm/mutex.h
+++ b/arch/ia64/include/asm/mutex.h
@@ -82,7 +82,7 @@ __mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *))
static inline int
__mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *))
{
- if (cmpxchg_acq(count, 1, 0) == 1)
+ if (atomic_read(count) == 1 && cmpxchg_acq(count, 1, 0) == 1)
return 1;
return 0;
}
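The added atomic_read() turns this into the classic test-and-test-and-set shape: a contended trylock now fails on a shared read instead of pulling the cache line exclusive only to have the cmpxchg fail. The same idea in isolation (illustrative, not from this patch):

	/* Only attempt the RMW when a cheap read says it can still succeed. */
	static inline bool example_trylock(atomic_t *l)
	{
		return atomic_read(l) == 0 && atomic_cmpxchg(l, 0, 1) == 0;
	}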
diff --git a/arch/ia64/include/asm/rwsem.h b/arch/ia64/include/asm/rwsem.h
index 8b23e070b844..8fa98dd303b4 100644
--- a/arch/ia64/include/asm/rwsem.h
+++ b/arch/ia64/include/asm/rwsem.h
@@ -40,7 +40,7 @@
static inline void
__down_read (struct rw_semaphore *sem)
{
- long result = ia64_fetchadd8_acq((unsigned long *)&sem->count, 1);
+ long result = ia64_fetchadd8_acq((unsigned long *)&sem->count.counter, 1);
if (result < 0)
rwsem_down_read_failed(sem);
@@ -55,9 +55,9 @@ ___down_write (struct rw_semaphore *sem)
long old, new;
do {
- old = sem->count;
+ old = atomic_long_read(&sem->count);
new = old + RWSEM_ACTIVE_WRITE_BIAS;
- } while (cmpxchg_acq(&sem->count, old, new) != old);
+ } while (atomic_long_cmpxchg_acquire(&sem->count, old, new) != old);
return old;
}
@@ -85,7 +85,7 @@ __down_write_killable (struct rw_semaphore *sem)
static inline void
__up_read (struct rw_semaphore *sem)
{
- long result = ia64_fetchadd8_rel((unsigned long *)&sem->count, -1);
+ long result = ia64_fetchadd8_rel((unsigned long *)&sem->count.counter, -1);
if (result < 0 && (--result & RWSEM_ACTIVE_MASK) == 0)
rwsem_wake(sem);
@@ -100,9 +100,9 @@ __up_write (struct rw_semaphore *sem)
long old, new;
do {
- old = sem->count;
+ old = atomic_long_read(&sem->count);
new = old - RWSEM_ACTIVE_WRITE_BIAS;
- } while (cmpxchg_rel(&sem->count, old, new) != old);
+ } while (atomic_long_cmpxchg_release(&sem->count, old, new) != old);
if (new < 0 && (new & RWSEM_ACTIVE_MASK) == 0)
rwsem_wake(sem);
@@ -115,8 +115,8 @@ static inline int
__down_read_trylock (struct rw_semaphore *sem)
{
long tmp;
- while ((tmp = sem->count) >= 0) {
- if (tmp == cmpxchg_acq(&sem->count, tmp, tmp+1)) {
+ while ((tmp = atomic_long_read(&sem->count)) >= 0) {
+ if (tmp == atomic_long_cmpxchg_acquire(&sem->count, tmp, tmp+1)) {
return 1;
}
}
@@ -129,8 +129,8 @@ __down_read_trylock (struct rw_semaphore *sem)
static inline int
__down_write_trylock (struct rw_semaphore *sem)
{
- long tmp = cmpxchg_acq(&sem->count, RWSEM_UNLOCKED_VALUE,
- RWSEM_ACTIVE_WRITE_BIAS);
+ long tmp = atomic_long_cmpxchg_acquire(&sem->count,
+ RWSEM_UNLOCKED_VALUE, RWSEM_ACTIVE_WRITE_BIAS);
return tmp == RWSEM_UNLOCKED_VALUE;
}
@@ -143,19 +143,12 @@ __downgrade_write (struct rw_semaphore *sem)
long old, new;
do {
- old = sem->count;
+ old = atomic_long_read(&sem->count);
new = old - RWSEM_WAITING_BIAS;
- } while (cmpxchg_rel(&sem->count, old, new) != old);
+ } while (atomic_long_cmpxchg_release(&sem->count, old, new) != old);
if (old < 0)
rwsem_downgrade_wake(sem);
}
-/*
- * Implement atomic add functionality. These used to be "inline" functions, but GCC v3.1
- * doesn't quite optimize this stuff right and ends up with bad calls to fetchandadd.
- */
-#define rwsem_atomic_add(delta, sem) atomic64_add(delta, (atomic64_t *)(&(sem)->count))
-#define rwsem_atomic_update(delta, sem) atomic64_add_return(delta, (atomic64_t *)(&(sem)->count))
-
#endif /* _ASM_IA64_RWSEM_H */
diff --git a/arch/ia64/include/asm/spinlock.h b/arch/ia64/include/asm/spinlock.h
index 45698cd15b7b..ca9e76149a4a 100644
--- a/arch/ia64/include/asm/spinlock.h
+++ b/arch/ia64/include/asm/spinlock.h
@@ -15,6 +15,8 @@
#include <linux/atomic.h>
#include <asm/intrinsics.h>
+#include <asm/barrier.h>
+#include <asm/processor.h>
#define arch_spin_lock_init(x) ((x)->lock = 0)
@@ -86,6 +88,8 @@ static __always_inline void __ticket_spin_unlock_wait(arch_spinlock_t *lock)
return;
cpu_relax();
}
+
+ smp_acquire__after_ctrl_dep();
}
static inline int __ticket_spin_is_locked(arch_spinlock_t *lock)
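smp_acquire__after_ctrl_dep() closes the gap left by the control dependency: the loop's exit condition orders subsequent stores, but later loads could still be hoisted above it, so the barrier is what gives unlock_wait its acquire-like guarantee. The pattern in general form:

	/* Spin until the lock word is observed free, then forbid later loads
	 * from being reordered before that observation. */
	while (READ_ONCE(*lock_word))
		cpu_relax();
	smp_acquire__after_ctrl_dep();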
diff --git a/arch/m32r/boot/compressed/m32r_sio.c b/arch/m32r/boot/compressed/m32r_sio.c
index 01d877c6868f..cf3023dced49 100644
--- a/arch/m32r/boot/compressed/m32r_sio.c
+++ b/arch/m32r/boot/compressed/m32r_sio.c
@@ -8,12 +8,13 @@
#include <asm/processor.h>
-static void putc(char c);
+static void m32r_putc(char c);
static int puts(const char *s)
{
char c;
- while ((c = *s++)) putc(c);
+ while ((c = *s++))
+ m32r_putc(c);
return 0;
}
@@ -41,7 +42,7 @@ static int puts(const char *s)
#define BOOT_SIO0TXB PLD_ESIO0TXB
#endif
-static void putc(char c)
+static void m32r_putc(char c)
{
while ((*BOOT_SIO0STS & 0x3) != 0x3)
cpu_relax();
@@ -61,7 +62,7 @@ static void putc(char c)
#define SIO0TXB (volatile unsigned short *)(0x00efd000 + 30)
#endif
-static void putc(char c)
+static void m32r_putc(char c)
{
while ((*SIO0STS & 0x1) == 0)
cpu_relax();
diff --git a/arch/m32r/include/asm/atomic.h b/arch/m32r/include/asm/atomic.h
index ea35160d632b..640cc1c7099f 100644
--- a/arch/m32r/include/asm/atomic.h
+++ b/arch/m32r/include/asm/atomic.h
@@ -89,16 +89,44 @@ static __inline__ int atomic_##op##_return(int i, atomic_t *v) \
return result; \
}
-#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op)
+#define ATOMIC_FETCH_OP(op) \
+static __inline__ int atomic_fetch_##op(int i, atomic_t *v) \
+{ \
+ unsigned long flags; \
+ int result, val; \
+ \
+ local_irq_save(flags); \
+ __asm__ __volatile__ ( \
+ "# atomic_fetch_" #op " \n\t" \
+ DCACHE_CLEAR("%0", "r4", "%2") \
+ M32R_LOCK" %1, @%2; \n\t" \
+ "mv %0, %1 \n\t" \
+ #op " %1, %3; \n\t" \
+ M32R_UNLOCK" %1, @%2; \n\t" \
+ : "=&r" (result), "=&r" (val) \
+ : "r" (&v->counter), "r" (i) \
+ : "memory" \
+ __ATOMIC_CLOBBER \
+ ); \
+ local_irq_restore(flags); \
+ \
+ return result; \
+}
+
+#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) ATOMIC_FETCH_OP(op)
ATOMIC_OPS(add)
ATOMIC_OPS(sub)
-ATOMIC_OP(and)
-ATOMIC_OP(or)
-ATOMIC_OP(xor)
+#undef ATOMIC_OPS
+#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op)
+
+ATOMIC_OPS(and)
+ATOMIC_OPS(or)
+ATOMIC_OPS(xor)
#undef ATOMIC_OPS
+#undef ATOMIC_FETCH_OP
#undef ATOMIC_OP_RETURN
#undef ATOMIC_OP
diff --git a/arch/m32r/include/asm/spinlock.h b/arch/m32r/include/asm/spinlock.h
index fa13694eaae3..323c7fc953cd 100644
--- a/arch/m32r/include/asm/spinlock.h
+++ b/arch/m32r/include/asm/spinlock.h
@@ -13,6 +13,8 @@
#include <linux/atomic.h>
#include <asm/dcache_clear.h>
#include <asm/page.h>
+#include <asm/barrier.h>
+#include <asm/processor.h>
/*
* Your basic SMP spinlocks, allowing only a single CPU anywhere
@@ -27,8 +29,11 @@
#define arch_spin_is_locked(x) (*(volatile int *)(&(x)->slock) <= 0)
#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
-#define arch_spin_unlock_wait(x) \
- do { cpu_relax(); } while (arch_spin_is_locked(x))
+
+static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
+{
+ smp_cond_load_acquire(&lock->slock, VAL > 0);
+}
/**
* arch_spin_trylock - Try spin lock and return a result
diff --git a/arch/m68k/coldfire/head.S b/arch/m68k/coldfire/head.S
index fa31be297b85..73d92ea0ce65 100644
--- a/arch/m68k/coldfire/head.S
+++ b/arch/m68k/coldfire/head.S
@@ -288,7 +288,7 @@ _clear_bss:
#endif
/*
- * Assember start up done, start code proper.
+ * Assembler start up done, start code proper.
*/
jsr start_kernel /* start Linux kernel */
diff --git a/arch/m68k/coldfire/m5272.c b/arch/m68k/coldfire/m5272.c
index c525e4c08f84..9abb1a441da0 100644
--- a/arch/m68k/coldfire/m5272.c
+++ b/arch/m68k/coldfire/m5272.c
@@ -111,7 +111,7 @@ void __init config_BSP(char *commandp, int size)
/***************************************************************************/
/*
- * Some 5272 based boards have the FEC ethernet diectly connected to
+ * Some 5272 based boards have the FEC ethernet directly connected to
* an ethernet switch. In this case we need to use the fixed phy type,
* and we need to declare it early in boot.
*/
diff --git a/arch/m68k/coldfire/pci.c b/arch/m68k/coldfire/pci.c
index 821de928dc3f..6a640be48568 100644
--- a/arch/m68k/coldfire/pci.c
+++ b/arch/m68k/coldfire/pci.c
@@ -42,7 +42,7 @@ static unsigned long iospace;
/*
* We need to be carefull probing on bus 0 (directly connected to host
- * bridge). We should only acccess the well defined possible devices in
+ * bridge). We should only access the well defined possible devices in
* use, ignore aliases and the like.
*/
static unsigned char mcf_host_slot2sid[32] = {
diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig
index 3ee6976f6088..8f5b6f7dd136 100644
--- a/arch/m68k/configs/amiga_defconfig
+++ b/arch/m68k/configs/amiga_defconfig
@@ -9,6 +9,7 @@ CONFIG_LOG_BUF_SHIFT=16
# CONFIG_PID_NS is not set
# CONFIG_NET_NS is not set
CONFIG_BLK_DEV_INITRD=y
+CONFIG_CC_OPTIMIZE_FOR_SIZE=y
CONFIG_USERFAULTFD=y
CONFIG_SLAB=y
CONFIG_MODULES=y
@@ -359,6 +360,7 @@ CONFIG_MACVTAP=m
CONFIG_IPVLAN=m
CONFIG_VXLAN=m
CONFIG_GENEVE=m
+CONFIG_GTP=m
CONFIG_MACSEC=m
CONFIG_NETCONSOLE=m
CONFIG_NETCONSOLE_DYNAMIC=y
@@ -553,7 +555,9 @@ CONFIG_TEST_STRING_HELPERS=m
CONFIG_TEST_KSTRTOX=m
CONFIG_TEST_PRINTF=m
CONFIG_TEST_BITMAP=m
+CONFIG_TEST_UUID=m
CONFIG_TEST_RHASHTABLE=m
+CONFIG_TEST_HASH=m
CONFIG_TEST_LKM=m
CONFIG_TEST_USER_COPY=m
CONFIG_TEST_BPF=m
diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig
index e96787ffcbce..31bded9c83d4 100644
--- a/arch/m68k/configs/apollo_defconfig
+++ b/arch/m68k/configs/apollo_defconfig
@@ -9,6 +9,7 @@ CONFIG_LOG_BUF_SHIFT=16
# CONFIG_PID_NS is not set
# CONFIG_NET_NS is not set
CONFIG_BLK_DEV_INITRD=y
+CONFIG_CC_OPTIMIZE_FOR_SIZE=y
CONFIG_USERFAULTFD=y
CONFIG_SLAB=y
CONFIG_MODULES=y
@@ -341,6 +342,7 @@ CONFIG_MACVTAP=m
CONFIG_IPVLAN=m
CONFIG_VXLAN=m
CONFIG_GENEVE=m
+CONFIG_GTP=m
CONFIG_MACSEC=m
CONFIG_NETCONSOLE=m
CONFIG_NETCONSOLE_DYNAMIC=y
@@ -512,7 +514,9 @@ CONFIG_TEST_STRING_HELPERS=m
CONFIG_TEST_KSTRTOX=m
CONFIG_TEST_PRINTF=m
CONFIG_TEST_BITMAP=m
+CONFIG_TEST_UUID=m
CONFIG_TEST_RHASHTABLE=m
+CONFIG_TEST_HASH=m
CONFIG_TEST_LKM=m
CONFIG_TEST_USER_COPY=m
CONFIG_TEST_BPF=m
diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig
index 083fe6beac14..0d7739e04ae2 100644
--- a/arch/m68k/configs/atari_defconfig
+++ b/arch/m68k/configs/atari_defconfig
@@ -9,6 +9,7 @@ CONFIG_LOG_BUF_SHIFT=16
# CONFIG_PID_NS is not set
# CONFIG_NET_NS is not set
CONFIG_BLK_DEV_INITRD=y
+CONFIG_CC_OPTIMIZE_FOR_SIZE=y
CONFIG_USERFAULTFD=y
CONFIG_SLAB=y
CONFIG_MODULES=y
@@ -350,6 +351,7 @@ CONFIG_MACVTAP=m
CONFIG_IPVLAN=m
CONFIG_VXLAN=m
CONFIG_GENEVE=m
+CONFIG_GTP=m
CONFIG_MACSEC=m
CONFIG_NETCONSOLE=m
CONFIG_NETCONSOLE_DYNAMIC=y
@@ -533,7 +535,9 @@ CONFIG_TEST_STRING_HELPERS=m
CONFIG_TEST_KSTRTOX=m
CONFIG_TEST_PRINTF=m
CONFIG_TEST_BITMAP=m
+CONFIG_TEST_UUID=m
CONFIG_TEST_RHASHTABLE=m
+CONFIG_TEST_HASH=m
CONFIG_TEST_LKM=m
CONFIG_TEST_USER_COPY=m
CONFIG_TEST_BPF=m
diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig
index 475130c06dcb..2cbb5c465fec 100644
--- a/arch/m68k/configs/bvme6000_defconfig
+++ b/arch/m68k/configs/bvme6000_defconfig
@@ -9,6 +9,7 @@ CONFIG_LOG_BUF_SHIFT=16
# CONFIG_PID_NS is not set
# CONFIG_NET_NS is not set
CONFIG_BLK_DEV_INITRD=y
+CONFIG_CC_OPTIMIZE_FOR_SIZE=y
CONFIG_USERFAULTFD=y
CONFIG_SLAB=y
CONFIG_MODULES=y
@@ -340,6 +341,7 @@ CONFIG_MACVTAP=m
CONFIG_IPVLAN=m
CONFIG_VXLAN=m
CONFIG_GENEVE=m
+CONFIG_GTP=m
CONFIG_MACSEC=m
CONFIG_NETCONSOLE=m
CONFIG_NETCONSOLE_DYNAMIC=y
@@ -504,7 +506,9 @@ CONFIG_TEST_STRING_HELPERS=m
CONFIG_TEST_KSTRTOX=m
CONFIG_TEST_PRINTF=m
CONFIG_TEST_BITMAP=m
+CONFIG_TEST_UUID=m
CONFIG_TEST_RHASHTABLE=m
+CONFIG_TEST_HASH=m
CONFIG_TEST_LKM=m
CONFIG_TEST_USER_COPY=m
CONFIG_TEST_BPF=m
diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig
index 4339658c200f..96102a42c156 100644
--- a/arch/m68k/configs/hp300_defconfig
+++ b/arch/m68k/configs/hp300_defconfig
@@ -9,6 +9,7 @@ CONFIG_LOG_BUF_SHIFT=16
# CONFIG_PID_NS is not set
# CONFIG_NET_NS is not set
CONFIG_BLK_DEV_INITRD=y
+CONFIG_CC_OPTIMIZE_FOR_SIZE=y
CONFIG_USERFAULTFD=y
CONFIG_SLAB=y
CONFIG_MODULES=y
@@ -341,6 +342,7 @@ CONFIG_MACVTAP=m
CONFIG_IPVLAN=m
CONFIG_VXLAN=m
CONFIG_GENEVE=m
+CONFIG_GTP=m
CONFIG_MACSEC=m
CONFIG_NETCONSOLE=m
CONFIG_NETCONSOLE_DYNAMIC=y
@@ -514,7 +516,9 @@ CONFIG_TEST_STRING_HELPERS=m
CONFIG_TEST_KSTRTOX=m
CONFIG_TEST_PRINTF=m
CONFIG_TEST_BITMAP=m
+CONFIG_TEST_UUID=m
CONFIG_TEST_RHASHTABLE=m
+CONFIG_TEST_HASH=m
CONFIG_TEST_LKM=m
CONFIG_TEST_USER_COPY=m
CONFIG_TEST_BPF=m
diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig
index 831cc8c3a2e2..97d88f7dc5a7 100644
--- a/arch/m68k/configs/mac_defconfig
+++ b/arch/m68k/configs/mac_defconfig
@@ -9,6 +9,7 @@ CONFIG_LOG_BUF_SHIFT=16
# CONFIG_PID_NS is not set
# CONFIG_NET_NS is not set
CONFIG_BLK_DEV_INITRD=y
+CONFIG_CC_OPTIMIZE_FOR_SIZE=y
CONFIG_USERFAULTFD=y
CONFIG_SLAB=y
CONFIG_MODULES=y
@@ -357,6 +358,7 @@ CONFIG_MACVTAP=m
CONFIG_IPVLAN=m
CONFIG_VXLAN=m
CONFIG_GENEVE=m
+CONFIG_GTP=m
CONFIG_MACSEC=m
CONFIG_NETCONSOLE=m
CONFIG_NETCONSOLE_DYNAMIC=y
@@ -536,7 +538,9 @@ CONFIG_TEST_STRING_HELPERS=m
CONFIG_TEST_KSTRTOX=m
CONFIG_TEST_PRINTF=m
CONFIG_TEST_BITMAP=m
+CONFIG_TEST_UUID=m
CONFIG_TEST_RHASHTABLE=m
+CONFIG_TEST_HASH=m
CONFIG_TEST_LKM=m
CONFIG_TEST_USER_COPY=m
CONFIG_TEST_BPF=m
diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig
index 6377afeb522b..be25ef208f0f 100644
--- a/arch/m68k/configs/multi_defconfig
+++ b/arch/m68k/configs/multi_defconfig
@@ -9,6 +9,7 @@ CONFIG_LOG_BUF_SHIFT=16
# CONFIG_PID_NS is not set
# CONFIG_NET_NS is not set
CONFIG_BLK_DEV_INITRD=y
+CONFIG_CC_OPTIMIZE_FOR_SIZE=y
CONFIG_USERFAULTFD=y
CONFIG_SLAB=y
CONFIG_MODULES=y
@@ -390,6 +391,7 @@ CONFIG_MACVTAP=m
CONFIG_IPVLAN=m
CONFIG_VXLAN=m
CONFIG_GENEVE=m
+CONFIG_GTP=m
CONFIG_MACSEC=m
CONFIG_NETCONSOLE=m
CONFIG_NETCONSOLE_DYNAMIC=y
@@ -616,7 +618,9 @@ CONFIG_TEST_STRING_HELPERS=m
CONFIG_TEST_KSTRTOX=m
CONFIG_TEST_PRINTF=m
CONFIG_TEST_BITMAP=m
+CONFIG_TEST_UUID=m
CONFIG_TEST_RHASHTABLE=m
+CONFIG_TEST_HASH=m
CONFIG_TEST_LKM=m
CONFIG_TEST_USER_COPY=m
CONFIG_TEST_BPF=m
diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig
index 4304b3d56262..a008344360c9 100644
--- a/arch/m68k/configs/mvme147_defconfig
+++ b/arch/m68k/configs/mvme147_defconfig
@@ -9,6 +9,7 @@ CONFIG_LOG_BUF_SHIFT=16
# CONFIG_PID_NS is not set
# CONFIG_NET_NS is not set
CONFIG_BLK_DEV_INITRD=y
+CONFIG_CC_OPTIMIZE_FOR_SIZE=y
CONFIG_USERFAULTFD=y
CONFIG_SLAB=y
CONFIG_MODULES=y
@@ -339,6 +340,7 @@ CONFIG_MACVTAP=m
CONFIG_IPVLAN=m
CONFIG_VXLAN=m
CONFIG_GENEVE=m
+CONFIG_GTP=m
CONFIG_MACSEC=m
CONFIG_NETCONSOLE=m
CONFIG_NETCONSOLE_DYNAMIC=y
@@ -504,7 +506,9 @@ CONFIG_TEST_STRING_HELPERS=m
CONFIG_TEST_KSTRTOX=m
CONFIG_TEST_PRINTF=m
CONFIG_TEST_BITMAP=m
+CONFIG_TEST_UUID=m
CONFIG_TEST_RHASHTABLE=m
+CONFIG_TEST_HASH=m
CONFIG_TEST_LKM=m
CONFIG_TEST_USER_COPY=m
CONFIG_TEST_BPF=m
diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig
index 074bda4094ff..6735a25f36d4 100644
--- a/arch/m68k/configs/mvme16x_defconfig
+++ b/arch/m68k/configs/mvme16x_defconfig
@@ -9,6 +9,7 @@ CONFIG_LOG_BUF_SHIFT=16
# CONFIG_PID_NS is not set
# CONFIG_NET_NS is not set
CONFIG_BLK_DEV_INITRD=y
+CONFIG_CC_OPTIMIZE_FOR_SIZE=y
CONFIG_USERFAULTFD=y
CONFIG_SLAB=y
CONFIG_MODULES=y
@@ -340,6 +341,7 @@ CONFIG_MACVTAP=m
CONFIG_IPVLAN=m
CONFIG_VXLAN=m
CONFIG_GENEVE=m
+CONFIG_GTP=m
CONFIG_MACSEC=m
CONFIG_NETCONSOLE=m
CONFIG_NETCONSOLE_DYNAMIC=y
@@ -504,7 +506,9 @@ CONFIG_TEST_STRING_HELPERS=m
CONFIG_TEST_KSTRTOX=m
CONFIG_TEST_PRINTF=m
CONFIG_TEST_BITMAP=m
+CONFIG_TEST_UUID=m
CONFIG_TEST_RHASHTABLE=m
+CONFIG_TEST_HASH=m
CONFIG_TEST_LKM=m
CONFIG_TEST_USER_COPY=m
CONFIG_TEST_BPF=m
diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig
index 07b9fa8d7f2e..780c6e9f6cf9 100644
--- a/arch/m68k/configs/q40_defconfig
+++ b/arch/m68k/configs/q40_defconfig
@@ -9,6 +9,7 @@ CONFIG_LOG_BUF_SHIFT=16
# CONFIG_PID_NS is not set
# CONFIG_NET_NS is not set
CONFIG_BLK_DEV_INITRD=y
+CONFIG_CC_OPTIMIZE_FOR_SIZE=y
CONFIG_USERFAULTFD=y
CONFIG_SLAB=y
CONFIG_MODULES=y
@@ -346,6 +347,7 @@ CONFIG_MACVTAP=m
CONFIG_IPVLAN=m
CONFIG_VXLAN=m
CONFIG_GENEVE=m
+CONFIG_GTP=m
CONFIG_MACSEC=m
CONFIG_NETCONSOLE=m
CONFIG_NETCONSOLE_DYNAMIC=y
@@ -527,7 +529,9 @@ CONFIG_TEST_STRING_HELPERS=m
CONFIG_TEST_KSTRTOX=m
CONFIG_TEST_PRINTF=m
CONFIG_TEST_BITMAP=m
+CONFIG_TEST_UUID=m
CONFIG_TEST_RHASHTABLE=m
+CONFIG_TEST_HASH=m
CONFIG_TEST_LKM=m
CONFIG_TEST_USER_COPY=m
CONFIG_TEST_BPF=m
diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig
index 36e6fae02d45..44693cf361e5 100644
--- a/arch/m68k/configs/sun3_defconfig
+++ b/arch/m68k/configs/sun3_defconfig
@@ -9,6 +9,7 @@ CONFIG_LOG_BUF_SHIFT=16
# CONFIG_PID_NS is not set
# CONFIG_NET_NS is not set
CONFIG_BLK_DEV_INITRD=y
+CONFIG_CC_OPTIMIZE_FOR_SIZE=y
CONFIG_USERFAULTFD=y
CONFIG_SLAB=y
CONFIG_MODULES=y
@@ -337,6 +338,7 @@ CONFIG_MACVTAP=m
CONFIG_IPVLAN=m
CONFIG_VXLAN=m
CONFIG_GENEVE=m
+CONFIG_GTP=m
CONFIG_MACSEC=m
CONFIG_NETCONSOLE=m
CONFIG_NETCONSOLE_DYNAMIC=y
@@ -506,7 +508,9 @@ CONFIG_TEST_STRING_HELPERS=m
CONFIG_TEST_KSTRTOX=m
CONFIG_TEST_PRINTF=m
CONFIG_TEST_BITMAP=m
+CONFIG_TEST_UUID=m
CONFIG_TEST_RHASHTABLE=m
+CONFIG_TEST_HASH=m
CONFIG_TEST_LKM=m
CONFIG_TEST_USER_COPY=m
CONFIG_TEST_BPF=m
diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig
index 903acf929511..ef0071d61158 100644
--- a/arch/m68k/configs/sun3x_defconfig
+++ b/arch/m68k/configs/sun3x_defconfig
@@ -9,6 +9,7 @@ CONFIG_LOG_BUF_SHIFT=16
# CONFIG_PID_NS is not set
# CONFIG_NET_NS is not set
CONFIG_BLK_DEV_INITRD=y
+CONFIG_CC_OPTIMIZE_FOR_SIZE=y
CONFIG_USERFAULTFD=y
CONFIG_SLAB=y
CONFIG_MODULES=y
@@ -337,6 +338,7 @@ CONFIG_MACVTAP=m
CONFIG_IPVLAN=m
CONFIG_VXLAN=m
CONFIG_GENEVE=m
+CONFIG_GTP=m
CONFIG_MACSEC=m
CONFIG_NETCONSOLE=m
CONFIG_NETCONSOLE_DYNAMIC=y
@@ -506,7 +508,9 @@ CONFIG_TEST_STRING_HELPERS=m
CONFIG_TEST_KSTRTOX=m
CONFIG_TEST_PRINTF=m
CONFIG_TEST_BITMAP=m
+CONFIG_TEST_UUID=m
CONFIG_TEST_RHASHTABLE=m
+CONFIG_TEST_HASH=m
CONFIG_TEST_LKM=m
CONFIG_TEST_USER_COPY=m
CONFIG_TEST_BPF=m
diff --git a/arch/m68k/ifpsp060/src/fpsp.S b/arch/m68k/ifpsp060/src/fpsp.S
index 78cb60f5bb4d..9bbffebe3eb5 100644
--- a/arch/m68k/ifpsp060/src/fpsp.S
+++ b/arch/m68k/ifpsp060/src/fpsp.S
@@ -10191,7 +10191,7 @@ xdnrm_con:
xdnrm_sd:
mov.l %a1,-(%sp)
tst.b LOCAL_EX(%a0) # is denorm pos or neg?
- smi.b %d1 # set d0 accodingly
+ smi.b %d1 # set d0 accordingly
bsr.l unf_sub
mov.l (%sp)+,%a1
xdnrm_exit:
@@ -10990,7 +10990,7 @@ src_qnan_m:
# routines where an instruction is selected by an index into
# a large jump table corresponding to a given instruction which
# has been decoded. Flow continues here where we now decode
-# further accoding to the source operand type.
+# further according to the source operand type.
#
global fsinh
@@ -23196,14 +23196,14 @@ m_sign:
#
# 1. Branch on the sign of the adjusted exponent.
# 2p.(positive exp)
-# 2. Check M16 and the digits in lwords 2 and 3 in decending order.
+# 2. Check M16 and the digits in lwords 2 and 3 in descending order.
# 3. Add one for each zero encountered until a non-zero digit.
# 4. Subtract the count from the exp.
# 5. Check if the exp has crossed zero in #3 above; make the exp abs
# and set SE.
# 6. Multiply the mantissa by 10**count.
# 2n.(negative exp)
-# 2. Check the digits in lwords 3 and 2 in decending order.
+# 2. Check the digits in lwords 3 and 2 in descending order.
# 3. Add one for each zero encountered until a non-zero digit.
# 4. Add the count to the exp.
# 5. Check if the exp has crossed zero in #3 above; clear SE.
diff --git a/arch/m68k/ifpsp060/src/pfpsp.S b/arch/m68k/ifpsp060/src/pfpsp.S
index 4aedef973cf6..3535e6c87eec 100644
--- a/arch/m68k/ifpsp060/src/pfpsp.S
+++ b/arch/m68k/ifpsp060/src/pfpsp.S
@@ -13156,14 +13156,14 @@ m_sign:
#
# 1. Branch on the sign of the adjusted exponent.
# 2p.(positive exp)
-# 2. Check M16 and the digits in lwords 2 and 3 in decending order.
+# 2. Check M16 and the digits in lwords 2 and 3 in descending order.
# 3. Add one for each zero encountered until a non-zero digit.
# 4. Subtract the count from the exp.
# 5. Check if the exp has crossed zero in #3 above; make the exp abs
# and set SE.
# 6. Multiply the mantissa by 10**count.
# 2n.(negative exp)
-# 2. Check the digits in lwords 3 and 2 in decending order.
+# 2. Check the digits in lwords 3 and 2 in descending order.
# 3. Add one for each zero encountered until a non-zero digit.
# 4. Add the count to the exp.
# 5. Check if the exp has crossed zero in #3 above; clear SE.
diff --git a/arch/m68k/include/asm/atomic.h b/arch/m68k/include/asm/atomic.h
index 4858178260f9..cf4c3a7b1a45 100644
--- a/arch/m68k/include/asm/atomic.h
+++ b/arch/m68k/include/asm/atomic.h
@@ -53,6 +53,21 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \
return t; \
}
+#define ATOMIC_FETCH_OP(op, c_op, asm_op) \
+static inline int atomic_fetch_##op(int i, atomic_t *v) \
+{ \
+ int t, tmp; \
+ \
+ __asm__ __volatile__( \
+ "1: movel %2,%1\n" \
+ " " #asm_op "l %3,%1\n" \
+ " casl %2,%1,%0\n" \
+ " jne 1b" \
+ : "+m" (*v), "=&d" (t), "=&d" (tmp) \
+ : "g" (i), "2" (atomic_read(v))); \
+ return tmp; \
+}
+
#else
#define ATOMIC_OP_RETURN(op, c_op, asm_op) \
@@ -68,20 +83,41 @@ static inline int atomic_##op##_return(int i, atomic_t * v) \
return t; \
}
+#define ATOMIC_FETCH_OP(op, c_op, asm_op) \
+static inline int atomic_fetch_##op(int i, atomic_t * v) \
+{ \
+ unsigned long flags; \
+ int t; \
+ \
+ local_irq_save(flags); \
+ t = v->counter; \
+ v->counter c_op i; \
+ local_irq_restore(flags); \
+ \
+ return t; \
+}
+
#endif /* CONFIG_RMW_INSNS */
#define ATOMIC_OPS(op, c_op, asm_op) \
ATOMIC_OP(op, c_op, asm_op) \
- ATOMIC_OP_RETURN(op, c_op, asm_op)
+ ATOMIC_OP_RETURN(op, c_op, asm_op) \
+ ATOMIC_FETCH_OP(op, c_op, asm_op)
ATOMIC_OPS(add, +=, add)
ATOMIC_OPS(sub, -=, sub)
-ATOMIC_OP(and, &=, and)
-ATOMIC_OP(or, |=, or)
-ATOMIC_OP(xor, ^=, eor)
+#undef ATOMIC_OPS
+#define ATOMIC_OPS(op, c_op, asm_op) \
+ ATOMIC_OP(op, c_op, asm_op) \
+ ATOMIC_FETCH_OP(op, c_op, asm_op)
+
+ATOMIC_OPS(and, &=, and)
+ATOMIC_OPS(or, |=, or)
+ATOMIC_OPS(xor, ^=, eor)
#undef ATOMIC_OPS
+#undef ATOMIC_FETCH_OP
#undef ATOMIC_OP_RETURN
#undef ATOMIC_OP
diff --git a/arch/m68k/include/asm/dma.h b/arch/m68k/include/asm/dma.h
index 429fe26e320c..208b4daa14b3 100644
--- a/arch/m68k/include/asm/dma.h
+++ b/arch/m68k/include/asm/dma.h
@@ -18,7 +18,7 @@
* AUG/22/2000 : added support for 32-bit Dual-Address-Mode (K) 2000
* Oliver Kamphenkel (O.Kamphenkel@tu-bs.de)
*
- * AUG/25/2000 : addad support for 8, 16 and 32-bit Single-Address-Mode (K)2000
+ * AUG/25/2000 : added support for 8, 16 and 32-bit Single-Address-Mode (K)2000
* Oliver Kamphenkel (O.Kamphenkel@tu-bs.de)
*
* APR/18/2002 : added proper support for MCF5272 DMA controller.
diff --git a/arch/m68k/include/asm/m525xsim.h b/arch/m68k/include/asm/m525xsim.h
index f186459072e9..699f20c8a0fe 100644
--- a/arch/m68k/include/asm/m525xsim.h
+++ b/arch/m68k/include/asm/m525xsim.h
@@ -123,10 +123,10 @@
/*
* I2C module.
*/
-#define MCFI2C_BASE0 (MCF_MBAR + 0x280) /* Base addreess I2C0 */
+#define MCFI2C_BASE0 (MCF_MBAR + 0x280) /* Base address I2C0 */
#define MCFI2C_SIZE0 0x20 /* Register set size */
-#define MCFI2C_BASE1 (MCF_MBAR2 + 0x440) /* Base addreess I2C1 */
+#define MCFI2C_BASE1 (MCF_MBAR2 + 0x440) /* Base address I2C1 */
#define MCFI2C_SIZE1 0x20 /* Register set size */
/*
diff --git a/arch/m68k/include/asm/mcfmmu.h b/arch/m68k/include/asm/mcfmmu.h
index 26cc3d5a63f8..8824236e303f 100644
--- a/arch/m68k/include/asm/mcfmmu.h
+++ b/arch/m68k/include/asm/mcfmmu.h
@@ -38,7 +38,7 @@
/*
* MMU Operation register.
*/
-#define MMUOR_UAA 0x00000001 /* Update allocatiom address */
+#define MMUOR_UAA 0x00000001 /* Update allocation address */
#define MMUOR_ACC 0x00000002 /* TLB access */
#define MMUOR_RD 0x00000004 /* TLB access read */
#define MMUOR_WR 0x00000000 /* TLB access write */
diff --git a/arch/m68k/include/asm/q40_master.h b/arch/m68k/include/asm/q40_master.h
index fc5b36278d04..c48d21b68f04 100644
--- a/arch/m68k/include/asm/q40_master.h
+++ b/arch/m68k/include/asm/q40_master.h
@@ -1,6 +1,6 @@
/*
* Q40 master Chip Control
- * RTC stuff merged for compactnes..
+ * RTC stuff merged for compactness.
*/
#ifndef _Q40_MASTER_H
diff --git a/arch/m68k/mac/iop.c b/arch/m68k/mac/iop.c
index 4d2adfb32a2a..7990b6f50105 100644
--- a/arch/m68k/mac/iop.c
+++ b/arch/m68k/mac/iop.c
@@ -60,7 +60,7 @@
*
* The host talks to the IOPs using a rather simple message-passing scheme via
* a shared memory area in the IOP RAM. Each IOP has seven "channels"; each
- * channel is conneced to a specific software driver on the IOP. For example
+ * channel is connected to a specific software driver on the IOP. For example
* on the SCC IOP there is one channel for each serial port. Each channel has
* an incoming and and outgoing message queue with a depth of one.
*
diff --git a/arch/m68k/math-emu/fp_decode.h b/arch/m68k/math-emu/fp_decode.h
index 759679d9ab96..6d1e760e2a0e 100644
--- a/arch/m68k/math-emu/fp_decode.h
+++ b/arch/m68k/math-emu/fp_decode.h
@@ -130,7 +130,7 @@ do_fscc=0
bfextu %d2{#13,#3},%d0
.endm
-| decode the 8bit diplacement from the brief extension word
+| decode the 8bit displacement from the brief extension word
.macro fp_decode_disp8
move.b %d2,%d0
ext.w %d0
diff --git a/arch/metag/include/asm/atomic_lnkget.h b/arch/metag/include/asm/atomic_lnkget.h
index 88fa25fae8bd..def2c642f053 100644
--- a/arch/metag/include/asm/atomic_lnkget.h
+++ b/arch/metag/include/asm/atomic_lnkget.h
@@ -69,16 +69,44 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \
return result; \
}
-#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op)
+#define ATOMIC_FETCH_OP(op) \
+static inline int atomic_fetch_##op(int i, atomic_t *v) \
+{ \
+ int result, temp; \
+ \
+ smp_mb(); \
+ \
+ asm volatile ( \
+ "1: LNKGETD %1, [%2]\n" \
+ " " #op " %0, %1, %3\n" \
+ " LNKSETD [%2], %0\n" \
+ " DEFR %0, TXSTAT\n" \
+ " ANDT %0, %0, #HI(0x3f000000)\n" \
+ " CMPT %0, #HI(0x02000000)\n" \
+ " BNZ 1b\n" \
+ : "=&d" (temp), "=&d" (result) \
+ : "da" (&v->counter), "bd" (i) \
+ : "cc"); \
+ \
+ smp_mb(); \
+ \
+ return result; \
+}
+
+#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) ATOMIC_FETCH_OP(op)
ATOMIC_OPS(add)
ATOMIC_OPS(sub)
-ATOMIC_OP(and)
-ATOMIC_OP(or)
-ATOMIC_OP(xor)
+#undef ATOMIC_OPS
+#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op)
+
+ATOMIC_OPS(and)
+ATOMIC_OPS(or)
+ATOMIC_OPS(xor)
#undef ATOMIC_OPS
+#undef ATOMIC_FETCH_OP
#undef ATOMIC_OP_RETURN
#undef ATOMIC_OP
diff --git a/arch/metag/include/asm/atomic_lock1.h b/arch/metag/include/asm/atomic_lock1.h
index 0295d9b8d5bf..6c1380a8a0d4 100644
--- a/arch/metag/include/asm/atomic_lock1.h
+++ b/arch/metag/include/asm/atomic_lock1.h
@@ -64,15 +64,40 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \
return result; \
}
-#define ATOMIC_OPS(op, c_op) ATOMIC_OP(op, c_op) ATOMIC_OP_RETURN(op, c_op)
+#define ATOMIC_FETCH_OP(op, c_op) \
+static inline int atomic_fetch_##op(int i, atomic_t *v) \
+{ \
+ unsigned long result; \
+ unsigned long flags; \
+ \
+ __global_lock1(flags); \
+ result = v->counter; \
+ fence(); \
+ v->counter c_op i; \
+ __global_unlock1(flags); \
+ \
+ return result; \
+}
+
+#define ATOMIC_OPS(op, c_op) \
+ ATOMIC_OP(op, c_op) \
+ ATOMIC_OP_RETURN(op, c_op) \
+ ATOMIC_FETCH_OP(op, c_op)
ATOMIC_OPS(add, +=)
ATOMIC_OPS(sub, -=)
-ATOMIC_OP(and, &=)
-ATOMIC_OP(or, |=)
-ATOMIC_OP(xor, ^=)
#undef ATOMIC_OPS
+#define ATOMIC_OPS(op, c_op) \
+ ATOMIC_OP(op, c_op) \
+ ATOMIC_FETCH_OP(op, c_op)
+
+ATOMIC_OPS(and, &=)
+ATOMIC_OPS(or, |=)
+ATOMIC_OPS(xor, ^=)
+
+#undef ATOMIC_OPS
+#undef ATOMIC_FETCH_OP
#undef ATOMIC_OP_RETURN
#undef ATOMIC_OP
diff --git a/arch/metag/include/asm/spinlock.h b/arch/metag/include/asm/spinlock.h
index 86a7cf3d1386..c0c7a22be1ae 100644
--- a/arch/metag/include/asm/spinlock.h
+++ b/arch/metag/include/asm/spinlock.h
@@ -1,14 +1,24 @@
#ifndef __ASM_SPINLOCK_H
#define __ASM_SPINLOCK_H
+#include <asm/barrier.h>
+#include <asm/processor.h>
+
#ifdef CONFIG_METAG_ATOMICITY_LOCK1
#include <asm/spinlock_lock1.h>
#else
#include <asm/spinlock_lnkget.h>
#endif
-#define arch_spin_unlock_wait(lock) \
- do { while (arch_spin_is_locked(lock)) cpu_relax(); } while (0)
+/*
+ * both lock1 and lnkget are test-and-set spinlocks with 0 unlocked and 1
+ * locked.
+ */
+
+static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
+{
+ smp_cond_load_acquire(&lock->lock, !VAL);
+}
#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
diff --git a/arch/mips/include/asm/atomic.h b/arch/mips/include/asm/atomic.h
index 835b402e4574..0ab176bdb8e8 100644
--- a/arch/mips/include/asm/atomic.h
+++ b/arch/mips/include/asm/atomic.h
@@ -66,7 +66,7 @@ static __inline__ void atomic_##op(int i, atomic_t * v) \
" " #asm_op " %0, %2 \n" \
" sc %0, %1 \n" \
" .set mips0 \n" \
- : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (v->counter) \
+ : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (v->counter) \
: "Ir" (i)); \
} while (unlikely(!temp)); \
} else { \
@@ -79,12 +79,10 @@ static __inline__ void atomic_##op(int i, atomic_t * v) \
}
#define ATOMIC_OP_RETURN(op, c_op, asm_op) \
-static __inline__ int atomic_##op##_return(int i, atomic_t * v) \
+static __inline__ int atomic_##op##_return_relaxed(int i, atomic_t * v) \
{ \
int result; \
\
- smp_mb__before_llsc(); \
- \
if (kernel_uses_llsc && R10000_LLSC_WAR) { \
int temp; \
\
@@ -125,23 +123,84 @@ static __inline__ int atomic_##op##_return(int i, atomic_t * v) \
raw_local_irq_restore(flags); \
} \
\
- smp_llsc_mb(); \
+ return result; \
+}
+
+#define ATOMIC_FETCH_OP(op, c_op, asm_op) \
+static __inline__ int atomic_fetch_##op##_relaxed(int i, atomic_t * v) \
+{ \
+ int result; \
+ \
+ if (kernel_uses_llsc && R10000_LLSC_WAR) { \
+ int temp; \
+ \
+ __asm__ __volatile__( \
+ " .set arch=r4000 \n" \
+ "1: ll %1, %2 # atomic_fetch_" #op " \n" \
+ " " #asm_op " %0, %1, %3 \n" \
+ " sc %0, %2 \n" \
+ " beqzl %0, 1b \n" \
+ " move %0, %1 \n" \
+ " .set mips0 \n" \
+ : "=&r" (result), "=&r" (temp), \
+ "+" GCC_OFF_SMALL_ASM() (v->counter) \
+ : "Ir" (i)); \
+ } else if (kernel_uses_llsc) { \
+ int temp; \
+ \
+ do { \
+ __asm__ __volatile__( \
+ " .set "MIPS_ISA_LEVEL" \n" \
+ " ll %1, %2 # atomic_fetch_" #op " \n" \
+ " " #asm_op " %0, %1, %3 \n" \
+ " sc %0, %2 \n" \
+ " .set mips0 \n" \
+ : "=&r" (result), "=&r" (temp), \
+ "+" GCC_OFF_SMALL_ASM() (v->counter) \
+ : "Ir" (i)); \
+ } while (unlikely(!result)); \
+ \
+ result = temp; \
+ } else { \
+ unsigned long flags; \
+ \
+ raw_local_irq_save(flags); \
+ result = v->counter; \
+ v->counter c_op i; \
+ raw_local_irq_restore(flags); \
+ } \
\
return result; \
}
#define ATOMIC_OPS(op, c_op, asm_op) \
ATOMIC_OP(op, c_op, asm_op) \
- ATOMIC_OP_RETURN(op, c_op, asm_op)
+ ATOMIC_OP_RETURN(op, c_op, asm_op) \
+ ATOMIC_FETCH_OP(op, c_op, asm_op)
ATOMIC_OPS(add, +=, addu)
ATOMIC_OPS(sub, -=, subu)
-ATOMIC_OP(and, &=, and)
-ATOMIC_OP(or, |=, or)
-ATOMIC_OP(xor, ^=, xor)
+#define atomic_add_return_relaxed atomic_add_return_relaxed
+#define atomic_sub_return_relaxed atomic_sub_return_relaxed
+#define atomic_fetch_add_relaxed atomic_fetch_add_relaxed
+#define atomic_fetch_sub_relaxed atomic_fetch_sub_relaxed
+
+#undef ATOMIC_OPS
+#define ATOMIC_OPS(op, c_op, asm_op) \
+ ATOMIC_OP(op, c_op, asm_op) \
+ ATOMIC_FETCH_OP(op, c_op, asm_op)
+
+ATOMIC_OPS(and, &=, and)
+ATOMIC_OPS(or, |=, or)
+ATOMIC_OPS(xor, ^=, xor)
+
+#define atomic_fetch_and_relaxed atomic_fetch_and_relaxed
+#define atomic_fetch_or_relaxed atomic_fetch_or_relaxed
+#define atomic_fetch_xor_relaxed atomic_fetch_xor_relaxed
#undef ATOMIC_OPS
+#undef ATOMIC_FETCH_OP
#undef ATOMIC_OP_RETURN
#undef ATOMIC_OP
@@ -362,12 +421,10 @@ static __inline__ void atomic64_##op(long i, atomic64_t * v) \
}
#define ATOMIC64_OP_RETURN(op, c_op, asm_op) \
-static __inline__ long atomic64_##op##_return(long i, atomic64_t * v) \
+static __inline__ long atomic64_##op##_return_relaxed(long i, atomic64_t * v) \
{ \
long result; \
\
- smp_mb__before_llsc(); \
- \
if (kernel_uses_llsc && R10000_LLSC_WAR) { \
long temp; \
\
@@ -409,22 +466,85 @@ static __inline__ long atomic64_##op##_return(long i, atomic64_t * v) \
raw_local_irq_restore(flags); \
} \
\
- smp_llsc_mb(); \
+ return result; \
+}
+
+#define ATOMIC64_FETCH_OP(op, c_op, asm_op) \
+static __inline__ long atomic64_fetch_##op##_relaxed(long i, atomic64_t * v) \
+{ \
+ long result; \
+ \
+ if (kernel_uses_llsc && R10000_LLSC_WAR) { \
+ long temp; \
+ \
+ __asm__ __volatile__( \
+ " .set arch=r4000 \n" \
+ "1: lld %1, %2 # atomic64_fetch_" #op "\n" \
+ " " #asm_op " %0, %1, %3 \n" \
+ " scd %0, %2 \n" \
+ " beqzl %0, 1b \n" \
+ " move %0, %1 \n" \
+ " .set mips0 \n" \
+ : "=&r" (result), "=&r" (temp), \
+ "+" GCC_OFF_SMALL_ASM() (v->counter) \
+ : "Ir" (i)); \
+ } else if (kernel_uses_llsc) { \
+ long temp; \
+ \
+ do { \
+ __asm__ __volatile__( \
+ " .set "MIPS_ISA_LEVEL" \n" \
+ " lld %1, %2 # atomic64_fetch_" #op "\n" \
+ " " #asm_op " %0, %1, %3 \n" \
+ " scd %0, %2 \n" \
+ " .set mips0 \n" \
+ : "=&r" (result), "=&r" (temp), \
+ "=" GCC_OFF_SMALL_ASM() (v->counter) \
+ : "Ir" (i), GCC_OFF_SMALL_ASM() (v->counter) \
+ : "memory"); \
+ } while (unlikely(!result)); \
+ \
+ result = temp; \
+ } else { \
+ unsigned long flags; \
+ \
+ raw_local_irq_save(flags); \
+ result = v->counter; \
+ v->counter c_op i; \
+ raw_local_irq_restore(flags); \
+ } \
\
return result; \
}
#define ATOMIC64_OPS(op, c_op, asm_op) \
ATOMIC64_OP(op, c_op, asm_op) \
- ATOMIC64_OP_RETURN(op, c_op, asm_op)
+ ATOMIC64_OP_RETURN(op, c_op, asm_op) \
+ ATOMIC64_FETCH_OP(op, c_op, asm_op)
ATOMIC64_OPS(add, +=, daddu)
ATOMIC64_OPS(sub, -=, dsubu)
-ATOMIC64_OP(and, &=, and)
-ATOMIC64_OP(or, |=, or)
-ATOMIC64_OP(xor, ^=, xor)
+
+#define atomic64_add_return_relaxed atomic64_add_return_relaxed
+#define atomic64_sub_return_relaxed atomic64_sub_return_relaxed
+#define atomic64_fetch_add_relaxed atomic64_fetch_add_relaxed
+#define atomic64_fetch_sub_relaxed atomic64_fetch_sub_relaxed
+
+#undef ATOMIC64_OPS
+#define ATOMIC64_OPS(op, c_op, asm_op) \
+ ATOMIC64_OP(op, c_op, asm_op) \
+ ATOMIC64_FETCH_OP(op, c_op, asm_op)
+
+ATOMIC64_OPS(and, &=, and)
+ATOMIC64_OPS(or, |=, or)
+ATOMIC64_OPS(xor, ^=, xor)
+
+#define atomic64_fetch_and_relaxed atomic64_fetch_and_relaxed
+#define atomic64_fetch_or_relaxed atomic64_fetch_or_relaxed
+#define atomic64_fetch_xor_relaxed atomic64_fetch_xor_relaxed
#undef ATOMIC64_OPS
+#undef ATOMIC64_FETCH_OP
#undef ATOMIC64_OP_RETURN
#undef ATOMIC64_OP
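Dropping smp_mb__before_llsc()/smp_llsc_mb() here is safe because only the _relaxed variants are now provided by the architecture; the generic atomic layer manufactures the fully ordered versions by bracketing the relaxed op with barriers, roughly like this paraphrase of the include/linux/atomic.h fallback wrapper:

	#define __example_op_fence(op, args...)		\
	({						\
		typeof(op##_relaxed(args)) __ret;	\
		smp_mb__before_atomic();		\
		__ret = op##_relaxed(args);		\
		smp_mb__after_atomic();			\
		__ret;					\
	})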
diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h
index a6b611f1da43..7d44e888134f 100644
--- a/arch/mips/include/asm/pgtable.h
+++ b/arch/mips/include/asm/pgtable.h
@@ -24,7 +24,7 @@ struct mm_struct;
struct vm_area_struct;
#define PAGE_NONE __pgprot(_PAGE_PRESENT | _PAGE_NO_READ | \
- _CACHE_CACHABLE_NONCOHERENT)
+ _page_cachable_default)
#define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_WRITE | \
_page_cachable_default)
#define PAGE_COPY __pgprot(_PAGE_PRESENT | _PAGE_NO_EXEC | \
@@ -476,7 +476,7 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
pte.pte_low &= (_PAGE_MODIFIED | _PAGE_ACCESSED | _PFNX_MASK);
pte.pte_high &= (_PFN_MASK | _CACHE_MASK);
pte.pte_low |= pgprot_val(newprot) & ~_PFNX_MASK;
- pte.pte_high |= pgprot_val(newprot) & ~_PFN_MASK;
+ pte.pte_high |= pgprot_val(newprot) & ~(_PFN_MASK | _CACHE_MASK);
return pte;
}
#elif defined(CONFIG_PHYS_ADDR_T_64BIT) && defined(CONFIG_CPU_MIPS32)
@@ -491,7 +491,8 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
#else
static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
{
- return __pte((pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot));
+ return __pte((pte_val(pte) & _PAGE_CHG_MASK) |
+ (pgprot_val(newprot) & ~_PAGE_CHG_MASK));
}
#endif
@@ -632,7 +633,8 @@ static inline struct page *pmd_page(pmd_t pmd)
static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
{
- pmd_val(pmd) = (pmd_val(pmd) & _PAGE_CHG_MASK) | pgprot_val(newprot);
+ pmd_val(pmd) = (pmd_val(pmd) & (_PAGE_CHG_MASK | _PAGE_HUGE)) |
+ (pgprot_val(newprot) & ~_PAGE_CHG_MASK);
return pmd;
}
diff --git a/arch/mips/include/asm/spinlock.h b/arch/mips/include/asm/spinlock.h
index 40196bebe849..f485afe51514 100644
--- a/arch/mips/include/asm/spinlock.h
+++ b/arch/mips/include/asm/spinlock.h
@@ -12,6 +12,7 @@
#include <linux/compiler.h>
#include <asm/barrier.h>
+#include <asm/processor.h>
#include <asm/compiler.h>
#include <asm/war.h>
@@ -48,8 +49,22 @@ static inline int arch_spin_value_unlocked(arch_spinlock_t lock)
}
#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
-#define arch_spin_unlock_wait(x) \
- while (arch_spin_is_locked(x)) { cpu_relax(); }
+
+static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
+{
+ u16 owner = READ_ONCE(lock->h.serving_now);
+ smp_rmb();
+ for (;;) {
+ arch_spinlock_t tmp = READ_ONCE(*lock);
+
+ if (tmp.h.serving_now == tmp.h.ticket ||
+ tmp.h.serving_now != owner)
+ break;
+
+ cpu_relax();
+ }
+ smp_acquire__after_ctrl_dep();
+}
static inline int arch_spin_is_contended(arch_spinlock_t *lock)
{
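
The rewritten arch_spin_unlock_wait() above snapshots the current owner and then spins until the ticket lock is either free or has moved on to a different owner; either way the holder observed on entry has unlocked at least once. The exit condition, restated on a hypothetical ticket-lock layout mirroring the fields used above:

struct ticket_lock {
	unsigned short serving_now;
	unsigned short ticket;
};

/* True once unlock_wait() may stop spinning: the lock is free, or
 * serving_now has moved past the holder seen on entry. */
static int done_waiting(struct ticket_lock snap, unsigned short owner_at_entry)
{
	return snap.serving_now == snap.ticket ||
	       snap.serving_now != owner_at_entry;
}
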
diff --git a/arch/mn10300/include/asm/atomic.h b/arch/mn10300/include/asm/atomic.h
index ce318d5ab23b..36389efd45e8 100644
--- a/arch/mn10300/include/asm/atomic.h
+++ b/arch/mn10300/include/asm/atomic.h
@@ -84,16 +84,41 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \
return retval; \
}
-#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op)
+#define ATOMIC_FETCH_OP(op) \
+static inline int atomic_fetch_##op(int i, atomic_t *v) \
+{ \
+ int retval, status; \
+ \
+ asm volatile( \
+ "1: mov %4,(_AAR,%3) \n" \
+ " mov (_ADR,%3),%1 \n" \
+ " mov %1,%0 \n" \
+ " " #op " %5,%0 \n" \
+ " mov %0,(_ADR,%3) \n" \
+ " mov (_ADR,%3),%0 \n" /* flush */ \
+ " mov (_ASR,%3),%0 \n" \
+ " or %0,%0 \n" \
+ " bne 1b \n" \
+ : "=&r"(status), "=&r"(retval), "=m"(v->counter) \
+ : "a"(ATOMIC_OPS_BASE_ADDR), "r"(&v->counter), "r"(i) \
+ : "memory", "cc"); \
+ return retval; \
+}
+
+#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) ATOMIC_FETCH_OP(op)
ATOMIC_OPS(add)
ATOMIC_OPS(sub)
-ATOMIC_OP(and)
-ATOMIC_OP(or)
-ATOMIC_OP(xor)
+#undef ATOMIC_OPS
+#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op)
+
+ATOMIC_OPS(and)
+ATOMIC_OPS(or)
+ATOMIC_OPS(xor)
#undef ATOMIC_OPS
+#undef ATOMIC_FETCH_OP
#undef ATOMIC_OP_RETURN
#undef ATOMIC_OP
diff --git a/arch/mn10300/include/asm/spinlock.h b/arch/mn10300/include/asm/spinlock.h
index 1ae580f38933..9c7b8f7942d8 100644
--- a/arch/mn10300/include/asm/spinlock.h
+++ b/arch/mn10300/include/asm/spinlock.h
@@ -12,6 +12,8 @@
#define _ASM_SPINLOCK_H
#include <linux/atomic.h>
+#include <asm/barrier.h>
+#include <asm/processor.h>
#include <asm/rwlock.h>
#include <asm/page.h>
@@ -23,7 +25,11 @@
*/
#define arch_spin_is_locked(x) (*(volatile signed char *)(&(x)->slock) != 0)
-#define arch_spin_unlock_wait(x) do { barrier(); } while (arch_spin_is_locked(x))
+
+static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
+{
+ smp_cond_load_acquire(&lock->slock, !VAL);
+}
static inline void arch_spin_unlock(arch_spinlock_t *lock)
{
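
smp_cond_load_acquire(ptr, cond) is assumed here to spin reloading *ptr until the condition over the loaded value (VAL) holds, and then to give that load acquire semantics. A rough userspace analogue of the !VAL case used above, not the kernel implementation:

#include <stdatomic.h>

static inline int cond_load_acquire_zero(atomic_int *p)
{
	int val;

	while ((val = atomic_load_explicit(p, memory_order_relaxed)) != 0)
		;					/* cpu_relax() in the kernel */
	atomic_thread_fence(memory_order_acquire);	/* the "acquire" part */
	return val;
}
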
diff --git a/arch/parisc/include/asm/atomic.h b/arch/parisc/include/asm/atomic.h
index 1d109990a022..5394b9c5f914 100644
--- a/arch/parisc/include/asm/atomic.h
+++ b/arch/parisc/include/asm/atomic.h
@@ -121,16 +121,39 @@ static __inline__ int atomic_##op##_return(int i, atomic_t *v) \
return ret; \
}
-#define ATOMIC_OPS(op, c_op) ATOMIC_OP(op, c_op) ATOMIC_OP_RETURN(op, c_op)
+#define ATOMIC_FETCH_OP(op, c_op) \
+static __inline__ int atomic_fetch_##op(int i, atomic_t *v) \
+{ \
+ unsigned long flags; \
+ int ret; \
+ \
+ _atomic_spin_lock_irqsave(v, flags); \
+ ret = v->counter; \
+ v->counter c_op i; \
+ _atomic_spin_unlock_irqrestore(v, flags); \
+ \
+ return ret; \
+}
+
+#define ATOMIC_OPS(op, c_op) \
+ ATOMIC_OP(op, c_op) \
+ ATOMIC_OP_RETURN(op, c_op) \
+ ATOMIC_FETCH_OP(op, c_op)
ATOMIC_OPS(add, +=)
ATOMIC_OPS(sub, -=)
-ATOMIC_OP(and, &=)
-ATOMIC_OP(or, |=)
-ATOMIC_OP(xor, ^=)
+#undef ATOMIC_OPS
+#define ATOMIC_OPS(op, c_op) \
+ ATOMIC_OP(op, c_op) \
+ ATOMIC_FETCH_OP(op, c_op)
+
+ATOMIC_OPS(and, &=)
+ATOMIC_OPS(or, |=)
+ATOMIC_OPS(xor, ^=)
#undef ATOMIC_OPS
+#undef ATOMIC_FETCH_OP
#undef ATOMIC_OP_RETURN
#undef ATOMIC_OP
@@ -185,15 +208,39 @@ static __inline__ s64 atomic64_##op##_return(s64 i, atomic64_t *v) \
return ret; \
}
-#define ATOMIC64_OPS(op, c_op) ATOMIC64_OP(op, c_op) ATOMIC64_OP_RETURN(op, c_op)
+#define ATOMIC64_FETCH_OP(op, c_op) \
+static __inline__ s64 atomic64_fetch_##op(s64 i, atomic64_t *v) \
+{ \
+ unsigned long flags; \
+ s64 ret; \
+ \
+ _atomic_spin_lock_irqsave(v, flags); \
+ ret = v->counter; \
+ v->counter c_op i; \
+ _atomic_spin_unlock_irqrestore(v, flags); \
+ \
+ return ret; \
+}
+
+#define ATOMIC64_OPS(op, c_op) \
+ ATOMIC64_OP(op, c_op) \
+ ATOMIC64_OP_RETURN(op, c_op) \
+ ATOMIC64_FETCH_OP(op, c_op)
ATOMIC64_OPS(add, +=)
ATOMIC64_OPS(sub, -=)
-ATOMIC64_OP(and, &=)
-ATOMIC64_OP(or, |=)
-ATOMIC64_OP(xor, ^=)
#undef ATOMIC64_OPS
+#define ATOMIC64_OPS(op, c_op) \
+ ATOMIC64_OP(op, c_op) \
+ ATOMIC64_FETCH_OP(op, c_op)
+
+ATOMIC64_OPS(and, &=)
+ATOMIC64_OPS(or, |=)
+ATOMIC64_OPS(xor, ^=)
+
+#undef ATOMIC64_OPS
+#undef ATOMIC64_FETCH_OP
#undef ATOMIC64_OP_RETURN
#undef ATOMIC64_OP
diff --git a/arch/parisc/include/asm/spinlock.h b/arch/parisc/include/asm/spinlock.h
index 64f2992e439f..e32936cd7f10 100644
--- a/arch/parisc/include/asm/spinlock.h
+++ b/arch/parisc/include/asm/spinlock.h
@@ -13,8 +13,13 @@ static inline int arch_spin_is_locked(arch_spinlock_t *x)
}
#define arch_spin_lock(lock) arch_spin_lock_flags(lock, 0)
-#define arch_spin_unlock_wait(x) \
- do { cpu_relax(); } while (arch_spin_is_locked(x))
+
+static inline void arch_spin_unlock_wait(arch_spinlock_t *x)
+{
+ volatile unsigned int *a = __ldcw_align(x);
+
+ smp_cond_load_acquire(a, VAL);
+}
static inline void arch_spin_lock_flags(arch_spinlock_t *x,
unsigned long flags)
diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h
index ae0751ef8788..f08d567e0ca4 100644
--- a/arch/powerpc/include/asm/atomic.h
+++ b/arch/powerpc/include/asm/atomic.h
@@ -78,21 +78,53 @@ static inline int atomic_##op##_return_relaxed(int a, atomic_t *v) \
return t; \
}
+#define ATOMIC_FETCH_OP_RELAXED(op, asm_op) \
+static inline int atomic_fetch_##op##_relaxed(int a, atomic_t *v) \
+{ \
+ int res, t; \
+ \
+ __asm__ __volatile__( \
+"1: lwarx %0,0,%4 # atomic_fetch_" #op "_relaxed\n" \
+ #asm_op " %1,%3,%0\n" \
+ PPC405_ERR77(0, %4) \
+" stwcx. %1,0,%4\n" \
+" bne- 1b\n" \
+ : "=&r" (res), "=&r" (t), "+m" (v->counter) \
+ : "r" (a), "r" (&v->counter) \
+ : "cc"); \
+ \
+ return res; \
+}
+
#define ATOMIC_OPS(op, asm_op) \
ATOMIC_OP(op, asm_op) \
- ATOMIC_OP_RETURN_RELAXED(op, asm_op)
+ ATOMIC_OP_RETURN_RELAXED(op, asm_op) \
+ ATOMIC_FETCH_OP_RELAXED(op, asm_op)
ATOMIC_OPS(add, add)
ATOMIC_OPS(sub, subf)
-ATOMIC_OP(and, and)
-ATOMIC_OP(or, or)
-ATOMIC_OP(xor, xor)
-
#define atomic_add_return_relaxed atomic_add_return_relaxed
#define atomic_sub_return_relaxed atomic_sub_return_relaxed
+#define atomic_fetch_add_relaxed atomic_fetch_add_relaxed
+#define atomic_fetch_sub_relaxed atomic_fetch_sub_relaxed
+
+#undef ATOMIC_OPS
+#define ATOMIC_OPS(op, asm_op) \
+ ATOMIC_OP(op, asm_op) \
+ ATOMIC_FETCH_OP_RELAXED(op, asm_op)
+
+ATOMIC_OPS(and, and)
+ATOMIC_OPS(or, or)
+ATOMIC_OPS(xor, xor)
+
+#define atomic_fetch_and_relaxed atomic_fetch_and_relaxed
+#define atomic_fetch_or_relaxed atomic_fetch_or_relaxed
+#define atomic_fetch_xor_relaxed atomic_fetch_xor_relaxed
+
#undef ATOMIC_OPS
+#undef ATOMIC_FETCH_OP_RELAXED
#undef ATOMIC_OP_RETURN_RELAXED
#undef ATOMIC_OP
@@ -329,20 +361,53 @@ atomic64_##op##_return_relaxed(long a, atomic64_t *v) \
return t; \
}
+#define ATOMIC64_FETCH_OP_RELAXED(op, asm_op) \
+static inline long \
+atomic64_fetch_##op##_relaxed(long a, atomic64_t *v) \
+{ \
+ long res, t; \
+ \
+ __asm__ __volatile__( \
+"1: ldarx %0,0,%4 # atomic64_fetch_" #op "_relaxed\n" \
+ #asm_op " %1,%3,%0\n" \
+" stdcx. %1,0,%4\n" \
+" bne- 1b\n" \
+ : "=&r" (res), "=&r" (t), "+m" (v->counter) \
+ : "r" (a), "r" (&v->counter) \
+ : "cc"); \
+ \
+ return res; \
+}
+
#define ATOMIC64_OPS(op, asm_op) \
ATOMIC64_OP(op, asm_op) \
- ATOMIC64_OP_RETURN_RELAXED(op, asm_op)
+ ATOMIC64_OP_RETURN_RELAXED(op, asm_op) \
+ ATOMIC64_FETCH_OP_RELAXED(op, asm_op)
ATOMIC64_OPS(add, add)
ATOMIC64_OPS(sub, subf)
-ATOMIC64_OP(and, and)
-ATOMIC64_OP(or, or)
-ATOMIC64_OP(xor, xor)
#define atomic64_add_return_relaxed atomic64_add_return_relaxed
#define atomic64_sub_return_relaxed atomic64_sub_return_relaxed
+#define atomic64_fetch_add_relaxed atomic64_fetch_add_relaxed
+#define atomic64_fetch_sub_relaxed atomic64_fetch_sub_relaxed
+
+#undef ATOMIC64_OPS
+#define ATOMIC64_OPS(op, asm_op) \
+ ATOMIC64_OP(op, asm_op) \
+ ATOMIC64_FETCH_OP_RELAXED(op, asm_op)
+
+ATOMIC64_OPS(and, and)
+ATOMIC64_OPS(or, or)
+ATOMIC64_OPS(xor, xor)
+
+#define atomic64_fetch_and_relaxed atomic64_fetch_and_relaxed
+#define atomic64_fetch_or_relaxed atomic64_fetch_or_relaxed
+#define atomic64_fetch_xor_relaxed atomic64_fetch_xor_relaxed
+
#undef ATOMIC64_OPS
+#undef ATOMIC64_FETCH_OP_RELAXED
#undef ATOMIC64_OP_RETURN_RELAXED
#undef ATOMIC64_OP
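
Only the _relaxed fetch forms are provided above; the assumption is that the generic atomic layer derives the fully ordered variants by wrapping the relaxed op in barriers. A sketch of that composition in userspace C11, similar in spirit but not the actual <linux/atomic.h> machinery:

#include <stdatomic.h>

static inline int fetch_add_full(atomic_int *v, int i)
{
	int old;

	atomic_thread_fence(memory_order_seq_cst);	/* fence before the op */
	old = atomic_fetch_add_explicit(v, i, memory_order_relaxed);
	atomic_thread_fence(memory_order_seq_cst);	/* fence after the op */
	return old;
}
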
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 88a5ecaa157b..ab84c89c9e98 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -230,6 +230,7 @@ extern unsigned long __kernel_virt_size;
#define KERN_VIRT_SIZE __kernel_virt_size
extern struct page *vmemmap;
extern unsigned long ioremap_bot;
+extern unsigned long pci_io_base;
#endif /* __ASSEMBLY__ */
#include <asm/book3s/64/hash.h>
diff --git a/arch/powerpc/include/asm/mutex.h b/arch/powerpc/include/asm/mutex.h
index 127ab23e1f6c..078155fa1189 100644
--- a/arch/powerpc/include/asm/mutex.h
+++ b/arch/powerpc/include/asm/mutex.h
@@ -124,7 +124,7 @@ __mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *))
static inline int
__mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *))
{
- if (likely(__mutex_cmpxchg_lock(count, 1, 0) == 1))
+ if (likely(atomic_read(count) == 1 && __mutex_cmpxchg_lock(count, 1, 0) == 1))
return 1;
return 0;
}
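
The trylock fastpath now reads the count first and only attempts the cmpxchg when it can succeed, so an already-held mutex is observed with a plain load instead of a failing atomic that pulls the cacheline in exclusive state. The same test-before-test-and-set pattern as a userspace sketch:

#include <stdatomic.h>
#include <stdbool.h>

static inline bool trylock_fast(atomic_int *count)
{
	int expected = 1;

	if (atomic_load_explicit(count, memory_order_relaxed) != 1)
		return false;			/* cheap, read-only failure */
	return atomic_compare_exchange_strong(count, &expected, 0);
}
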
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index b5f73cb5eeb6..d70101e1e25c 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -647,7 +647,7 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus,
pci_unlock_rescan_remove();
}
} else if (frozen_bus) {
- eeh_pe_dev_traverse(pe, eeh_rmv_device, &rmv_data);
+ eeh_pe_dev_traverse(pe, eeh_rmv_device, rmv_data);
}
/*
diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c
index 3759df52bd67..a5ae49a2dcc4 100644
--- a/arch/powerpc/kernel/pci_64.c
+++ b/arch/powerpc/kernel/pci_64.c
@@ -47,7 +47,6 @@ static int __init pcibios_init(void)
printk(KERN_INFO "PCI: Probing PCI hardware\n");
- pci_io_base = ISA_IO_BASE;
/* For now, override phys_mem_access_prot. If we need it,
* later, we may move that initialization to each ppc_md
*/
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index e2f12cbcade9..0b93893424f5 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1505,6 +1505,16 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp)
current->thread.regs = regs - 1;
}
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ /*
+ * Clear any transactional state; we're exec()ing. The cause is
+ * not important as there will never be a recheckpoint so it's not
+ * user visible.
+ */
+ if (MSR_TM_SUSPENDED(mfmsr()))
+ tm_reclaim_current(0);
+#endif
+
memset(regs->gpr, 0, sizeof(regs->gpr));
regs->ctr = 0;
regs->link = 0;
diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S
index bf8f34a58670..b7019b559ddb 100644
--- a/arch/powerpc/kernel/tm.S
+++ b/arch/powerpc/kernel/tm.S
@@ -110,17 +110,11 @@ _GLOBAL(tm_reclaim)
std r3, STK_PARAM(R3)(r1)
SAVE_NVGPRS(r1)
- /* We need to setup MSR for VSX register save instructions. Here we
- * also clear the MSR RI since when we do the treclaim, we won't have a
- * valid kernel pointer for a while. We clear RI here as it avoids
- * adding another mtmsr closer to the treclaim. This makes the region
- * maked as non-recoverable wider than it needs to be but it saves on
- * inserting another mtmsrd later.
- */
+ /* We need to setup MSR for VSX register save instructions. */
mfmsr r14
mr r15, r14
ori r15, r15, MSR_FP
- li r16, MSR_RI
+ li r16, 0
ori r16, r16, MSR_EE /* IRQs hard off */
andc r15, r15, r16
oris r15, r15, MSR_VEC@h
@@ -176,7 +170,17 @@ dont_backup_fp:
1: tdeqi r6, 0
EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,0
- /* The moment we treclaim, ALL of our GPRs will switch
+ /* Clear MSR RI since we are about to change r1, EE is already off. */
+ li r4, 0
+ mtmsrd r4, 1
+
+ /*
+ * BE CAREFUL HERE:
+ * At this point we can't take an SLB miss since we have MSR_RI
+ * off. Load only to/from the stack/paca which are in SLB bolted regions
+ * until we turn MSR RI back on.
+ *
+ * The moment we treclaim, ALL of our GPRs will switch
* to user register state. (FPRs, CCR etc. also!)
* Use an sprg and a tm_scratch in the PACA to shuffle.
*/
@@ -197,6 +201,11 @@ dont_backup_fp:
/* Store the PPR in r11 and reset to decent value */
std r11, GPR11(r1) /* Temporary stash */
+
+ /* Reset MSR RI so we can take SLB faults again */
+ li r11, MSR_RI
+ mtmsrd r11, 1
+
mfspr r11, SPRN_PPR
HMT_MEDIUM
@@ -397,11 +406,6 @@ restore_gprs:
ld r5, THREAD_TM_DSCR(r3)
ld r6, THREAD_TM_PPR(r3)
- /* Clear the MSR RI since we are about to change R1. EE is already off
- */
- li r4, 0
- mtmsrd r4, 1
-
REST_GPR(0, r7) /* GPR0 */
REST_2GPRS(2, r7) /* GPR2-3 */
REST_GPR(4, r7) /* GPR4 */
@@ -439,10 +443,33 @@ restore_gprs:
ld r6, _CCR(r7)
mtcr r6
- REST_GPR(1, r7) /* GPR1 */
- REST_GPR(5, r7) /* GPR5-7 */
REST_GPR(6, r7)
- ld r7, GPR7(r7)
+
+ /*
+ * Store r1 and r5 on the stack so that we can access them
+ * after we clear MSR RI.
+ */
+
+ REST_GPR(5, r7)
+ std r5, -8(r1)
+ ld r5, GPR1(r7)
+ std r5, -16(r1)
+
+ REST_GPR(7, r7)
+
+ /* Clear MSR RI since we are about to change r1. EE is already off */
+ li r5, 0
+ mtmsrd r5, 1
+
+ /*
+ * BE CAREFUL HERE:
+ * At this point we can't take an SLB miss since we have MSR_RI
+ * off. Load only to/from the stack/paca which are in SLB bolted regions
+ * until we turn MSR RI back on.
+ */
+
+ ld r5, -8(r1)
+ ld r1, -16(r1)
/* Commit register state as checkpointed state: */
TRECHKPT
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 5b22ba0b58bc..2971ea18c768 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -922,6 +922,10 @@ void __init hash__early_init_mmu(void)
vmemmap = (struct page *)H_VMEMMAP_BASE;
ioremap_bot = IOREMAP_BASE;
+#ifdef CONFIG_PCI
+ pci_io_base = ISA_IO_BASE;
+#endif
+
/* Initialize the MMU Hash table and create the linear mapping
* of memory. Has to be done before SLB initialization as this is
* currently where the page size encoding is obtained.
diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c
index e58707deef5c..7931e1496f0d 100644
--- a/arch/powerpc/mm/pgtable-radix.c
+++ b/arch/powerpc/mm/pgtable-radix.c
@@ -328,6 +328,11 @@ void __init radix__early_init_mmu(void)
__vmalloc_end = RADIX_VMALLOC_END;
vmemmap = (struct page *)RADIX_VMEMMAP_BASE;
ioremap_bot = IOREMAP_BASE;
+
+#ifdef CONFIG_PCI
+ pci_io_base = ISA_IO_BASE;
+#endif
+
/*
* For now radix also use the same frag size
*/
diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h
index 911064aa59b2..d28cc2f5b7b2 100644
--- a/arch/s390/include/asm/atomic.h
+++ b/arch/s390/include/asm/atomic.h
@@ -93,6 +93,11 @@ static inline int atomic_add_return(int i, atomic_t *v)
return __ATOMIC_LOOP(v, i, __ATOMIC_ADD, __ATOMIC_BARRIER) + i;
}
+static inline int atomic_fetch_add(int i, atomic_t *v)
+{
+ return __ATOMIC_LOOP(v, i, __ATOMIC_ADD, __ATOMIC_BARRIER);
+}
+
static inline void atomic_add(int i, atomic_t *v)
{
#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
@@ -114,22 +119,27 @@ static inline void atomic_add(int i, atomic_t *v)
#define atomic_inc_and_test(_v) (atomic_add_return(1, _v) == 0)
#define atomic_sub(_i, _v) atomic_add(-(int)(_i), _v)
#define atomic_sub_return(_i, _v) atomic_add_return(-(int)(_i), _v)
+#define atomic_fetch_sub(_i, _v) atomic_fetch_add(-(int)(_i), _v)
#define atomic_sub_and_test(_i, _v) (atomic_sub_return(_i, _v) == 0)
#define atomic_dec(_v) atomic_sub(1, _v)
#define atomic_dec_return(_v) atomic_sub_return(1, _v)
#define atomic_dec_and_test(_v) (atomic_sub_return(1, _v) == 0)
-#define ATOMIC_OP(op, OP) \
+#define ATOMIC_OPS(op, OP) \
static inline void atomic_##op(int i, atomic_t *v) \
{ \
__ATOMIC_LOOP(v, i, __ATOMIC_##OP, __ATOMIC_NO_BARRIER); \
+} \
+static inline int atomic_fetch_##op(int i, atomic_t *v) \
+{ \
+ return __ATOMIC_LOOP(v, i, __ATOMIC_##OP, __ATOMIC_BARRIER); \
}
-ATOMIC_OP(and, AND)
-ATOMIC_OP(or, OR)
-ATOMIC_OP(xor, XOR)
+ATOMIC_OPS(and, AND)
+ATOMIC_OPS(or, OR)
+ATOMIC_OPS(xor, XOR)
-#undef ATOMIC_OP
+#undef ATOMIC_OPS
#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
@@ -236,6 +246,11 @@ static inline long long atomic64_add_return(long long i, atomic64_t *v)
return __ATOMIC64_LOOP(v, i, __ATOMIC64_ADD, __ATOMIC64_BARRIER) + i;
}
+static inline long long atomic64_fetch_add(long long i, atomic64_t *v)
+{
+ return __ATOMIC64_LOOP(v, i, __ATOMIC64_ADD, __ATOMIC64_BARRIER);
+}
+
static inline void atomic64_add(long long i, atomic64_t *v)
{
#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
@@ -264,17 +279,21 @@ static inline long long atomic64_cmpxchg(atomic64_t *v,
return old;
}
-#define ATOMIC64_OP(op, OP) \
+#define ATOMIC64_OPS(op, OP) \
static inline void atomic64_##op(long i, atomic64_t *v) \
{ \
__ATOMIC64_LOOP(v, i, __ATOMIC64_##OP, __ATOMIC64_NO_BARRIER); \
+} \
+static inline long atomic64_fetch_##op(long i, atomic64_t *v) \
+{ \
+ return __ATOMIC64_LOOP(v, i, __ATOMIC64_##OP, __ATOMIC64_BARRIER); \
}
-ATOMIC64_OP(and, AND)
-ATOMIC64_OP(or, OR)
-ATOMIC64_OP(xor, XOR)
+ATOMIC64_OPS(and, AND)
+ATOMIC64_OPS(or, OR)
+ATOMIC64_OPS(xor, XOR)
-#undef ATOMIC64_OP
+#undef ATOMIC64_OPS
#undef __ATOMIC64_LOOP
static inline int atomic64_add_unless(atomic64_t *v, long long i, long long u)
@@ -315,6 +334,7 @@ static inline long long atomic64_dec_if_positive(atomic64_t *v)
#define atomic64_inc_return(_v) atomic64_add_return(1, _v)
#define atomic64_inc_and_test(_v) (atomic64_add_return(1, _v) == 0)
#define atomic64_sub_return(_i, _v) atomic64_add_return(-(long long)(_i), _v)
+#define atomic64_fetch_sub(_i, _v) atomic64_fetch_add(-(long long)(_i), _v)
#define atomic64_sub(_i, _v) atomic64_add(-(long long)(_i), _v)
#define atomic64_sub_and_test(_i, _v) (atomic64_sub_return(_i, _v) == 0)
#define atomic64_dec(_v) atomic64_sub(1, _v)
diff --git a/arch/s390/include/asm/fpu/api.h b/arch/s390/include/asm/fpu/api.h
index 5e04f3cbd320..8ae236b0f80b 100644
--- a/arch/s390/include/asm/fpu/api.h
+++ b/arch/s390/include/asm/fpu/api.h
@@ -22,7 +22,7 @@ static inline int test_fp_ctl(u32 fpc)
" la %0,0\n"
"1:\n"
EX_TABLE(0b,1b)
- : "=d" (rc), "=d" (orig_fpc)
+ : "=d" (rc), "=&d" (orig_fpc)
: "d" (fpc), "0" (-EINVAL));
return rc;
}
diff --git a/arch/s390/include/asm/rwsem.h b/arch/s390/include/asm/rwsem.h
index c75e4471e618..597e7e96b59e 100644
--- a/arch/s390/include/asm/rwsem.h
+++ b/arch/s390/include/asm/rwsem.h
@@ -207,41 +207,4 @@ static inline void __downgrade_write(struct rw_semaphore *sem)
rwsem_downgrade_wake(sem);
}
-/*
- * implement atomic add functionality
- */
-static inline void rwsem_atomic_add(long delta, struct rw_semaphore *sem)
-{
- signed long old, new;
-
- asm volatile(
- " lg %0,%2\n"
- "0: lgr %1,%0\n"
- " agr %1,%4\n"
- " csg %0,%1,%2\n"
- " jl 0b"
- : "=&d" (old), "=&d" (new), "=Q" (sem->count)
- : "Q" (sem->count), "d" (delta)
- : "cc", "memory");
-}
-
-/*
- * implement exchange and add functionality
- */
-static inline long rwsem_atomic_update(long delta, struct rw_semaphore *sem)
-{
- signed long old, new;
-
- asm volatile(
- " lg %0,%2\n"
- "0: lgr %1,%0\n"
- " agr %1,%4\n"
- " csg %0,%1,%2\n"
- " jl 0b"
- : "=&d" (old), "=&d" (new), "=Q" (sem->count)
- : "Q" (sem->count), "d" (delta)
- : "cc", "memory");
- return new;
-}
-
#endif /* _S390_RWSEM_H */
diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h
index 63ebf37d3143..7e9e09f600fa 100644
--- a/arch/s390/include/asm/spinlock.h
+++ b/arch/s390/include/asm/spinlock.h
@@ -10,6 +10,8 @@
#define __ASM_SPINLOCK_H
#include <linux/smp.h>
+#include <asm/barrier.h>
+#include <asm/processor.h>
#define SPINLOCK_LOCKVAL (S390_lowcore.spinlock_lockval)
@@ -97,6 +99,7 @@ static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
{
while (arch_spin_is_locked(lock))
arch_spin_relax(lock);
+ smp_acquire__after_ctrl_dep();
}
/*
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index f20abdb5630a..d14069d4b88d 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -2064,12 +2064,5 @@ void s390_reset_system(void)
S390_lowcore.program_new_psw.addr =
(unsigned long) s390_base_pgm_handler;
- /*
- * Clear subchannel ID and number to signal new kernel that no CCW or
- * SCSI IPL has been done (for kexec and kdump)
- */
- S390_lowcore.subchannel_id = 0;
- S390_lowcore.subchannel_nr = 0;
-
do_reset_calls();
}
diff --git a/arch/sh/include/asm/atomic-grb.h b/arch/sh/include/asm/atomic-grb.h
index b94df40e5f2d..d755e96c3064 100644
--- a/arch/sh/include/asm/atomic-grb.h
+++ b/arch/sh/include/asm/atomic-grb.h
@@ -43,16 +43,42 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \
return tmp; \
}
-#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op)
+#define ATOMIC_FETCH_OP(op) \
+static inline int atomic_fetch_##op(int i, atomic_t *v) \
+{ \
+ int res, tmp; \
+ \
+ __asm__ __volatile__ ( \
+ " .align 2 \n\t" \
+ " mova 1f, r0 \n\t" /* r0 = end point */ \
+ " mov r15, r1 \n\t" /* r1 = saved sp */ \
+ " mov #-6, r15 \n\t" /* LOGIN: r15 = size */ \
+ " mov.l @%2, %0 \n\t" /* load old value */ \
+ " mov %0, %1 \n\t" /* save old value */ \
+ " " #op " %3, %0 \n\t" /* $op */ \
+ " mov.l %0, @%2 \n\t" /* store new value */ \
+ "1: mov r1, r15 \n\t" /* LOGOUT */ \
+ : "=&r" (tmp), "=&r" (res), "+r" (v) \
+ : "r" (i) \
+ : "memory" , "r0", "r1"); \
+ \
+ return res; \
+}
+
+#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) ATOMIC_FETCH_OP(op)
ATOMIC_OPS(add)
ATOMIC_OPS(sub)
-ATOMIC_OP(and)
-ATOMIC_OP(or)
-ATOMIC_OP(xor)
+#undef ATOMIC_OPS
+#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op)
+
+ATOMIC_OPS(and)
+ATOMIC_OPS(or)
+ATOMIC_OPS(xor)
#undef ATOMIC_OPS
+#undef ATOMIC_FETCH_OP
#undef ATOMIC_OP_RETURN
#undef ATOMIC_OP
diff --git a/arch/sh/include/asm/atomic-irq.h b/arch/sh/include/asm/atomic-irq.h
index 23fcdad5773e..8e2da5fa0178 100644
--- a/arch/sh/include/asm/atomic-irq.h
+++ b/arch/sh/include/asm/atomic-irq.h
@@ -33,15 +33,38 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \
return temp; \
}
-#define ATOMIC_OPS(op, c_op) ATOMIC_OP(op, c_op) ATOMIC_OP_RETURN(op, c_op)
+#define ATOMIC_FETCH_OP(op, c_op) \
+static inline int atomic_fetch_##op(int i, atomic_t *v) \
+{ \
+ unsigned long temp, flags; \
+ \
+ raw_local_irq_save(flags); \
+ temp = v->counter; \
+ v->counter c_op i; \
+ raw_local_irq_restore(flags); \
+ \
+ return temp; \
+}
+
+#define ATOMIC_OPS(op, c_op) \
+ ATOMIC_OP(op, c_op) \
+ ATOMIC_OP_RETURN(op, c_op) \
+ ATOMIC_FETCH_OP(op, c_op)
ATOMIC_OPS(add, +=)
ATOMIC_OPS(sub, -=)
-ATOMIC_OP(and, &=)
-ATOMIC_OP(or, |=)
-ATOMIC_OP(xor, ^=)
#undef ATOMIC_OPS
+#define ATOMIC_OPS(op, c_op) \
+ ATOMIC_OP(op, c_op) \
+ ATOMIC_FETCH_OP(op, c_op)
+
+ATOMIC_OPS(and, &=)
+ATOMIC_OPS(or, |=)
+ATOMIC_OPS(xor, ^=)
+
+#undef ATOMIC_OPS
+#undef ATOMIC_FETCH_OP
#undef ATOMIC_OP_RETURN
#undef ATOMIC_OP
diff --git a/arch/sh/include/asm/atomic-llsc.h b/arch/sh/include/asm/atomic-llsc.h
index 33d34b16d4d6..caea2c45f6c2 100644
--- a/arch/sh/include/asm/atomic-llsc.h
+++ b/arch/sh/include/asm/atomic-llsc.h
@@ -48,15 +48,39 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \
return temp; \
}
-#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op)
+#define ATOMIC_FETCH_OP(op) \
+static inline int atomic_fetch_##op(int i, atomic_t *v) \
+{ \
+ unsigned long res, temp; \
+ \
+ __asm__ __volatile__ ( \
+"1: movli.l @%3, %0 ! atomic_fetch_" #op " \n" \
+" mov %0, %1 \n" \
+" " #op " %2, %0 \n" \
+" movco.l %0, @%3 \n" \
+" bf 1b \n" \
+" synco \n" \
+ : "=&z" (temp), "=&z" (res) \
+ : "r" (i), "r" (&v->counter) \
+ : "t"); \
+ \
+ return res; \
+}
+
+#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) ATOMIC_FETCH_OP(op)
ATOMIC_OPS(add)
ATOMIC_OPS(sub)
-ATOMIC_OP(and)
-ATOMIC_OP(or)
-ATOMIC_OP(xor)
#undef ATOMIC_OPS
+#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op)
+
+ATOMIC_OPS(and)
+ATOMIC_OPS(or)
+ATOMIC_OPS(xor)
+
+#undef ATOMIC_OPS
+#undef ATOMIC_FETCH_OP
#undef ATOMIC_OP_RETURN
#undef ATOMIC_OP
diff --git a/arch/sh/include/asm/spinlock.h b/arch/sh/include/asm/spinlock.h
index bdc0f3b6c56a..416834b60ad0 100644
--- a/arch/sh/include/asm/spinlock.h
+++ b/arch/sh/include/asm/spinlock.h
@@ -19,14 +19,20 @@
#error "Need movli.l/movco.l for spinlocks"
#endif
+#include <asm/barrier.h>
+#include <asm/processor.h>
+
/*
* Your basic SMP spinlocks, allowing only a single CPU anywhere
*/
#define arch_spin_is_locked(x) ((x)->lock <= 0)
#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
-#define arch_spin_unlock_wait(x) \
- do { while (arch_spin_is_locked(x)) cpu_relax(); } while (0)
+
+static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
+{
+ smp_cond_load_acquire(&lock->lock, VAL > 0);
+}
/*
* Simple spin lock operations. There are two variants, one clears IRQ's
diff --git a/arch/sparc/include/asm/atomic_32.h b/arch/sparc/include/asm/atomic_32.h
index 7dcbebbcaec6..ee3f11c43cda 100644
--- a/arch/sparc/include/asm/atomic_32.h
+++ b/arch/sparc/include/asm/atomic_32.h
@@ -20,9 +20,10 @@
#define ATOMIC_INIT(i) { (i) }
int atomic_add_return(int, atomic_t *);
-void atomic_and(int, atomic_t *);
-void atomic_or(int, atomic_t *);
-void atomic_xor(int, atomic_t *);
+int atomic_fetch_add(int, atomic_t *);
+int atomic_fetch_and(int, atomic_t *);
+int atomic_fetch_or(int, atomic_t *);
+int atomic_fetch_xor(int, atomic_t *);
int atomic_cmpxchg(atomic_t *, int, int);
int atomic_xchg(atomic_t *, int);
int __atomic_add_unless(atomic_t *, int, int);
@@ -35,7 +36,13 @@ void atomic_set(atomic_t *, int);
#define atomic_inc(v) ((void)atomic_add_return( 1, (v)))
#define atomic_dec(v) ((void)atomic_add_return( -1, (v)))
+#define atomic_and(i, v) ((void)atomic_fetch_and((i), (v)))
+#define atomic_or(i, v) ((void)atomic_fetch_or((i), (v)))
+#define atomic_xor(i, v) ((void)atomic_fetch_xor((i), (v)))
+
#define atomic_sub_return(i, v) (atomic_add_return(-(int)(i), (v)))
+#define atomic_fetch_sub(i, v) (atomic_fetch_add (-(int)(i), (v)))
+
#define atomic_inc_return(v) (atomic_add_return( 1, (v)))
#define atomic_dec_return(v) (atomic_add_return( -1, (v)))
diff --git a/arch/sparc/include/asm/atomic_64.h b/arch/sparc/include/asm/atomic_64.h
index f2fbf9e16faf..24827a3f733a 100644
--- a/arch/sparc/include/asm/atomic_64.h
+++ b/arch/sparc/include/asm/atomic_64.h
@@ -28,16 +28,24 @@ void atomic64_##op(long, atomic64_t *);
int atomic_##op##_return(int, atomic_t *); \
long atomic64_##op##_return(long, atomic64_t *);
-#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op)
+#define ATOMIC_FETCH_OP(op) \
+int atomic_fetch_##op(int, atomic_t *); \
+long atomic64_fetch_##op(long, atomic64_t *);
+
+#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) ATOMIC_FETCH_OP(op)
ATOMIC_OPS(add)
ATOMIC_OPS(sub)
-ATOMIC_OP(and)
-ATOMIC_OP(or)
-ATOMIC_OP(xor)
+#undef ATOMIC_OPS
+#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op)
+
+ATOMIC_OPS(and)
+ATOMIC_OPS(or)
+ATOMIC_OPS(xor)
#undef ATOMIC_OPS
+#undef ATOMIC_FETCH_OP
#undef ATOMIC_OP_RETURN
#undef ATOMIC_OP
diff --git a/arch/sparc/include/asm/spinlock_32.h b/arch/sparc/include/asm/spinlock_32.h
index bcc98fc35281..d9c5876c6121 100644
--- a/arch/sparc/include/asm/spinlock_32.h
+++ b/arch/sparc/include/asm/spinlock_32.h
@@ -9,12 +9,15 @@
#ifndef __ASSEMBLY__
#include <asm/psr.h>
+#include <asm/barrier.h>
#include <asm/processor.h> /* for cpu_relax */
#define arch_spin_is_locked(lock) (*((volatile unsigned char *)(lock)) != 0)
-#define arch_spin_unlock_wait(lock) \
- do { while (arch_spin_is_locked(lock)) cpu_relax(); } while (0)
+static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
+{
+ smp_cond_load_acquire(&lock->lock, !VAL);
+}
static inline void arch_spin_lock(arch_spinlock_t *lock)
{
diff --git a/arch/sparc/include/asm/spinlock_64.h b/arch/sparc/include/asm/spinlock_64.h
index 968917694978..87990b7c6b0d 100644
--- a/arch/sparc/include/asm/spinlock_64.h
+++ b/arch/sparc/include/asm/spinlock_64.h
@@ -8,6 +8,9 @@
#ifndef __ASSEMBLY__
+#include <asm/processor.h>
+#include <asm/barrier.h>
+
/* To get debugging spinlocks which detect and catch
* deadlock situations, set CONFIG_DEBUG_SPINLOCK
* and rebuild your kernel.
@@ -23,9 +26,10 @@
#define arch_spin_is_locked(lp) ((lp)->lock != 0)
-#define arch_spin_unlock_wait(lp) \
- do { rmb(); \
- } while((lp)->lock)
+static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
+{
+ smp_cond_load_acquire(&lock->lock, !VAL);
+}
static inline void arch_spin_lock(arch_spinlock_t *lock)
{
diff --git a/arch/sparc/lib/atomic32.c b/arch/sparc/lib/atomic32.c
index b9d63c0a7aab..2c373329d5cb 100644
--- a/arch/sparc/lib/atomic32.c
+++ b/arch/sparc/lib/atomic32.c
@@ -27,39 +27,44 @@ static DEFINE_SPINLOCK(dummy);
#endif /* SMP */
-#define ATOMIC_OP_RETURN(op, c_op) \
-int atomic_##op##_return(int i, atomic_t *v) \
+#define ATOMIC_FETCH_OP(op, c_op) \
+int atomic_fetch_##op(int i, atomic_t *v) \
{ \
int ret; \
unsigned long flags; \
spin_lock_irqsave(ATOMIC_HASH(v), flags); \
\
- ret = (v->counter c_op i); \
+ ret = v->counter; \
+ v->counter c_op i; \
\
spin_unlock_irqrestore(ATOMIC_HASH(v), flags); \
return ret; \
} \
-EXPORT_SYMBOL(atomic_##op##_return);
+EXPORT_SYMBOL(atomic_fetch_##op);
-#define ATOMIC_OP(op, c_op) \
-void atomic_##op(int i, atomic_t *v) \
+#define ATOMIC_OP_RETURN(op, c_op) \
+int atomic_##op##_return(int i, atomic_t *v) \
{ \
+ int ret; \
unsigned long flags; \
spin_lock_irqsave(ATOMIC_HASH(v), flags); \
\
- v->counter c_op i; \
+ ret = (v->counter c_op i); \
\
spin_unlock_irqrestore(ATOMIC_HASH(v), flags); \
+ return ret; \
} \
-EXPORT_SYMBOL(atomic_##op);
+EXPORT_SYMBOL(atomic_##op##_return);
ATOMIC_OP_RETURN(add, +=)
-ATOMIC_OP(and, &=)
-ATOMIC_OP(or, |=)
-ATOMIC_OP(xor, ^=)
+ATOMIC_FETCH_OP(add, +=)
+ATOMIC_FETCH_OP(and, &=)
+ATOMIC_FETCH_OP(or, |=)
+ATOMIC_FETCH_OP(xor, ^=)
+
+#undef ATOMIC_FETCH_OP
#undef ATOMIC_OP_RETURN
-#undef ATOMIC_OP
int atomic_xchg(atomic_t *v, int new)
{
diff --git a/arch/sparc/lib/atomic_64.S b/arch/sparc/lib/atomic_64.S
index d6b0363f345b..a5c5a0279ccc 100644
--- a/arch/sparc/lib/atomic_64.S
+++ b/arch/sparc/lib/atomic_64.S
@@ -9,10 +9,11 @@
.text
- /* Two versions of the atomic routines, one that
+ /* Three versions of the atomic routines, one that
* does not return a value and does not perform
- * memory barriers, and a second which returns
- * a value and does the barriers.
+ * memory barriers, and two which return
+ * a value (the new and the old value, respectively) and
+ * do the barriers.
*/
#define ATOMIC_OP(op) \
@@ -43,15 +44,34 @@ ENTRY(atomic_##op##_return) /* %o0 = increment, %o1 = atomic_ptr */ \
2: BACKOFF_SPIN(%o2, %o3, 1b); \
ENDPROC(atomic_##op##_return);
-#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op)
+#define ATOMIC_FETCH_OP(op) \
+ENTRY(atomic_fetch_##op) /* %o0 = increment, %o1 = atomic_ptr */ \
+ BACKOFF_SETUP(%o2); \
+1: lduw [%o1], %g1; \
+ op %g1, %o0, %g7; \
+ cas [%o1], %g1, %g7; \
+ cmp %g1, %g7; \
+ bne,pn %icc, BACKOFF_LABEL(2f, 1b); \
+ nop; \
+ retl; \
+ sra %g1, 0, %o0; \
+2: BACKOFF_SPIN(%o2, %o3, 1b); \
+ENDPROC(atomic_fetch_##op);
+
+#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) ATOMIC_FETCH_OP(op)
ATOMIC_OPS(add)
ATOMIC_OPS(sub)
-ATOMIC_OP(and)
-ATOMIC_OP(or)
-ATOMIC_OP(xor)
#undef ATOMIC_OPS
+#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op)
+
+ATOMIC_OPS(and)
+ATOMIC_OPS(or)
+ATOMIC_OPS(xor)
+
+#undef ATOMIC_OPS
+#undef ATOMIC_FETCH_OP
#undef ATOMIC_OP_RETURN
#undef ATOMIC_OP
@@ -83,15 +103,34 @@ ENTRY(atomic64_##op##_return) /* %o0 = increment, %o1 = atomic_ptr */ \
2: BACKOFF_SPIN(%o2, %o3, 1b); \
ENDPROC(atomic64_##op##_return);
-#define ATOMIC64_OPS(op) ATOMIC64_OP(op) ATOMIC64_OP_RETURN(op)
+#define ATOMIC64_FETCH_OP(op) \
+ENTRY(atomic64_fetch_##op) /* %o0 = increment, %o1 = atomic_ptr */ \
+ BACKOFF_SETUP(%o2); \
+1: ldx [%o1], %g1; \
+ op %g1, %o0, %g7; \
+ casx [%o1], %g1, %g7; \
+ cmp %g1, %g7; \
+ bne,pn %xcc, BACKOFF_LABEL(2f, 1b); \
+ nop; \
+ retl; \
+ mov %g1, %o0; \
+2: BACKOFF_SPIN(%o2, %o3, 1b); \
+ENDPROC(atomic64_fetch_##op);
+
+#define ATOMIC64_OPS(op) ATOMIC64_OP(op) ATOMIC64_OP_RETURN(op) ATOMIC64_FETCH_OP(op)
ATOMIC64_OPS(add)
ATOMIC64_OPS(sub)
-ATOMIC64_OP(and)
-ATOMIC64_OP(or)
-ATOMIC64_OP(xor)
#undef ATOMIC64_OPS
+#define ATOMIC64_OPS(op) ATOMIC64_OP(op) ATOMIC64_FETCH_OP(op)
+
+ATOMIC64_OPS(and)
+ATOMIC64_OPS(or)
+ATOMIC64_OPS(xor)
+
+#undef ATOMIC64_OPS
+#undef ATOMIC64_FETCH_OP
#undef ATOMIC64_OP_RETURN
#undef ATOMIC64_OP
diff --git a/arch/sparc/lib/ksyms.c b/arch/sparc/lib/ksyms.c
index 8eb454cfe05c..de5e97817bdb 100644
--- a/arch/sparc/lib/ksyms.c
+++ b/arch/sparc/lib/ksyms.c
@@ -107,15 +107,24 @@ EXPORT_SYMBOL(atomic64_##op);
EXPORT_SYMBOL(atomic_##op##_return); \
EXPORT_SYMBOL(atomic64_##op##_return);
-#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op)
+#define ATOMIC_FETCH_OP(op) \
+EXPORT_SYMBOL(atomic_fetch_##op); \
+EXPORT_SYMBOL(atomic64_fetch_##op);
+
+#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) ATOMIC_FETCH_OP(op)
ATOMIC_OPS(add)
ATOMIC_OPS(sub)
-ATOMIC_OP(and)
-ATOMIC_OP(or)
-ATOMIC_OP(xor)
#undef ATOMIC_OPS
+#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op)
+
+ATOMIC_OPS(and)
+ATOMIC_OPS(or)
+ATOMIC_OPS(xor)
+
+#undef ATOMIC_OPS
+#undef ATOMIC_FETCH_OP
#undef ATOMIC_OP_RETURN
#undef ATOMIC_OP
diff --git a/arch/tile/include/asm/atomic.h b/arch/tile/include/asm/atomic.h
index 9fc0107a9c5e..8dda3c8ff5ab 100644
--- a/arch/tile/include/asm/atomic.h
+++ b/arch/tile/include/asm/atomic.h
@@ -46,6 +46,8 @@ static inline int atomic_read(const atomic_t *v)
*/
#define atomic_sub_return(i, v) atomic_add_return((int)(-(i)), (v))
+#define atomic_fetch_sub(i, v) atomic_fetch_add(-(int)(i), (v))
+
/**
* atomic_sub - subtract integer from atomic variable
* @i: integer value to subtract
diff --git a/arch/tile/include/asm/atomic_32.h b/arch/tile/include/asm/atomic_32.h
index d320ce253d86..a93774255136 100644
--- a/arch/tile/include/asm/atomic_32.h
+++ b/arch/tile/include/asm/atomic_32.h
@@ -34,18 +34,29 @@ static inline void atomic_add(int i, atomic_t *v)
_atomic_xchg_add(&v->counter, i);
}
-#define ATOMIC_OP(op) \
-unsigned long _atomic_##op(volatile unsigned long *p, unsigned long mask); \
+#define ATOMIC_OPS(op) \
+unsigned long _atomic_fetch_##op(volatile unsigned long *p, unsigned long mask); \
static inline void atomic_##op(int i, atomic_t *v) \
{ \
- _atomic_##op((unsigned long *)&v->counter, i); \
+ _atomic_fetch_##op((unsigned long *)&v->counter, i); \
+} \
+static inline int atomic_fetch_##op(int i, atomic_t *v) \
+{ \
+ smp_mb(); \
+ return _atomic_fetch_##op((unsigned long *)&v->counter, i); \
}
-ATOMIC_OP(and)
-ATOMIC_OP(or)
-ATOMIC_OP(xor)
+ATOMIC_OPS(and)
+ATOMIC_OPS(or)
+ATOMIC_OPS(xor)
-#undef ATOMIC_OP
+#undef ATOMIC_OPS
+
+static inline int atomic_fetch_add(int i, atomic_t *v)
+{
+ smp_mb();
+ return _atomic_xchg_add(&v->counter, i);
+}
/**
* atomic_add_return - add integer and return
@@ -126,16 +137,29 @@ static inline void atomic64_add(long long i, atomic64_t *v)
_atomic64_xchg_add(&v->counter, i);
}
-#define ATOMIC64_OP(op) \
-long long _atomic64_##op(long long *v, long long n); \
+#define ATOMIC64_OPS(op) \
+long long _atomic64_fetch_##op(long long *v, long long n); \
static inline void atomic64_##op(long long i, atomic64_t *v) \
{ \
- _atomic64_##op(&v->counter, i); \
+ _atomic64_fetch_##op(&v->counter, i); \
+} \
+static inline long long atomic64_fetch_##op(long long i, atomic64_t *v) \
+{ \
+ smp_mb(); \
+ return _atomic64_fetch_##op(&v->counter, i); \
}
-ATOMIC64_OP(and)
-ATOMIC64_OP(or)
-ATOMIC64_OP(xor)
+ATOMIC64_OPS(and)
+ATOMIC64_OPS(or)
+ATOMIC64_OPS(xor)
+
+#undef ATOMIC64_OPS
+
+static inline long long atomic64_fetch_add(long long i, atomic64_t *v)
+{
+ smp_mb();
+ return _atomic64_xchg_add(&v->counter, i);
+}
/**
* atomic64_add_return - add integer and return
@@ -186,6 +210,7 @@ static inline void atomic64_set(atomic64_t *v, long long n)
#define atomic64_inc_return(v) atomic64_add_return(1LL, (v))
#define atomic64_inc_and_test(v) (atomic64_inc_return(v) == 0)
#define atomic64_sub_return(i, v) atomic64_add_return(-(i), (v))
+#define atomic64_fetch_sub(i, v) atomic64_fetch_add(-(i), (v))
#define atomic64_sub_and_test(a, v) (atomic64_sub_return((a), (v)) == 0)
#define atomic64_sub(i, v) atomic64_add(-(i), (v))
#define atomic64_dec(v) atomic64_sub(1LL, (v))
@@ -193,7 +218,6 @@ static inline void atomic64_set(atomic64_t *v, long long n)
#define atomic64_dec_and_test(v) (atomic64_dec_return((v)) == 0)
#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1LL, 0LL)
-
#endif /* !__ASSEMBLY__ */
/*
@@ -242,16 +266,16 @@ struct __get_user {
unsigned long val;
int err;
};
-extern struct __get_user __atomic_cmpxchg(volatile int *p,
+extern struct __get_user __atomic32_cmpxchg(volatile int *p,
int *lock, int o, int n);
-extern struct __get_user __atomic_xchg(volatile int *p, int *lock, int n);
-extern struct __get_user __atomic_xchg_add(volatile int *p, int *lock, int n);
-extern struct __get_user __atomic_xchg_add_unless(volatile int *p,
+extern struct __get_user __atomic32_xchg(volatile int *p, int *lock, int n);
+extern struct __get_user __atomic32_xchg_add(volatile int *p, int *lock, int n);
+extern struct __get_user __atomic32_xchg_add_unless(volatile int *p,
int *lock, int o, int n);
-extern struct __get_user __atomic_or(volatile int *p, int *lock, int n);
-extern struct __get_user __atomic_and(volatile int *p, int *lock, int n);
-extern struct __get_user __atomic_andn(volatile int *p, int *lock, int n);
-extern struct __get_user __atomic_xor(volatile int *p, int *lock, int n);
+extern struct __get_user __atomic32_fetch_or(volatile int *p, int *lock, int n);
+extern struct __get_user __atomic32_fetch_and(volatile int *p, int *lock, int n);
+extern struct __get_user __atomic32_fetch_andn(volatile int *p, int *lock, int n);
+extern struct __get_user __atomic32_fetch_xor(volatile int *p, int *lock, int n);
extern long long __atomic64_cmpxchg(volatile long long *p, int *lock,
long long o, long long n);
extern long long __atomic64_xchg(volatile long long *p, int *lock, long long n);
@@ -259,9 +283,9 @@ extern long long __atomic64_xchg_add(volatile long long *p, int *lock,
long long n);
extern long long __atomic64_xchg_add_unless(volatile long long *p,
int *lock, long long o, long long n);
-extern long long __atomic64_and(volatile long long *p, int *lock, long long n);
-extern long long __atomic64_or(volatile long long *p, int *lock, long long n);
-extern long long __atomic64_xor(volatile long long *p, int *lock, long long n);
+extern long long __atomic64_fetch_and(volatile long long *p, int *lock, long long n);
+extern long long __atomic64_fetch_or(volatile long long *p, int *lock, long long n);
+extern long long __atomic64_fetch_xor(volatile long long *p, int *lock, long long n);
/* Return failure from the atomic wrappers. */
struct __get_user __atomic_bad_address(int __user *addr);
diff --git a/arch/tile/include/asm/atomic_64.h b/arch/tile/include/asm/atomic_64.h
index b0531a623653..4cefa0c9fd81 100644
--- a/arch/tile/include/asm/atomic_64.h
+++ b/arch/tile/include/asm/atomic_64.h
@@ -32,11 +32,6 @@
* on any routine which updates memory and returns a value.
*/
-static inline void atomic_add(int i, atomic_t *v)
-{
- __insn_fetchadd4((void *)&v->counter, i);
-}
-
/*
* Note a subtlety of the locking here. We are required to provide a
* full memory barrier before and after the operation. However, we
@@ -59,28 +54,39 @@ static inline int atomic_add_return(int i, atomic_t *v)
return val;
}
-static inline int __atomic_add_unless(atomic_t *v, int a, int u)
+#define ATOMIC_OPS(op) \
+static inline int atomic_fetch_##op(int i, atomic_t *v) \
+{ \
+ int val; \
+ smp_mb(); \
+ val = __insn_fetch##op##4((void *)&v->counter, i); \
+ smp_mb(); \
+ return val; \
+} \
+static inline void atomic_##op(int i, atomic_t *v) \
+{ \
+ __insn_fetch##op##4((void *)&v->counter, i); \
+}
+
+ATOMIC_OPS(add)
+ATOMIC_OPS(and)
+ATOMIC_OPS(or)
+
+#undef ATOMIC_OPS
+
+static inline int atomic_fetch_xor(int i, atomic_t *v)
{
int guess, oldval = v->counter;
+ smp_mb();
do {
- if (oldval == u)
- break;
guess = oldval;
- oldval = cmpxchg(&v->counter, guess, guess + a);
+ __insn_mtspr(SPR_CMPEXCH_VALUE, guess);
+ oldval = __insn_cmpexch4(&v->counter, guess ^ i);
} while (guess != oldval);
+ smp_mb();
return oldval;
}
-static inline void atomic_and(int i, atomic_t *v)
-{
- __insn_fetchand4((void *)&v->counter, i);
-}
-
-static inline void atomic_or(int i, atomic_t *v)
-{
- __insn_fetchor4((void *)&v->counter, i);
-}
-
static inline void atomic_xor(int i, atomic_t *v)
{
int guess, oldval = v->counter;
@@ -91,6 +97,18 @@ static inline void atomic_xor(int i, atomic_t *v)
} while (guess != oldval);
}
+static inline int __atomic_add_unless(atomic_t *v, int a, int u)
+{
+ int guess, oldval = v->counter;
+ do {
+ if (oldval == u)
+ break;
+ guess = oldval;
+ oldval = cmpxchg(&v->counter, guess, guess + a);
+ } while (guess != oldval);
+ return oldval;
+}
+
/* Now the true 64-bit operations. */
#define ATOMIC64_INIT(i) { (i) }
@@ -98,11 +116,6 @@ static inline void atomic_xor(int i, atomic_t *v)
#define atomic64_read(v) READ_ONCE((v)->counter)
#define atomic64_set(v, i) WRITE_ONCE((v)->counter, (i))
-static inline void atomic64_add(long i, atomic64_t *v)
-{
- __insn_fetchadd((void *)&v->counter, i);
-}
-
static inline long atomic64_add_return(long i, atomic64_t *v)
{
int val;
@@ -112,26 +125,37 @@ static inline long atomic64_add_return(long i, atomic64_t *v)
return val;
}
-static inline long atomic64_add_unless(atomic64_t *v, long a, long u)
+#define ATOMIC64_OPS(op) \
+static inline long atomic64_fetch_##op(long i, atomic64_t *v) \
+{ \
+ long val; \
+ smp_mb(); \
+ val = __insn_fetch##op((void *)&v->counter, i); \
+ smp_mb(); \
+ return val; \
+} \
+static inline void atomic64_##op(long i, atomic64_t *v) \
+{ \
+ __insn_fetch##op((void *)&v->counter, i); \
+}
+
+ATOMIC64_OPS(add)
+ATOMIC64_OPS(and)
+ATOMIC64_OPS(or)
+
+#undef ATOMIC64_OPS
+
+static inline long atomic64_fetch_xor(long i, atomic64_t *v)
{
long guess, oldval = v->counter;
+ smp_mb();
do {
- if (oldval == u)
- break;
guess = oldval;
- oldval = cmpxchg(&v->counter, guess, guess + a);
+ __insn_mtspr(SPR_CMPEXCH_VALUE, guess);
+ oldval = __insn_cmpexch(&v->counter, guess ^ i);
} while (guess != oldval);
- return oldval != u;
-}
-
-static inline void atomic64_and(long i, atomic64_t *v)
-{
- __insn_fetchand((void *)&v->counter, i);
-}
-
-static inline void atomic64_or(long i, atomic64_t *v)
-{
- __insn_fetchor((void *)&v->counter, i);
+ smp_mb();
+ return oldval;
}
static inline void atomic64_xor(long i, atomic64_t *v)
@@ -144,7 +168,20 @@ static inline void atomic64_xor(long i, atomic64_t *v)
} while (guess != oldval);
}
+static inline long atomic64_add_unless(atomic64_t *v, long a, long u)
+{
+ long guess, oldval = v->counter;
+ do {
+ if (oldval == u)
+ break;
+ guess = oldval;
+ oldval = cmpxchg(&v->counter, guess, guess + a);
+ } while (guess != oldval);
+ return oldval != u;
+}
+
#define atomic64_sub_return(i, v) atomic64_add_return(-(i), (v))
+#define atomic64_fetch_sub(i, v) atomic64_fetch_add(-(i), (v))
#define atomic64_sub(i, v) atomic64_add(-(i), (v))
#define atomic64_inc_return(v) atomic64_add_return(1, (v))
#define atomic64_dec_return(v) atomic64_sub_return(1, (v))
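
tilegx has fetch-and-add/and/or instructions but no fetch-and-xor, so atomic64_fetch_xor() above falls back to a compare-exchange retry loop that returns the pre-xor value. The generic shape of such a loop, as a userspace sketch:

#include <stdatomic.h>

static inline long fetch_xor_cas(atomic_long *v, long mask)
{
	long old = atomic_load_explicit(v, memory_order_relaxed);

	/* Retry until nobody modified *v between the load and the CAS;
	 * on failure, old is updated to the current value. */
	while (!atomic_compare_exchange_weak(v, &old, old ^ mask))
		;
	return old;
}
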
diff --git a/arch/tile/include/asm/barrier.h b/arch/tile/include/asm/barrier.h
index d55222806c2f..4c419ab95ab7 100644
--- a/arch/tile/include/asm/barrier.h
+++ b/arch/tile/include/asm/barrier.h
@@ -87,6 +87,13 @@ mb_incoherent(void)
#define __smp_mb__after_atomic() __smp_mb()
#endif
+/*
+ * The TILE architecture does not do speculative reads; this ensures
+ * that a control dependency also orders against loads and already provides
+ * a LOAD->{LOAD,STORE} order and can forgo the additional RMB.
+ */
+#define smp_acquire__after_ctrl_dep() barrier()
+
#include <asm-generic/barrier.h>
#endif /* !__ASSEMBLY__ */
diff --git a/arch/tile/include/asm/bitops_32.h b/arch/tile/include/asm/bitops_32.h
index bbf7b666f21d..d1406a95f6b7 100644
--- a/arch/tile/include/asm/bitops_32.h
+++ b/arch/tile/include/asm/bitops_32.h
@@ -19,9 +19,9 @@
#include <asm/barrier.h>
/* Tile-specific routines to support <asm/bitops.h>. */
-unsigned long _atomic_or(volatile unsigned long *p, unsigned long mask);
-unsigned long _atomic_andn(volatile unsigned long *p, unsigned long mask);
-unsigned long _atomic_xor(volatile unsigned long *p, unsigned long mask);
+unsigned long _atomic_fetch_or(volatile unsigned long *p, unsigned long mask);
+unsigned long _atomic_fetch_andn(volatile unsigned long *p, unsigned long mask);
+unsigned long _atomic_fetch_xor(volatile unsigned long *p, unsigned long mask);
/**
* set_bit - Atomically set a bit in memory
@@ -35,7 +35,7 @@ unsigned long _atomic_xor(volatile unsigned long *p, unsigned long mask);
*/
static inline void set_bit(unsigned nr, volatile unsigned long *addr)
{
- _atomic_or(addr + BIT_WORD(nr), BIT_MASK(nr));
+ _atomic_fetch_or(addr + BIT_WORD(nr), BIT_MASK(nr));
}
/**
@@ -54,7 +54,7 @@ static inline void set_bit(unsigned nr, volatile unsigned long *addr)
*/
static inline void clear_bit(unsigned nr, volatile unsigned long *addr)
{
- _atomic_andn(addr + BIT_WORD(nr), BIT_MASK(nr));
+ _atomic_fetch_andn(addr + BIT_WORD(nr), BIT_MASK(nr));
}
/**
@@ -69,7 +69,7 @@ static inline void clear_bit(unsigned nr, volatile unsigned long *addr)
*/
static inline void change_bit(unsigned nr, volatile unsigned long *addr)
{
- _atomic_xor(addr + BIT_WORD(nr), BIT_MASK(nr));
+ _atomic_fetch_xor(addr + BIT_WORD(nr), BIT_MASK(nr));
}
/**
@@ -85,7 +85,7 @@ static inline int test_and_set_bit(unsigned nr, volatile unsigned long *addr)
unsigned long mask = BIT_MASK(nr);
addr += BIT_WORD(nr);
smp_mb(); /* barrier for proper semantics */
- return (_atomic_or(addr, mask) & mask) != 0;
+ return (_atomic_fetch_or(addr, mask) & mask) != 0;
}
/**
@@ -101,7 +101,7 @@ static inline int test_and_clear_bit(unsigned nr, volatile unsigned long *addr)
unsigned long mask = BIT_MASK(nr);
addr += BIT_WORD(nr);
smp_mb(); /* barrier for proper semantics */
- return (_atomic_andn(addr, mask) & mask) != 0;
+ return (_atomic_fetch_andn(addr, mask) & mask) != 0;
}
/**
@@ -118,7 +118,7 @@ static inline int test_and_change_bit(unsigned nr,
unsigned long mask = BIT_MASK(nr);
addr += BIT_WORD(nr);
smp_mb(); /* barrier for proper semantics */
- return (_atomic_xor(addr, mask) & mask) != 0;
+ return (_atomic_fetch_xor(addr, mask) & mask) != 0;
}
#include <asm-generic/bitops/ext2-atomic.h>
diff --git a/arch/tile/include/asm/futex.h b/arch/tile/include/asm/futex.h
index 1a6ef1b69cb1..e64a1b75fc38 100644
--- a/arch/tile/include/asm/futex.h
+++ b/arch/tile/include/asm/futex.h
@@ -80,16 +80,16 @@
ret = gu.err; \
}
-#define __futex_set() __futex_call(__atomic_xchg)
-#define __futex_add() __futex_call(__atomic_xchg_add)
-#define __futex_or() __futex_call(__atomic_or)
-#define __futex_andn() __futex_call(__atomic_andn)
-#define __futex_xor() __futex_call(__atomic_xor)
+#define __futex_set() __futex_call(__atomic32_xchg)
+#define __futex_add() __futex_call(__atomic32_xchg_add)
+#define __futex_or() __futex_call(__atomic32_fetch_or)
+#define __futex_andn() __futex_call(__atomic32_fetch_andn)
+#define __futex_xor() __futex_call(__atomic32_fetch_xor)
#define __futex_cmpxchg() \
{ \
- struct __get_user gu = __atomic_cmpxchg((u32 __force *)uaddr, \
- lock, oldval, oparg); \
+ struct __get_user gu = __atomic32_cmpxchg((u32 __force *)uaddr, \
+ lock, oldval, oparg); \
val = gu.val; \
ret = gu.err; \
}
diff --git a/arch/tile/lib/atomic_32.c b/arch/tile/lib/atomic_32.c
index 298df1e9912a..f8128800dbf5 100644
--- a/arch/tile/lib/atomic_32.c
+++ b/arch/tile/lib/atomic_32.c
@@ -61,13 +61,13 @@ static inline int *__atomic_setup(volatile void *v)
int _atomic_xchg(int *v, int n)
{
- return __atomic_xchg(v, __atomic_setup(v), n).val;
+ return __atomic32_xchg(v, __atomic_setup(v), n).val;
}
EXPORT_SYMBOL(_atomic_xchg);
int _atomic_xchg_add(int *v, int i)
{
- return __atomic_xchg_add(v, __atomic_setup(v), i).val;
+ return __atomic32_xchg_add(v, __atomic_setup(v), i).val;
}
EXPORT_SYMBOL(_atomic_xchg_add);
@@ -78,39 +78,39 @@ int _atomic_xchg_add_unless(int *v, int a, int u)
* to use the first argument consistently as the "old value"
* in the assembly, as is done for _atomic_cmpxchg().
*/
- return __atomic_xchg_add_unless(v, __atomic_setup(v), u, a).val;
+ return __atomic32_xchg_add_unless(v, __atomic_setup(v), u, a).val;
}
EXPORT_SYMBOL(_atomic_xchg_add_unless);
int _atomic_cmpxchg(int *v, int o, int n)
{
- return __atomic_cmpxchg(v, __atomic_setup(v), o, n).val;
+ return __atomic32_cmpxchg(v, __atomic_setup(v), o, n).val;
}
EXPORT_SYMBOL(_atomic_cmpxchg);
-unsigned long _atomic_or(volatile unsigned long *p, unsigned long mask)
+unsigned long _atomic_fetch_or(volatile unsigned long *p, unsigned long mask)
{
- return __atomic_or((int *)p, __atomic_setup(p), mask).val;
+ return __atomic32_fetch_or((int *)p, __atomic_setup(p), mask).val;
}
-EXPORT_SYMBOL(_atomic_or);
+EXPORT_SYMBOL(_atomic_fetch_or);
-unsigned long _atomic_and(volatile unsigned long *p, unsigned long mask)
+unsigned long _atomic_fetch_and(volatile unsigned long *p, unsigned long mask)
{
- return __atomic_and((int *)p, __atomic_setup(p), mask).val;
+ return __atomic32_fetch_and((int *)p, __atomic_setup(p), mask).val;
}
-EXPORT_SYMBOL(_atomic_and);
+EXPORT_SYMBOL(_atomic_fetch_and);
-unsigned long _atomic_andn(volatile unsigned long *p, unsigned long mask)
+unsigned long _atomic_fetch_andn(volatile unsigned long *p, unsigned long mask)
{
- return __atomic_andn((int *)p, __atomic_setup(p), mask).val;
+ return __atomic32_fetch_andn((int *)p, __atomic_setup(p), mask).val;
}
-EXPORT_SYMBOL(_atomic_andn);
+EXPORT_SYMBOL(_atomic_fetch_andn);
-unsigned long _atomic_xor(volatile unsigned long *p, unsigned long mask)
+unsigned long _atomic_fetch_xor(volatile unsigned long *p, unsigned long mask)
{
- return __atomic_xor((int *)p, __atomic_setup(p), mask).val;
+ return __atomic32_fetch_xor((int *)p, __atomic_setup(p), mask).val;
}
-EXPORT_SYMBOL(_atomic_xor);
+EXPORT_SYMBOL(_atomic_fetch_xor);
long long _atomic64_xchg(long long *v, long long n)
@@ -142,23 +142,23 @@ long long _atomic64_cmpxchg(long long *v, long long o, long long n)
}
EXPORT_SYMBOL(_atomic64_cmpxchg);
-long long _atomic64_and(long long *v, long long n)
+long long _atomic64_fetch_and(long long *v, long long n)
{
- return __atomic64_and(v, __atomic_setup(v), n);
+ return __atomic64_fetch_and(v, __atomic_setup(v), n);
}
-EXPORT_SYMBOL(_atomic64_and);
+EXPORT_SYMBOL(_atomic64_fetch_and);
-long long _atomic64_or(long long *v, long long n)
+long long _atomic64_fetch_or(long long *v, long long n)
{
- return __atomic64_or(v, __atomic_setup(v), n);
+ return __atomic64_fetch_or(v, __atomic_setup(v), n);
}
-EXPORT_SYMBOL(_atomic64_or);
+EXPORT_SYMBOL(_atomic64_fetch_or);
-long long _atomic64_xor(long long *v, long long n)
+long long _atomic64_fetch_xor(long long *v, long long n)
{
- return __atomic64_xor(v, __atomic_setup(v), n);
+ return __atomic64_fetch_xor(v, __atomic_setup(v), n);
}
-EXPORT_SYMBOL(_atomic64_xor);
+EXPORT_SYMBOL(_atomic64_fetch_xor);
/*
* If any of the atomic or futex routines hit a bad address (not in
diff --git a/arch/tile/lib/atomic_asm_32.S b/arch/tile/lib/atomic_asm_32.S
index f611265633d6..1a70e6c0f259 100644
--- a/arch/tile/lib/atomic_asm_32.S
+++ b/arch/tile/lib/atomic_asm_32.S
@@ -172,15 +172,20 @@ STD_ENTRY_SECTION(__atomic\name, .text.atomic)
.endif
.endm
-atomic_op _cmpxchg, 32, "seq r26, r22, r2; { bbns r26, 3f; move r24, r3 }"
-atomic_op _xchg, 32, "move r24, r2"
-atomic_op _xchg_add, 32, "add r24, r22, r2"
-atomic_op _xchg_add_unless, 32, \
+
+/*
+ * Use __atomic32 prefix to avoid collisions with GCC builtin __atomic functions.
+ */
+
+atomic_op 32_cmpxchg, 32, "seq r26, r22, r2; { bbns r26, 3f; move r24, r3 }"
+atomic_op 32_xchg, 32, "move r24, r2"
+atomic_op 32_xchg_add, 32, "add r24, r22, r2"
+atomic_op 32_xchg_add_unless, 32, \
"sne r26, r22, r2; { bbns r26, 3f; add r24, r22, r3 }"
-atomic_op _or, 32, "or r24, r22, r2"
-atomic_op _and, 32, "and r24, r22, r2"
-atomic_op _andn, 32, "nor r2, r2, zero; and r24, r22, r2"
-atomic_op _xor, 32, "xor r24, r22, r2"
+atomic_op 32_fetch_or, 32, "or r24, r22, r2"
+atomic_op 32_fetch_and, 32, "and r24, r22, r2"
+atomic_op 32_fetch_andn, 32, "nor r2, r2, zero; and r24, r22, r2"
+atomic_op 32_fetch_xor, 32, "xor r24, r22, r2"
atomic_op 64_cmpxchg, 64, "{ seq r26, r22, r2; seq r27, r23, r3 }; \
{ bbns r26, 3f; move r24, r4 }; { bbns r27, 3f; move r25, r5 }"
@@ -192,9 +197,9 @@ atomic_op 64_xchg_add_unless, 64, \
{ bbns r26, 3f; add r24, r22, r4 }; \
{ bbns r27, 3f; add r25, r23, r5 }; \
slt_u r26, r24, r22; add r25, r25, r26"
-atomic_op 64_or, 64, "{ or r24, r22, r2; or r25, r23, r3 }"
-atomic_op 64_and, 64, "{ and r24, r22, r2; and r25, r23, r3 }"
-atomic_op 64_xor, 64, "{ xor r24, r22, r2; xor r25, r23, r3 }"
+atomic_op 64_fetch_or, 64, "{ or r24, r22, r2; or r25, r23, r3 }"
+atomic_op 64_fetch_and, 64, "{ and r24, r22, r2; and r25, r23, r3 }"
+atomic_op 64_fetch_xor, 64, "{ xor r24, r22, r2; xor r25, r23, r3 }"
jrp lr /* happy backtracer */
diff --git a/arch/tile/lib/spinlock_32.c b/arch/tile/lib/spinlock_32.c
index 88c2a53362e7..076c6cc43113 100644
--- a/arch/tile/lib/spinlock_32.c
+++ b/arch/tile/lib/spinlock_32.c
@@ -76,6 +76,12 @@ void arch_spin_unlock_wait(arch_spinlock_t *lock)
do {
delay_backoff(iterations++);
} while (READ_ONCE(lock->current_ticket) == curr);
+
+ /*
+ * The TILE architecture doesn't do read speculation; therefore
+ * a control dependency guarantees a LOAD->{LOAD,STORE} order.
+ */
+ barrier();
}
EXPORT_SYMBOL(arch_spin_unlock_wait);
diff --git a/arch/tile/lib/spinlock_64.c b/arch/tile/lib/spinlock_64.c
index c8d1f94ff1fe..a4b5b2cbce93 100644
--- a/arch/tile/lib/spinlock_64.c
+++ b/arch/tile/lib/spinlock_64.c
@@ -76,6 +76,12 @@ void arch_spin_unlock_wait(arch_spinlock_t *lock)
do {
delay_backoff(iterations++);
} while (arch_spin_current(READ_ONCE(lock->lock)) == curr);
+
+ /*
+ * The TILE architecture doesn't do read speculation; therefore
+ * a control dependency guarantees a LOAD->{LOAD,STORE} order.
+ */
+ barrier();
}
EXPORT_SYMBOL(arch_spin_unlock_wait);
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index 52fef606bc54..ff574dad95cc 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -757,7 +757,6 @@ struct boot_params *make_boot_params(struct efi_config *c)
struct boot_params *boot_params;
struct apm_bios_info *bi;
struct setup_header *hdr;
- struct efi_info *efi;
efi_loaded_image_t *image;
void *options, *handle;
efi_guid_t proto = LOADED_IMAGE_PROTOCOL_GUID;
@@ -800,7 +799,6 @@ struct boot_params *make_boot_params(struct efi_config *c)
memset(boot_params, 0x0, 0x4000);
hdr = &boot_params->hdr;
- efi = &boot_params->efi_info;
bi = &boot_params->apm_bios_info;
/* Copy the second sector to boot_params */
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 33787ee817f0..dfebbde2a4cc 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -263,7 +263,7 @@ static bool check_hw_exists(void)
msr_fail:
pr_cont("Broken PMU hardware detected, using software events only.\n");
- pr_info("%sFailed to access perfctr msr (MSR %x is %Lx)\n",
+ printk("%sFailed to access perfctr msr (MSR %x is %Lx)\n",
boot_cpu_has(X86_FEATURE_HYPERVISOR) ? KERN_INFO : KERN_ERR,
reg, val_new);
@@ -1622,6 +1622,29 @@ ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr, cha
}
EXPORT_SYMBOL_GPL(events_sysfs_show);
+ssize_t events_ht_sysfs_show(struct device *dev, struct device_attribute *attr,
+ char *page)
+{
+ struct perf_pmu_events_ht_attr *pmu_attr =
+ container_of(attr, struct perf_pmu_events_ht_attr, attr);
+
+ /*
+ * Report conditional events depending on Hyper-Threading.
+ *
+ * This is overly conservative as usually the HT special
+ * handling is not needed if the other CPU thread is idle.
+ *
+ * Note this does not (and cannot) handle the case when thread
+ * siblings are invisible, for example with virtualization
+ * if they are owned by some other guest. The user tool
+ * has to re-read when a thread sibling gets onlined later.
+ */
+ return sprintf(page, "%s",
+ topology_max_smt_threads() > 1 ?
+ pmu_attr->event_str_ht :
+ pmu_attr->event_str_noht);
+}
+
EVENT_ATTR(cpu-cycles, CPU_CYCLES );
EVENT_ATTR(instructions, INSTRUCTIONS );
EVENT_ATTR(cache-references, CACHE_REFERENCES );
@@ -2319,7 +2342,7 @@ void
perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs)
{
struct stack_frame frame;
- const void __user *fp;
+ const unsigned long __user *fp;
if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
/* TODO: We don't support guest os callchain now */
@@ -2332,7 +2355,7 @@ perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs
if (regs->flags & (X86_VM_MASK | PERF_EFLAGS_VM))
return;
- fp = (void __user *)regs->bp;
+ fp = (unsigned long __user *)regs->bp;
perf_callchain_store(entry, regs->ip);
@@ -2345,16 +2368,17 @@ perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs
pagefault_disable();
while (entry->nr < entry->max_stack) {
unsigned long bytes;
+
frame.next_frame = NULL;
frame.return_address = 0;
- if (!access_ok(VERIFY_READ, fp, 16))
+ if (!access_ok(VERIFY_READ, fp, sizeof(*fp) * 2))
break;
- bytes = __copy_from_user_nmi(&frame.next_frame, fp, 8);
+ bytes = __copy_from_user_nmi(&frame.next_frame, fp, sizeof(*fp));
if (bytes != 0)
break;
- bytes = __copy_from_user_nmi(&frame.return_address, fp+8, 8);
+ bytes = __copy_from_user_nmi(&frame.return_address, fp + 1, sizeof(*fp));
if (bytes != 0)
break;
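
The perf_callchain_user() change above only re-types fp so the reads are expressed in terms of sizeof(*fp); the walk itself is unchanged. A condensed, hedged sketch of that frame-pointer walk, assuming the usual two-word frame layout (saved frame pointer followed by return address); record_ip() and the depth limit are illustrative stand-ins:

struct example_frame {
	unsigned long next_frame;	/* saved frame pointer */
	unsigned long return_address;	/* saved return address */
};

static void example_walk_user_stack(const unsigned long __user *fp,
				    void (*record_ip)(unsigned long))
{
	struct example_frame frame;
	int depth = 0;

	while (fp && depth++ < 128) {
		if (!access_ok(VERIFY_READ, fp, sizeof(frame)))
			break;
		if (__copy_from_user_inatomic(&frame, fp, sizeof(frame)))
			break;
		record_ip(frame.return_address);
		fp = (const unsigned long __user *)frame.next_frame;
	}
}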
diff --git a/arch/x86/events/intel/Makefile b/arch/x86/events/intel/Makefile
index 3660b2cf245a..06c2baa51814 100644
--- a/arch/x86/events/intel/Makefile
+++ b/arch/x86/events/intel/Makefile
@@ -1,8 +1,8 @@
obj-$(CONFIG_CPU_SUP_INTEL) += core.o bts.o cqm.o
obj-$(CONFIG_CPU_SUP_INTEL) += ds.o knc.o
obj-$(CONFIG_CPU_SUP_INTEL) += lbr.o p4.o p6.o pt.o
-obj-$(CONFIG_PERF_EVENTS_INTEL_RAPL) += intel-rapl.o
-intel-rapl-objs := rapl.o
+obj-$(CONFIG_PERF_EVENTS_INTEL_RAPL) += intel-rapl-perf.o
+intel-rapl-perf-objs := rapl.o
obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE) += intel-uncore.o
intel-uncore-objs := uncore.o uncore_nhmex.o uncore_snb.o uncore_snbep.o
obj-$(CONFIG_PERF_EVENTS_INTEL_CSTATE) += intel-cstate.o
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 7c666958a625..0974ba11e954 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -16,6 +16,7 @@
#include <asm/cpufeature.h>
#include <asm/hardirq.h>
+#include <asm/intel-family.h>
#include <asm/apic.h>
#include "../perf_event.h"
@@ -115,6 +116,10 @@ static struct event_constraint intel_snb_event_constraints[] __read_mostly =
INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_NO_DISPATCH */
INTEL_UEVENT_CONSTRAINT(0x02a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */
+ /*
+ * When HT is off these events can only run on the bottom 4 counters
+ * When HT is on, they are impacted by the HT bug and require EXCL access
+ */
INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOPS_RETIRED.* */
INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
@@ -139,6 +144,10 @@ static struct event_constraint intel_ivb_event_constraints[] __read_mostly =
INTEL_UEVENT_CONSTRAINT(0x0ca3, 0x4), /* CYCLE_ACTIVITY.STALLS_L1D_PENDING */
INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
+ /*
+ * When HT is off these events can only run on the bottom 4 counters
+ * When HT is on, they are impacted by the HT bug and require EXCL access
+ */
INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOPS_RETIRED.* */
INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
@@ -177,19 +186,27 @@ static struct event_constraint intel_slm_event_constraints[] __read_mostly =
EVENT_CONSTRAINT_END
};
-struct event_constraint intel_skl_event_constraints[] = {
+static struct event_constraint intel_skl_event_constraints[] = {
FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
INTEL_UEVENT_CONSTRAINT(0x1c0, 0x2), /* INST_RETIRED.PREC_DIST */
+
+ /*
+ * when HT is off, these can only run on the bottom 4 counters
+ */
+ INTEL_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_INST_RETIRED.* */
+ INTEL_EVENT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_RETIRED.* */
+ INTEL_EVENT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_L3_HIT_RETIRED.* */
+ INTEL_EVENT_CONSTRAINT(0xcd, 0xf), /* MEM_TRANS_RETIRED.* */
+ INTEL_EVENT_CONSTRAINT(0xc6, 0xf), /* FRONTEND_RETIRED.* */
+
EVENT_CONSTRAINT_END
};
static struct extra_reg intel_knl_extra_regs[] __read_mostly = {
- INTEL_UEVENT_EXTRA_REG(0x01b7,
- MSR_OFFCORE_RSP_0, 0x7f9ffbffffull, RSP_0),
- INTEL_UEVENT_EXTRA_REG(0x02b7,
- MSR_OFFCORE_RSP_1, 0x3f9ffbffffull, RSP_1),
+ INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x799ffbb6e7ull, RSP_0),
+ INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0x399ffbffe7ull, RSP_1),
EVENT_EXTRA_END
};
@@ -225,14 +242,51 @@ EVENT_ATTR_STR(mem-loads, mem_ld_nhm, "event=0x0b,umask=0x10,ldlat=3");
EVENT_ATTR_STR(mem-loads, mem_ld_snb, "event=0xcd,umask=0x1,ldlat=3");
EVENT_ATTR_STR(mem-stores, mem_st_snb, "event=0xcd,umask=0x2");
-struct attribute *nhm_events_attrs[] = {
+static struct attribute *nhm_events_attrs[] = {
EVENT_PTR(mem_ld_nhm),
NULL,
};
-struct attribute *snb_events_attrs[] = {
+/*
+ * topdown events for Intel Core CPUs.
+ *
+ * The events are all specified in slots, where a slot is a free issue
+ * slot in a 4-wide pipeline. Some events are already reported in slots;
+ * for cycle events we multiply by the pipeline width (4).
+ *
+ * With Hyper-Threading on, topdown metrics are either summed or averaged
+ * between the threads of a core: (count_t0 + count_t1).
+ *
+ * For the average case the metric is always scaled to pipeline width,
+ * so we use a scale factor of 2 ((count_t0 + count_t1) / 2 * 4).
+ */
+
+EVENT_ATTR_STR_HT(topdown-total-slots, td_total_slots,
+ "event=0x3c,umask=0x0", /* cpu_clk_unhalted.thread */
+ "event=0x3c,umask=0x0,any=1"); /* cpu_clk_unhalted.thread_any */
+EVENT_ATTR_STR_HT(topdown-total-slots.scale, td_total_slots_scale, "4", "2");
+EVENT_ATTR_STR(topdown-slots-issued, td_slots_issued,
+ "event=0xe,umask=0x1"); /* uops_issued.any */
+EVENT_ATTR_STR(topdown-slots-retired, td_slots_retired,
+ "event=0xc2,umask=0x2"); /* uops_retired.retire_slots */
+EVENT_ATTR_STR(topdown-fetch-bubbles, td_fetch_bubbles,
+ "event=0x9c,umask=0x1"); /* idq_uops_not_delivered_core */
+EVENT_ATTR_STR_HT(topdown-recovery-bubbles, td_recovery_bubbles,
+ "event=0xd,umask=0x3,cmask=1", /* int_misc.recovery_cycles */
+ "event=0xd,umask=0x3,cmask=1,any=1"); /* int_misc.recovery_cycles_any */
+EVENT_ATTR_STR_HT(topdown-recovery-bubbles.scale, td_recovery_bubbles_scale,
+ "4", "2");
+
+static struct attribute *snb_events_attrs[] = {
EVENT_PTR(mem_ld_snb),
EVENT_PTR(mem_st_snb),
+ EVENT_PTR(td_slots_issued),
+ EVENT_PTR(td_slots_retired),
+ EVENT_PTR(td_fetch_bubbles),
+ EVENT_PTR(td_total_slots),
+ EVENT_PTR(td_total_slots_scale),
+ EVENT_PTR(td_recovery_bubbles),
+ EVENT_PTR(td_recovery_bubbles_scale),
NULL,
};
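
For reference, the new attributes map onto the level-1 top-down bound fractions. A hedged user-space-style sketch of how a consumer could combine the raw counts, assuming every count has already been multiplied by its advertised .scale attribute (the helper and struct names are illustrative):

/* Hypothetical consumer-side helper; all counts pre-scaled by .scale. */
struct example_topdown {
	double frontend_bound;
	double bad_speculation;
	double retiring;
	double backend_bound;
};

static struct example_topdown
example_topdown_level1(double slots, double issued, double retired,
		       double fetch_bubbles, double recovery_bubbles)
{
	struct example_topdown td;

	td.frontend_bound  = fetch_bubbles / slots;
	td.bad_speculation = (issued - retired + recovery_bubbles) / slots;
	td.retiring        = retired / slots;
	td.backend_bound   = 1.0 - td.frontend_bound -
			     td.bad_speculation - td.retiring;
	return td;
}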
@@ -250,6 +304,10 @@ static struct event_constraint intel_hsw_event_constraints[] = {
/* CYCLE_ACTIVITY.CYCLES_NO_EXECUTE */
INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf),
+ /*
+ * When HT is off these events can only run on the bottom 4 counters
+ * When HT is on, they are impacted by the HT bug and require EXCL access
+ */
INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOPS_RETIRED.* */
INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
@@ -258,12 +316,19 @@ static struct event_constraint intel_hsw_event_constraints[] = {
EVENT_CONSTRAINT_END
};
-struct event_constraint intel_bdw_event_constraints[] = {
+static struct event_constraint intel_bdw_event_constraints[] = {
FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
INTEL_UEVENT_CONSTRAINT(0x148, 0x4), /* L1D_PEND_MISS.PENDING */
INTEL_UBIT_EVENT_CONSTRAINT(0x8a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_MISS */
+ /*
+ * when HT is off, these can only run on the bottom 4 counters
+ */
+ INTEL_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_INST_RETIRED.* */
+ INTEL_EVENT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_RETIRED.* */
+ INTEL_EVENT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_L3_HIT_RETIRED.* */
+ INTEL_EVENT_CONSTRAINT(0xcd, 0xf), /* MEM_TRANS_RETIRED.* */
EVENT_CONSTRAINT_END
};
@@ -1332,6 +1397,29 @@ static __initconst const u64 atom_hw_cache_event_ids
},
};
+EVENT_ATTR_STR(topdown-total-slots, td_total_slots_slm, "event=0x3c");
+EVENT_ATTR_STR(topdown-total-slots.scale, td_total_slots_scale_slm, "2");
+/* no_alloc_cycles.not_delivered */
+EVENT_ATTR_STR(topdown-fetch-bubbles, td_fetch_bubbles_slm,
+ "event=0xca,umask=0x50");
+EVENT_ATTR_STR(topdown-fetch-bubbles.scale, td_fetch_bubbles_scale_slm, "2");
+/* uops_retired.all */
+EVENT_ATTR_STR(topdown-slots-issued, td_slots_issued_slm,
+ "event=0xc2,umask=0x10");
+/* uops_retired.all */
+EVENT_ATTR_STR(topdown-slots-retired, td_slots_retired_slm,
+ "event=0xc2,umask=0x10");
+
+static struct attribute *slm_events_attrs[] = {
+ EVENT_PTR(td_total_slots_slm),
+ EVENT_PTR(td_total_slots_scale_slm),
+ EVENT_PTR(td_fetch_bubbles_slm),
+ EVENT_PTR(td_fetch_bubbles_scale_slm),
+ EVENT_PTR(td_slots_issued_slm),
+ EVENT_PTR(td_slots_retired_slm),
+ NULL
+};
+
static struct extra_reg intel_slm_extra_regs[] __read_mostly =
{
/* must define OFFCORE_RSP_X first, see intel_fixup_er() */
@@ -3261,11 +3349,11 @@ static int intel_snb_pebs_broken(int cpu)
u32 rev = UINT_MAX; /* default to broken for unknown models */
switch (cpu_data(cpu).x86_model) {
- case 42: /* SNB */
+ case INTEL_FAM6_SANDYBRIDGE:
rev = 0x28;
break;
- case 45: /* SNB-EP */
+ case INTEL_FAM6_SANDYBRIDGE_X:
switch (cpu_data(cpu).x86_mask) {
case 6: rev = 0x618; break;
case 7: rev = 0x70c; break;
@@ -3302,6 +3390,13 @@ static void intel_snb_check_microcode(void)
}
}
+static bool is_lbr_from(unsigned long msr)
+{
+ unsigned long lbr_from_nr = x86_pmu.lbr_from + x86_pmu.lbr_nr;
+
+ return x86_pmu.lbr_from <= msr && msr < lbr_from_nr;
+}
+
/*
 * Under certain circumstances, accessing certain MSRs may cause a #GP.
 * The function tests whether the input MSR can be safely accessed.
@@ -3322,13 +3417,24 @@ static bool check_msr(unsigned long msr, u64 mask)
* Only change the bits which can be updated by wrmsrl.
*/
val_tmp = val_old ^ mask;
+
+ if (is_lbr_from(msr))
+ val_tmp = lbr_from_signext_quirk_wr(val_tmp);
+
if (wrmsrl_safe(msr, val_tmp) ||
rdmsrl_safe(msr, &val_new))
return false;
+ /*
+ * Quirk only affects validation in wrmsr(), so wrmsrl()'s value
+ * should equal rdmsrl()'s even with the quirk.
+ */
if (val_new != val_tmp)
return false;
+ if (is_lbr_from(msr))
+ val_old = lbr_from_signext_quirk_wr(val_old);
+
	/* Here it is certain that the MSR can be safely accessed.
* Restore the old value and return.
*/
@@ -3437,6 +3543,13 @@ static struct attribute *hsw_events_attrs[] = {
EVENT_PTR(cycles_ct),
EVENT_PTR(mem_ld_hsw),
EVENT_PTR(mem_st_hsw),
+ EVENT_PTR(td_slots_issued),
+ EVENT_PTR(td_slots_retired),
+ EVENT_PTR(td_fetch_bubbles),
+ EVENT_PTR(td_total_slots),
+ EVENT_PTR(td_total_slots_scale),
+ EVENT_PTR(td_recovery_bubbles),
+ EVENT_PTR(td_recovery_bubbles_scale),
NULL
};
@@ -3508,15 +3621,15 @@ __init int intel_pmu_init(void)
* Install the hw-cache-events table:
*/
switch (boot_cpu_data.x86_model) {
- case 14: /* 65nm Core "Yonah" */
+ case INTEL_FAM6_CORE_YONAH:
pr_cont("Core events, ");
break;
- case 15: /* 65nm Core2 "Merom" */
+ case INTEL_FAM6_CORE2_MEROM:
x86_add_quirk(intel_clovertown_quirk);
- case 22: /* 65nm Core2 "Merom-L" */
- case 23: /* 45nm Core2 "Penryn" */
- case 29: /* 45nm Core2 "Dunnington (MP) */
+ case INTEL_FAM6_CORE2_MEROM_L:
+ case INTEL_FAM6_CORE2_PENRYN:
+ case INTEL_FAM6_CORE2_DUNNINGTON:
memcpy(hw_cache_event_ids, core2_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
@@ -3527,9 +3640,9 @@ __init int intel_pmu_init(void)
pr_cont("Core2 events, ");
break;
- case 30: /* 45nm Nehalem */
- case 26: /* 45nm Nehalem-EP */
- case 46: /* 45nm Nehalem-EX */
+ case INTEL_FAM6_NEHALEM:
+ case INTEL_FAM6_NEHALEM_EP:
+ case INTEL_FAM6_NEHALEM_EX:
memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs,
@@ -3557,11 +3670,11 @@ __init int intel_pmu_init(void)
pr_cont("Nehalem events, ");
break;
- case 28: /* 45nm Atom "Pineview" */
- case 38: /* 45nm Atom "Lincroft" */
- case 39: /* 32nm Atom "Penwell" */
- case 53: /* 32nm Atom "Cloverview" */
- case 54: /* 32nm Atom "Cedarview" */
+ case INTEL_FAM6_ATOM_PINEVIEW:
+ case INTEL_FAM6_ATOM_LINCROFT:
+ case INTEL_FAM6_ATOM_PENWELL:
+ case INTEL_FAM6_ATOM_CLOVERVIEW:
+ case INTEL_FAM6_ATOM_CEDARVIEW:
memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
@@ -3573,9 +3686,9 @@ __init int intel_pmu_init(void)
pr_cont("Atom events, ");
break;
- case 55: /* 22nm Atom "Silvermont" */
- case 76: /* 14nm Atom "Airmont" */
- case 77: /* 22nm Atom "Silvermont Avoton/Rangely" */
+ case INTEL_FAM6_ATOM_SILVERMONT1:
+ case INTEL_FAM6_ATOM_SILVERMONT2:
+ case INTEL_FAM6_ATOM_AIRMONT:
memcpy(hw_cache_event_ids, slm_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, slm_hw_cache_extra_regs,
@@ -3587,11 +3700,12 @@ __init int intel_pmu_init(void)
x86_pmu.pebs_constraints = intel_slm_pebs_event_constraints;
x86_pmu.extra_regs = intel_slm_extra_regs;
x86_pmu.flags |= PMU_FL_HAS_RSP_1;
+ x86_pmu.cpu_events = slm_events_attrs;
pr_cont("Silvermont events, ");
break;
- case 92: /* 14nm Atom "Goldmont" */
- case 95: /* 14nm Atom "Goldmont Denverton" */
+ case INTEL_FAM6_ATOM_GOLDMONT:
+ case INTEL_FAM6_ATOM_DENVERTON:
memcpy(hw_cache_event_ids, glm_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, glm_hw_cache_extra_regs,
@@ -3614,9 +3728,9 @@ __init int intel_pmu_init(void)
pr_cont("Goldmont events, ");
break;
- case 37: /* 32nm Westmere */
- case 44: /* 32nm Westmere-EP */
- case 47: /* 32nm Westmere-EX */
+ case INTEL_FAM6_WESTMERE:
+ case INTEL_FAM6_WESTMERE_EP:
+ case INTEL_FAM6_WESTMERE_EX:
memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs,
@@ -3643,8 +3757,8 @@ __init int intel_pmu_init(void)
pr_cont("Westmere events, ");
break;
- case 42: /* 32nm SandyBridge */
- case 45: /* 32nm SandyBridge-E/EN/EP */
+ case INTEL_FAM6_SANDYBRIDGE:
+ case INTEL_FAM6_SANDYBRIDGE_X:
x86_add_quirk(intel_sandybridge_quirk);
x86_add_quirk(intel_ht_bug);
memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
@@ -3657,7 +3771,7 @@ __init int intel_pmu_init(void)
x86_pmu.event_constraints = intel_snb_event_constraints;
x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints;
x86_pmu.pebs_aliases = intel_pebs_aliases_snb;
- if (boot_cpu_data.x86_model == 45)
+ if (boot_cpu_data.x86_model == INTEL_FAM6_SANDYBRIDGE_X)
x86_pmu.extra_regs = intel_snbep_extra_regs;
else
x86_pmu.extra_regs = intel_snb_extra_regs;
@@ -3679,8 +3793,8 @@ __init int intel_pmu_init(void)
pr_cont("SandyBridge events, ");
break;
- case 58: /* 22nm IvyBridge */
- case 62: /* 22nm IvyBridge-EP/EX */
+ case INTEL_FAM6_IVYBRIDGE:
+ case INTEL_FAM6_IVYBRIDGE_X:
x86_add_quirk(intel_ht_bug);
memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
@@ -3696,7 +3810,7 @@ __init int intel_pmu_init(void)
x86_pmu.pebs_constraints = intel_ivb_pebs_event_constraints;
x86_pmu.pebs_aliases = intel_pebs_aliases_ivb;
x86_pmu.pebs_prec_dist = true;
- if (boot_cpu_data.x86_model == 62)
+ if (boot_cpu_data.x86_model == INTEL_FAM6_IVYBRIDGE_X)
x86_pmu.extra_regs = intel_snbep_extra_regs;
else
x86_pmu.extra_regs = intel_snb_extra_regs;
@@ -3714,10 +3828,10 @@ __init int intel_pmu_init(void)
break;
- case 60: /* 22nm Haswell Core */
- case 63: /* 22nm Haswell Server */
- case 69: /* 22nm Haswell ULT */
- case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */
+ case INTEL_FAM6_HASWELL_CORE:
+ case INTEL_FAM6_HASWELL_X:
+ case INTEL_FAM6_HASWELL_ULT:
+ case INTEL_FAM6_HASWELL_GT3E:
x86_add_quirk(intel_ht_bug);
x86_pmu.late_ack = true;
memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids));
@@ -3741,10 +3855,10 @@ __init int intel_pmu_init(void)
pr_cont("Haswell events, ");
break;
- case 61: /* 14nm Broadwell Core-M */
- case 86: /* 14nm Broadwell Xeon D */
- case 71: /* 14nm Broadwell + GT3e (Intel Iris Pro graphics) */
- case 79: /* 14nm Broadwell Server */
+ case INTEL_FAM6_BROADWELL_CORE:
+ case INTEL_FAM6_BROADWELL_XEON_D:
+ case INTEL_FAM6_BROADWELL_GT3E:
+ case INTEL_FAM6_BROADWELL_X:
x86_pmu.late_ack = true;
memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
@@ -3777,7 +3891,7 @@ __init int intel_pmu_init(void)
pr_cont("Broadwell events, ");
break;
- case 87: /* Knights Landing Xeon Phi */
+ case INTEL_FAM6_XEON_PHI_KNL:
memcpy(hw_cache_event_ids,
slm_hw_cache_event_ids, sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs,
@@ -3795,16 +3909,22 @@ __init int intel_pmu_init(void)
pr_cont("Knights Landing events, ");
break;
- case 142: /* 14nm Kabylake Mobile */
- case 158: /* 14nm Kabylake Desktop */
- case 78: /* 14nm Skylake Mobile */
- case 94: /* 14nm Skylake Desktop */
- case 85: /* 14nm Skylake Server */
+ case INTEL_FAM6_SKYLAKE_MOBILE:
+ case INTEL_FAM6_SKYLAKE_DESKTOP:
+ case INTEL_FAM6_SKYLAKE_X:
+ case INTEL_FAM6_KABYLAKE_MOBILE:
+ case INTEL_FAM6_KABYLAKE_DESKTOP:
x86_pmu.late_ack = true;
memcpy(hw_cache_event_ids, skl_hw_cache_event_ids, sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, skl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
intel_pmu_lbr_init_skl();
+ /* INT_MISC.RECOVERY_CYCLES has umask 1 in Skylake */
+ event_attr_td_recovery_bubbles.event_str_noht =
+ "event=0xd,umask=0x1,cmask=1";
+ event_attr_td_recovery_bubbles.event_str_ht =
+ "event=0xd,umask=0x1,cmask=1,any=1";
+
x86_pmu.event_constraints = intel_skl_event_constraints;
x86_pmu.pebs_constraints = intel_skl_pebs_event_constraints;
x86_pmu.extra_regs = intel_skl_extra_regs;
@@ -3885,6 +4005,8 @@ __init int intel_pmu_init(void)
x86_pmu.lbr_nr = 0;
}
+ if (x86_pmu.lbr_nr)
+ pr_cont("%d-deep LBR, ", x86_pmu.lbr_nr);
/*
* Access extra MSR may cause #GP under certain circumstances.
* E.g. KVM doesn't support offcore event
@@ -3917,16 +4039,14 @@ __init int intel_pmu_init(void)
*/
static __init int fixup_ht_bug(void)
{
- int cpu = smp_processor_id();
- int w, c;
+ int c;
/*
* problem not present on this CPU model, nothing to do
*/
if (!(x86_pmu.flags & PMU_FL_EXCL_ENABLED))
return 0;
- w = cpumask_weight(topology_sibling_cpumask(cpu));
- if (w > 1) {
+ if (topology_max_smt_threads() > 1) {
pr_info("PMU erratum BJ122, BV98, HSD29 worked around, HT is on\n");
return 0;
}
diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c
index 9ba4e4136a15..4c7638b91fa5 100644
--- a/arch/x86/events/intel/cstate.c
+++ b/arch/x86/events/intel/cstate.c
@@ -89,6 +89,7 @@
#include <linux/slab.h>
#include <linux/perf_event.h>
#include <asm/cpu_device_id.h>
+#include <asm/intel-family.h>
#include "../perf_event.h"
MODULE_LICENSE("GPL");
@@ -511,37 +512,37 @@ static const struct cstate_model slm_cstates __initconst = {
{ X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long) &(states) }
static const struct x86_cpu_id intel_cstates_match[] __initconst = {
- X86_CSTATES_MODEL(30, nhm_cstates), /* 45nm Nehalem */
- X86_CSTATES_MODEL(26, nhm_cstates), /* 45nm Nehalem-EP */
- X86_CSTATES_MODEL(46, nhm_cstates), /* 45nm Nehalem-EX */
+ X86_CSTATES_MODEL(INTEL_FAM6_NEHALEM, nhm_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_NEHALEM_EP, nhm_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_NEHALEM_EX, nhm_cstates),
- X86_CSTATES_MODEL(37, nhm_cstates), /* 32nm Westmere */
- X86_CSTATES_MODEL(44, nhm_cstates), /* 32nm Westmere-EP */
- X86_CSTATES_MODEL(47, nhm_cstates), /* 32nm Westmere-EX */
+ X86_CSTATES_MODEL(INTEL_FAM6_WESTMERE, nhm_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_WESTMERE_EP, nhm_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_WESTMERE_EX, nhm_cstates),
- X86_CSTATES_MODEL(42, snb_cstates), /* 32nm SandyBridge */
- X86_CSTATES_MODEL(45, snb_cstates), /* 32nm SandyBridge-E/EN/EP */
+ X86_CSTATES_MODEL(INTEL_FAM6_SANDYBRIDGE, snb_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_SANDYBRIDGE_X, snb_cstates),
- X86_CSTATES_MODEL(58, snb_cstates), /* 22nm IvyBridge */
- X86_CSTATES_MODEL(62, snb_cstates), /* 22nm IvyBridge-EP/EX */
+ X86_CSTATES_MODEL(INTEL_FAM6_IVYBRIDGE, snb_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_IVYBRIDGE_X, snb_cstates),
- X86_CSTATES_MODEL(60, snb_cstates), /* 22nm Haswell Core */
- X86_CSTATES_MODEL(63, snb_cstates), /* 22nm Haswell Server */
- X86_CSTATES_MODEL(70, snb_cstates), /* 22nm Haswell + GT3e */
+ X86_CSTATES_MODEL(INTEL_FAM6_HASWELL_CORE, snb_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_HASWELL_X, snb_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_HASWELL_GT3E, snb_cstates),
- X86_CSTATES_MODEL(69, hswult_cstates), /* 22nm Haswell ULT */
+ X86_CSTATES_MODEL(INTEL_FAM6_HASWELL_ULT, hswult_cstates),
- X86_CSTATES_MODEL(55, slm_cstates), /* 22nm Atom Silvermont */
- X86_CSTATES_MODEL(77, slm_cstates), /* 22nm Atom Avoton/Rangely */
- X86_CSTATES_MODEL(76, slm_cstates), /* 22nm Atom Airmont */
+ X86_CSTATES_MODEL(INTEL_FAM6_ATOM_SILVERMONT1, slm_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_ATOM_SILVERMONT2, slm_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_ATOM_AIRMONT, slm_cstates),
- X86_CSTATES_MODEL(61, snb_cstates), /* 14nm Broadwell Core-M */
- X86_CSTATES_MODEL(86, snb_cstates), /* 14nm Broadwell Xeon D */
- X86_CSTATES_MODEL(71, snb_cstates), /* 14nm Broadwell + GT3e */
- X86_CSTATES_MODEL(79, snb_cstates), /* 14nm Broadwell Server */
+ X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL_CORE, snb_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL_XEON_D, snb_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL_GT3E, snb_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL_X, snb_cstates),
- X86_CSTATES_MODEL(78, snb_cstates), /* 14nm Skylake Mobile */
- X86_CSTATES_MODEL(94, snb_cstates), /* 14nm Skylake Desktop */
+ X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_MOBILE, snb_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_DESKTOP, snb_cstates),
{ },
};
MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match);
diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index 9e2b40cdb05f..707d358e0dff 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -77,9 +77,11 @@ static enum {
LBR_IND_JMP |\
LBR_FAR)
-#define LBR_FROM_FLAG_MISPRED (1ULL << 63)
-#define LBR_FROM_FLAG_IN_TX (1ULL << 62)
-#define LBR_FROM_FLAG_ABORT (1ULL << 61)
+#define LBR_FROM_FLAG_MISPRED BIT_ULL(63)
+#define LBR_FROM_FLAG_IN_TX BIT_ULL(62)
+#define LBR_FROM_FLAG_ABORT BIT_ULL(61)
+
+#define LBR_FROM_SIGNEXT_2MSB (BIT_ULL(60) | BIT_ULL(59))
/*
 * x86 control flow change classification
@@ -235,6 +237,97 @@ enum {
LBR_VALID,
};
+/*
+ * For formats with LBR_TSX flags (e.g. LBR_FORMAT_EIP_FLAGS2), bits 61:62 in
+ * MSR_LAST_BRANCH_FROM_x are the TSX flags when TSX is supported, but when
+ * TSX is not supported they have no consistent behavior:
+ *
+ * - For wrmsr(), bits 61:62 are considered part of the sign extension.
+ * - For HW updates (branch captures) bits 61:62 are always OFF and are not
+ * part of the sign extension.
+ *
+ * Therefore, if:
+ *
+ * 1) LBR has TSX format
+ * 2) CPU has no TSX support enabled
+ *
+ * ... then any value passed to wrmsr() must be sign extended to 63 bits and any
+ * value from rdmsr() must be converted to have a 61-bit sign extension,
+ * ignoring the TSX flags.
+ */
+static inline bool lbr_from_signext_quirk_needed(void)
+{
+ int lbr_format = x86_pmu.intel_cap.lbr_format;
+ bool tsx_support = boot_cpu_has(X86_FEATURE_HLE) ||
+ boot_cpu_has(X86_FEATURE_RTM);
+
+ return !tsx_support && (lbr_desc[lbr_format] & LBR_TSX);
+}
+
+DEFINE_STATIC_KEY_FALSE(lbr_from_quirk_key);
+
+/* If quirk is enabled, ensure sign extension is 63 bits: */
+inline u64 lbr_from_signext_quirk_wr(u64 val)
+{
+ if (static_branch_unlikely(&lbr_from_quirk_key)) {
+ /*
+ * Sign extend into bits 61:62 while preserving bit 63.
+ *
+ * Quirk is enabled when TSX is disabled. Therefore TSX bits
+ * in val are always OFF and must be changed to be sign
+ * extension bits. Since bits 59:60 are guaranteed to be
+ * part of the sign extension bits, we can just copy them
+ * to 61:62.
+ */
+ val |= (LBR_FROM_SIGNEXT_2MSB & val) << 2;
+ }
+ return val;
+}
+
+/*
+ * If quirk is needed, ensure sign extension is 61 bits:
+ */
+u64 lbr_from_signext_quirk_rd(u64 val)
+{
+ if (static_branch_unlikely(&lbr_from_quirk_key)) {
+ /*
+ * Quirk is on when TSX is not enabled. Therefore TSX
+ * flags must be read as OFF.
+ */
+ val &= ~(LBR_FROM_FLAG_IN_TX | LBR_FROM_FLAG_ABORT);
+ }
+ return val;
+}
+
+static inline void wrlbr_from(unsigned int idx, u64 val)
+{
+ val = lbr_from_signext_quirk_wr(val);
+ wrmsrl(x86_pmu.lbr_from + idx, val);
+}
+
+static inline void wrlbr_to(unsigned int idx, u64 val)
+{
+ wrmsrl(x86_pmu.lbr_to + idx, val);
+}
+
+static inline u64 rdlbr_from(unsigned int idx)
+{
+ u64 val;
+
+ rdmsrl(x86_pmu.lbr_from + idx, val);
+
+ return lbr_from_signext_quirk_rd(val);
+}
+
+static inline u64 rdlbr_to(unsigned int idx)
+{
+ u64 val;
+
+ rdmsrl(x86_pmu.lbr_to + idx, val);
+
+ return val;
+}
+
static void __intel_pmu_lbr_restore(struct x86_perf_task_context *task_ctx)
{
int i;
@@ -251,8 +344,9 @@ static void __intel_pmu_lbr_restore(struct x86_perf_task_context *task_ctx)
tos = task_ctx->tos;
for (i = 0; i < tos; i++) {
lbr_idx = (tos - i) & mask;
- wrmsrl(x86_pmu.lbr_from + lbr_idx, task_ctx->lbr_from[i]);
- wrmsrl(x86_pmu.lbr_to + lbr_idx, task_ctx->lbr_to[i]);
+ wrlbr_from(lbr_idx, task_ctx->lbr_from[i]);
+ wrlbr_to (lbr_idx, task_ctx->lbr_to[i]);
+
if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
wrmsrl(MSR_LBR_INFO_0 + lbr_idx, task_ctx->lbr_info[i]);
}
@@ -262,9 +356,9 @@ static void __intel_pmu_lbr_restore(struct x86_perf_task_context *task_ctx)
static void __intel_pmu_lbr_save(struct x86_perf_task_context *task_ctx)
{
- int i;
unsigned lbr_idx, mask;
u64 tos;
+ int i;
if (task_ctx->lbr_callstack_users == 0) {
task_ctx->lbr_stack_state = LBR_NONE;
@@ -275,8 +369,8 @@ static void __intel_pmu_lbr_save(struct x86_perf_task_context *task_ctx)
tos = intel_pmu_lbr_tos();
for (i = 0; i < tos; i++) {
lbr_idx = (tos - i) & mask;
- rdmsrl(x86_pmu.lbr_from + lbr_idx, task_ctx->lbr_from[i]);
- rdmsrl(x86_pmu.lbr_to + lbr_idx, task_ctx->lbr_to[i]);
+ task_ctx->lbr_from[i] = rdlbr_from(lbr_idx);
+ task_ctx->lbr_to[i] = rdlbr_to(lbr_idx);
if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
rdmsrl(MSR_LBR_INFO_0 + lbr_idx, task_ctx->lbr_info[i]);
}
@@ -452,8 +546,8 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
u16 cycles = 0;
int lbr_flags = lbr_desc[lbr_format];
- rdmsrl(x86_pmu.lbr_from + lbr_idx, from);
- rdmsrl(x86_pmu.lbr_to + lbr_idx, to);
+ from = rdlbr_from(lbr_idx);
+ to = rdlbr_to(lbr_idx);
if (lbr_format == LBR_FORMAT_INFO && need_info) {
u64 info;
@@ -956,7 +1050,6 @@ void __init intel_pmu_lbr_init_core(void)
* SW branch filter usage:
* - compensate for lack of HW filter
*/
- pr_cont("4-deep LBR, ");
}
/* nehalem/westmere */
@@ -977,7 +1070,6 @@ void __init intel_pmu_lbr_init_nhm(void)
* That requires LBR_FAR but that means far
* jmp need to be filtered out
*/
- pr_cont("16-deep LBR, ");
}
/* sandy bridge */
@@ -997,7 +1089,6 @@ void __init intel_pmu_lbr_init_snb(void)
* That requires LBR_FAR but that means far
* jmp need to be filtered out
*/
- pr_cont("16-deep LBR, ");
}
/* haswell */
@@ -1011,7 +1102,8 @@ void intel_pmu_lbr_init_hsw(void)
x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
x86_pmu.lbr_sel_map = hsw_lbr_sel_map;
- pr_cont("16-deep LBR, ");
+ if (lbr_from_signext_quirk_needed())
+ static_branch_enable(&lbr_from_quirk_key);
}
/* skylake */
@@ -1031,7 +1123,6 @@ __init void intel_pmu_lbr_init_skl(void)
* That requires LBR_FAR but that means far
* jmp need to be filtered out
*/
- pr_cont("32-deep LBR, ");
}
/* atom */
@@ -1057,7 +1148,6 @@ void __init intel_pmu_lbr_init_atom(void)
* SW branch filter usage:
* - compensate for lack of HW filter
*/
- pr_cont("8-deep LBR, ");
}
/* slm */
@@ -1088,6 +1178,4 @@ void intel_pmu_lbr_init_knl(void)
x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
x86_pmu.lbr_sel_map = snb_lbr_sel_map;
-
- pr_cont("8-deep LBR, ");
}
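
Concretely, the write-side quirk above copies bits 59:60 into bits 61:62, repairing the sign extension that the hardware leaves truncated when the TSX flag bits read as zero, and the read side clears those bits again. A small worked example of the arithmetic (the sample value is illustrative, not taken from real hardware):

static void example_lbr_quirk(void)
{
	/*
	 * A canonical kernel address captured with the TSX LBR format but
	 * TSX disabled: bit 63 and bits 59:60 set, TSX bits 61:62 clear.
	 */
	u64 from = 0x9fffffff12345678ULL;

	/* Write side: copy bits 59:60 into 61:62 -> 0xffffffff12345678. */
	u64 wr = from | ((from & LBR_FROM_SIGNEXT_2MSB) << 2);

	/* Read side: clear the TSX flag bits again -> 0x9fffffff12345678. */
	u64 rd = wr & ~(LBR_FROM_FLAG_IN_TX | LBR_FROM_FLAG_ABORT);

	(void)wr; (void)rd;
}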
diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c
index e30eef4f29a6..d0c58b35155f 100644
--- a/arch/x86/events/intel/rapl.c
+++ b/arch/x86/events/intel/rapl.c
@@ -55,6 +55,7 @@
#include <linux/slab.h>
#include <linux/perf_event.h>
#include <asm/cpu_device_id.h>
+#include <asm/intel-family.h>
#include "../perf_event.h"
MODULE_LICENSE("GPL");
@@ -786,26 +787,27 @@ static const struct intel_rapl_init_fun skl_rapl_init __initconst = {
};
static const struct x86_cpu_id rapl_cpu_match[] __initconst = {
- X86_RAPL_MODEL_MATCH(42, snb_rapl_init), /* Sandy Bridge */
- X86_RAPL_MODEL_MATCH(45, snbep_rapl_init), /* Sandy Bridge-EP */
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE, snb_rapl_init),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE_X, snbep_rapl_init),
- X86_RAPL_MODEL_MATCH(58, snb_rapl_init), /* Ivy Bridge */
- X86_RAPL_MODEL_MATCH(62, snbep_rapl_init), /* IvyTown */
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE, snb_rapl_init),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE_X, snbep_rapl_init),
- X86_RAPL_MODEL_MATCH(60, hsw_rapl_init), /* Haswell */
- X86_RAPL_MODEL_MATCH(63, hsx_rapl_init), /* Haswell-Server */
- X86_RAPL_MODEL_MATCH(69, hsw_rapl_init), /* Haswell-Celeron */
- X86_RAPL_MODEL_MATCH(70, hsw_rapl_init), /* Haswell GT3e */
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_CORE, hsw_rapl_init),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_X, hsw_rapl_init),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_ULT, hsw_rapl_init),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_GT3E, hsw_rapl_init),
- X86_RAPL_MODEL_MATCH(61, hsw_rapl_init), /* Broadwell */
- X86_RAPL_MODEL_MATCH(71, hsw_rapl_init), /* Broadwell-H */
- X86_RAPL_MODEL_MATCH(79, hsx_rapl_init), /* Broadwell-Server */
- X86_RAPL_MODEL_MATCH(86, hsx_rapl_init), /* Broadwell Xeon D */
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_CORE, hsw_rapl_init),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_GT3E, hsw_rapl_init),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_X, hsw_rapl_init),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_XEON_D, hsw_rapl_init),
- X86_RAPL_MODEL_MATCH(87, knl_rapl_init), /* Knights Landing */
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNL, knl_rapl_init),
- X86_RAPL_MODEL_MATCH(78, skl_rapl_init), /* Skylake */
- X86_RAPL_MODEL_MATCH(94, skl_rapl_init), /* Skylake H/S */
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_MOBILE, skl_rapl_init),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_DESKTOP, skl_rapl_init),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_X, hsx_rapl_init),
{},
};
diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
index fce74062d981..59b4974c697f 100644
--- a/arch/x86/events/intel/uncore.c
+++ b/arch/x86/events/intel/uncore.c
@@ -1,4 +1,5 @@
#include <asm/cpu_device_id.h>
+#include <asm/intel-family.h>
#include "uncore.h"
static struct intel_uncore_type *empty_uncore[] = { NULL, };
@@ -882,7 +883,7 @@ uncore_types_init(struct intel_uncore_type **types, bool setid)
static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
struct intel_uncore_type *type;
- struct intel_uncore_pmu *pmu;
+ struct intel_uncore_pmu *pmu = NULL;
struct intel_uncore_box *box;
int phys_id, pkg, ret;
@@ -903,20 +904,37 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id
}
type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(id->driver_data)];
+
/*
- * for performance monitoring unit with multiple boxes,
- * each box has a different function id.
- */
- pmu = &type->pmus[UNCORE_PCI_DEV_IDX(id->driver_data)];
- /* Knights Landing uses a common PCI device ID for multiple instances of
- * an uncore PMU device type. There is only one entry per device type in
- * the knl_uncore_pci_ids table inspite of multiple devices present for
- * some device types. Hence PCI device idx would be 0 for all devices.
- * So increment pmu pointer to point to an unused array element.
+ * Some platforms, e.g. Knights Landing, use a common PCI device ID
+	 * for multiple instances of an uncore PMU device type. Check the PCI
+	 * slot and function to identify which uncore box the device belongs to.
*/
- if (boot_cpu_data.x86_model == 87) {
- while (pmu->func_id >= 0)
- pmu++;
+ if (id->driver_data & ~0xffff) {
+ struct pci_driver *pci_drv = pdev->driver;
+ const struct pci_device_id *ids = pci_drv->id_table;
+ unsigned int devfn;
+
+ while (ids && ids->vendor) {
+ if ((ids->vendor == pdev->vendor) &&
+ (ids->device == pdev->device)) {
+ devfn = PCI_DEVFN(UNCORE_PCI_DEV_DEV(ids->driver_data),
+ UNCORE_PCI_DEV_FUNC(ids->driver_data));
+ if (devfn == pdev->devfn) {
+ pmu = &type->pmus[UNCORE_PCI_DEV_IDX(ids->driver_data)];
+ break;
+ }
+ }
+ ids++;
+ }
+ if (pmu == NULL)
+ return -ENODEV;
+ } else {
+ /*
+ * for performance monitoring unit with multiple boxes,
+ * each box has a different function id.
+ */
+ pmu = &type->pmus[UNCORE_PCI_DEV_IDX(id->driver_data)];
}
if (WARN_ON_ONCE(pmu->boxes[pkg] != NULL))
@@ -956,7 +974,7 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id
static void uncore_pci_remove(struct pci_dev *pdev)
{
- struct intel_uncore_box *box = pci_get_drvdata(pdev);
+ struct intel_uncore_box *box;
struct intel_uncore_pmu *pmu;
int i, phys_id, pkg;
@@ -1361,30 +1379,32 @@ static const struct intel_uncore_init_fun knl_uncore_init __initconst = {
};
static const struct intel_uncore_init_fun skl_uncore_init __initconst = {
+ .cpu_init = skl_uncore_cpu_init,
.pci_init = skl_uncore_pci_init,
};
static const struct x86_cpu_id intel_uncore_match[] __initconst = {
- X86_UNCORE_MODEL_MATCH(26, nhm_uncore_init), /* Nehalem */
- X86_UNCORE_MODEL_MATCH(30, nhm_uncore_init),
- X86_UNCORE_MODEL_MATCH(37, nhm_uncore_init), /* Westmere */
- X86_UNCORE_MODEL_MATCH(44, nhm_uncore_init),
- X86_UNCORE_MODEL_MATCH(42, snb_uncore_init), /* Sandy Bridge */
- X86_UNCORE_MODEL_MATCH(58, ivb_uncore_init), /* Ivy Bridge */
- X86_UNCORE_MODEL_MATCH(60, hsw_uncore_init), /* Haswell */
- X86_UNCORE_MODEL_MATCH(69, hsw_uncore_init), /* Haswell Celeron */
- X86_UNCORE_MODEL_MATCH(70, hsw_uncore_init), /* Haswell */
- X86_UNCORE_MODEL_MATCH(61, bdw_uncore_init), /* Broadwell */
- X86_UNCORE_MODEL_MATCH(71, bdw_uncore_init), /* Broadwell */
- X86_UNCORE_MODEL_MATCH(45, snbep_uncore_init), /* Sandy Bridge-EP */
- X86_UNCORE_MODEL_MATCH(46, nhmex_uncore_init), /* Nehalem-EX */
- X86_UNCORE_MODEL_MATCH(47, nhmex_uncore_init), /* Westmere-EX aka. Xeon E7 */
- X86_UNCORE_MODEL_MATCH(62, ivbep_uncore_init), /* Ivy Bridge-EP */
- X86_UNCORE_MODEL_MATCH(63, hswep_uncore_init), /* Haswell-EP */
- X86_UNCORE_MODEL_MATCH(79, bdx_uncore_init), /* BDX-EP */
- X86_UNCORE_MODEL_MATCH(86, bdx_uncore_init), /* BDX-DE */
- X86_UNCORE_MODEL_MATCH(87, knl_uncore_init), /* Knights Landing */
- X86_UNCORE_MODEL_MATCH(94, skl_uncore_init), /* SkyLake */
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_NEHALEM_EP, nhm_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_NEHALEM, nhm_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_WESTMERE, nhm_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_WESTMERE_EP, nhm_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE, snb_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE, ivb_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_HASWELL_CORE, hsw_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_HASWELL_ULT, hsw_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_HASWELL_GT3E, hsw_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_BROADWELL_CORE, bdw_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_BROADWELL_GT3E, bdw_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE_X, snbep_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_NEHALEM_EX, nhmex_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_WESTMERE_EX, nhmex_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE_X, ivbep_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_HASWELL_X, hswep_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_BROADWELL_X, bdx_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_BROADWELL_XEON_D, bdx_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNL, knl_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SKYLAKE_DESKTOP,skl_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SKYLAKE_MOBILE, skl_uncore_init),
{},
};
diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h
index 79766b9a3580..d6063e438158 100644
--- a/arch/x86/events/intel/uncore.h
+++ b/arch/x86/events/intel/uncore.h
@@ -15,7 +15,11 @@
#define UNCORE_PMC_IDX_FIXED UNCORE_PMC_IDX_MAX_GENERIC
#define UNCORE_PMC_IDX_MAX (UNCORE_PMC_IDX_FIXED + 1)
+#define UNCORE_PCI_DEV_FULL_DATA(dev, func, type, idx) \
+ ((dev << 24) | (func << 16) | (type << 8) | idx)
#define UNCORE_PCI_DEV_DATA(type, idx) ((type << 8) | idx)
+#define UNCORE_PCI_DEV_DEV(data) ((data >> 24) & 0xff)
+#define UNCORE_PCI_DEV_FUNC(data) ((data >> 16) & 0xff)
#define UNCORE_PCI_DEV_TYPE(data) ((data >> 8) & 0xff)
#define UNCORE_PCI_DEV_IDX(data) (data & 0xff)
#define UNCORE_EXTRA_PCI_DEV 0xff
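
The new macros pack the PCI device and function numbers into the upper bytes of driver_data, which is how uncore_pci_probe() above distinguishes boxes that share one device ID (an entry with anything above bit 15 set takes the devfn-matching path). A small worked example, using the values the MC1 UClk entry in the Knights Landing table further down supplies; the literal type value is illustrative:

static void example_uncore_pci_dev_data(void)
{
	/* dev 11, func 0, type 1 (illustrative), idx 1. */
	unsigned long data = UNCORE_PCI_DEV_FULL_DATA(11, 0, 1, 1);

	unsigned int dev   = UNCORE_PCI_DEV_DEV(data);	/* 11 */
	unsigned int func  = UNCORE_PCI_DEV_FUNC(data);	/*  0 */
	unsigned int type  = UNCORE_PCI_DEV_TYPE(data);	/*  1 */
	unsigned int idx   = UNCORE_PCI_DEV_IDX(data);	/*  1 */
	unsigned int devfn = PCI_DEVFN(dev, func);	/* compared with pdev->devfn */

	(void)type; (void)idx; (void)devfn;
}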
@@ -360,6 +364,7 @@ int bdw_uncore_pci_init(void);
int skl_uncore_pci_init(void);
void snb_uncore_cpu_init(void);
void nhm_uncore_cpu_init(void);
+void skl_uncore_cpu_init(void);
int snb_pci2phy_map_init(int devid);
/* perf_event_intel_uncore_snbep.c */
diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c
index 96531d2b843f..97a69dbba649 100644
--- a/arch/x86/events/intel/uncore_snb.c
+++ b/arch/x86/events/intel/uncore_snb.c
@@ -1,4 +1,4 @@
-/* Nehalem/SandBridge/Haswell uncore support */
+/* Nehalem/SandyBridge/Haswell/Broadwell/Skylake uncore support */
#include "uncore.h"
/* Uncore IMC PCI IDs */
@@ -9,6 +9,7 @@
#define PCI_DEVICE_ID_INTEL_HSW_U_IMC 0x0a04
#define PCI_DEVICE_ID_INTEL_BDW_IMC 0x1604
#define PCI_DEVICE_ID_INTEL_SKL_IMC 0x191f
+#define PCI_DEVICE_ID_INTEL_SKL_U_IMC 0x190c
/* SNB event control */
#define SNB_UNC_CTL_EV_SEL_MASK 0x000000ff
@@ -64,6 +65,10 @@
#define NHM_UNC_PERFEVTSEL0 0x3c0
#define NHM_UNC_UNCORE_PMC0 0x3b0
+/* SKL uncore global control */
+#define SKL_UNC_PERF_GLOBAL_CTL 0xe01
+#define SKL_UNC_GLOBAL_CTL_CORE_ALL ((1 << 5) - 1)
+
DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7");
DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15");
DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18");
@@ -179,6 +184,60 @@ void snb_uncore_cpu_init(void)
snb_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores;
}
+static void skl_uncore_msr_init_box(struct intel_uncore_box *box)
+{
+ if (box->pmu->pmu_idx == 0) {
+ wrmsrl(SKL_UNC_PERF_GLOBAL_CTL,
+ SNB_UNC_GLOBAL_CTL_EN | SKL_UNC_GLOBAL_CTL_CORE_ALL);
+ }
+}
+
+static void skl_uncore_msr_exit_box(struct intel_uncore_box *box)
+{
+ if (box->pmu->pmu_idx == 0)
+ wrmsrl(SKL_UNC_PERF_GLOBAL_CTL, 0);
+}
+
+static struct intel_uncore_ops skl_uncore_msr_ops = {
+ .init_box = skl_uncore_msr_init_box,
+ .exit_box = skl_uncore_msr_exit_box,
+ .disable_event = snb_uncore_msr_disable_event,
+ .enable_event = snb_uncore_msr_enable_event,
+ .read_counter = uncore_msr_read_counter,
+};
+
+static struct intel_uncore_type skl_uncore_cbox = {
+ .name = "cbox",
+ .num_counters = 4,
+ .num_boxes = 5,
+ .perf_ctr_bits = 44,
+ .fixed_ctr_bits = 48,
+ .perf_ctr = SNB_UNC_CBO_0_PER_CTR0,
+ .event_ctl = SNB_UNC_CBO_0_PERFEVTSEL0,
+ .fixed_ctr = SNB_UNC_FIXED_CTR,
+ .fixed_ctl = SNB_UNC_FIXED_CTR_CTRL,
+ .single_fixed = 1,
+ .event_mask = SNB_UNC_RAW_EVENT_MASK,
+ .msr_offset = SNB_UNC_CBO_MSR_OFFSET,
+ .ops = &skl_uncore_msr_ops,
+ .format_group = &snb_uncore_format_group,
+ .event_descs = snb_uncore_events,
+};
+
+static struct intel_uncore_type *skl_msr_uncores[] = {
+ &skl_uncore_cbox,
+ &snb_uncore_arb,
+ NULL,
+};
+
+void skl_uncore_cpu_init(void)
+{
+ uncore_msr_uncores = skl_msr_uncores;
+ if (skl_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores)
+ skl_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores;
+ snb_uncore_arb.ops = &skl_uncore_msr_ops;
+}
+
enum {
SNB_PCI_UNCORE_IMC,
};
@@ -544,6 +603,11 @@ static const struct pci_device_id skl_uncore_pci_ids[] = {
PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_IMC),
.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
},
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_U_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+
{ /* end: all zeroes */ },
};
@@ -587,6 +651,7 @@ static const struct imc_uncore_pci_dev desktop_imc_pci_ids[] = {
IMC_DEV(HSW_U_IMC, &hsw_uncore_pci_driver), /* 4th Gen Core ULT Mobile Processor */
IMC_DEV(BDW_IMC, &bdw_uncore_pci_driver), /* 5th Gen Core U */
IMC_DEV(SKL_IMC, &skl_uncore_pci_driver), /* 6th Gen Core */
+ IMC_DEV(SKL_U_IMC, &skl_uncore_pci_driver), /* 6th Gen Core U */
{ /* end marker */ }
};
diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c
index 874e8bd64d1d..824e54086e07 100644
--- a/arch/x86/events/intel/uncore_snbep.c
+++ b/arch/x86/events/intel/uncore_snbep.c
@@ -2164,21 +2164,101 @@ static struct intel_uncore_type *knl_pci_uncores[] = {
*/
static const struct pci_device_id knl_uncore_pci_ids[] = {
- { /* MC UClk */
+ { /* MC0 UClk */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7841),
- .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_MC_UCLK, 0),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(10, 0, KNL_PCI_UNCORE_MC_UCLK, 0),
},
- { /* MC DClk Channel */
+ { /* MC1 UClk */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7841),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(11, 0, KNL_PCI_UNCORE_MC_UCLK, 1),
+ },
+ { /* MC0 DClk CH 0 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7843),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(8, 2, KNL_PCI_UNCORE_MC_DCLK, 0),
+ },
+ { /* MC0 DClk CH 1 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7843),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(8, 3, KNL_PCI_UNCORE_MC_DCLK, 1),
+ },
+ { /* MC0 DClk CH 2 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7843),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(8, 4, KNL_PCI_UNCORE_MC_DCLK, 2),
+ },
+ { /* MC1 DClk CH 0 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7843),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(9, 2, KNL_PCI_UNCORE_MC_DCLK, 3),
+ },
+ { /* MC1 DClk CH 1 */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7843),
- .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_MC_DCLK, 0),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(9, 3, KNL_PCI_UNCORE_MC_DCLK, 4),
+ },
+ { /* MC1 DClk CH 2 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7843),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(9, 4, KNL_PCI_UNCORE_MC_DCLK, 5),
+ },
+ { /* EDC0 UClk */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(15, 0, KNL_PCI_UNCORE_EDC_UCLK, 0),
+ },
+ { /* EDC1 UClk */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(16, 0, KNL_PCI_UNCORE_EDC_UCLK, 1),
+ },
+ { /* EDC2 UClk */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(17, 0, KNL_PCI_UNCORE_EDC_UCLK, 2),
+ },
+ { /* EDC3 UClk */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 0, KNL_PCI_UNCORE_EDC_UCLK, 3),
},
- { /* EDC UClk */
+ { /* EDC4 UClk */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833),
- .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_EDC_UCLK, 0),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(19, 0, KNL_PCI_UNCORE_EDC_UCLK, 4),
+ },
+ { /* EDC5 UClk */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(20, 0, KNL_PCI_UNCORE_EDC_UCLK, 5),
+ },
+ { /* EDC6 UClk */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(21, 0, KNL_PCI_UNCORE_EDC_UCLK, 6),
+ },
+ { /* EDC7 UClk */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(22, 0, KNL_PCI_UNCORE_EDC_UCLK, 7),
+ },
+ { /* EDC0 EClk */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(24, 2, KNL_PCI_UNCORE_EDC_ECLK, 0),
+ },
+ { /* EDC1 EClk */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(25, 2, KNL_PCI_UNCORE_EDC_ECLK, 1),
+ },
+ { /* EDC2 EClk */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(26, 2, KNL_PCI_UNCORE_EDC_ECLK, 2),
+ },
+ { /* EDC3 EClk */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(27, 2, KNL_PCI_UNCORE_EDC_ECLK, 3),
+ },
+ { /* EDC4 EClk */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(28, 2, KNL_PCI_UNCORE_EDC_ECLK, 4),
+ },
+ { /* EDC5 EClk */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(29, 2, KNL_PCI_UNCORE_EDC_ECLK, 5),
+ },
+ { /* EDC6 EClk */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(30, 2, KNL_PCI_UNCORE_EDC_ECLK, 6),
},
- { /* EDC EClk */
+ { /* EDC7 EClk */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835),
- .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_EDC_ECLK, 0),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(31, 2, KNL_PCI_UNCORE_EDC_ECLK, 7),
},
{ /* M2PCIe */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7817),
diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c
index 85ef3c2e80e0..50b3a056f96b 100644
--- a/arch/x86/events/msr.c
+++ b/arch/x86/events/msr.c
@@ -1,4 +1,5 @@
#include <linux/perf_event.h>
+#include <asm/intel-family.h>
enum perf_msr_id {
PERF_MSR_TSC = 0,
@@ -34,39 +35,43 @@ static bool test_intel(int idx)
return false;
switch (boot_cpu_data.x86_model) {
- case 30: /* 45nm Nehalem */
- case 26: /* 45nm Nehalem-EP */
- case 46: /* 45nm Nehalem-EX */
-
- case 37: /* 32nm Westmere */
- case 44: /* 32nm Westmere-EP */
- case 47: /* 32nm Westmere-EX */
-
- case 42: /* 32nm SandyBridge */
- case 45: /* 32nm SandyBridge-E/EN/EP */
-
- case 58: /* 22nm IvyBridge */
- case 62: /* 22nm IvyBridge-EP/EX */
-
- case 60: /* 22nm Haswell Core */
- case 63: /* 22nm Haswell Server */
- case 69: /* 22nm Haswell ULT */
- case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */
-
- case 61: /* 14nm Broadwell Core-M */
- case 86: /* 14nm Broadwell Xeon D */
- case 71: /* 14nm Broadwell + GT3e (Intel Iris Pro graphics) */
- case 79: /* 14nm Broadwell Server */
-
- case 55: /* 22nm Atom "Silvermont" */
- case 77: /* 22nm Atom "Silvermont Avoton/Rangely" */
- case 76: /* 14nm Atom "Airmont" */
+ case INTEL_FAM6_NEHALEM:
+ case INTEL_FAM6_NEHALEM_EP:
+ case INTEL_FAM6_NEHALEM_EX:
+
+ case INTEL_FAM6_WESTMERE:
+ case INTEL_FAM6_WESTMERE2:
+ case INTEL_FAM6_WESTMERE_EP:
+ case INTEL_FAM6_WESTMERE_EX:
+
+ case INTEL_FAM6_SANDYBRIDGE:
+ case INTEL_FAM6_SANDYBRIDGE_X:
+
+ case INTEL_FAM6_IVYBRIDGE:
+ case INTEL_FAM6_IVYBRIDGE_X:
+
+ case INTEL_FAM6_HASWELL_CORE:
+ case INTEL_FAM6_HASWELL_X:
+ case INTEL_FAM6_HASWELL_ULT:
+ case INTEL_FAM6_HASWELL_GT3E:
+
+ case INTEL_FAM6_BROADWELL_CORE:
+ case INTEL_FAM6_BROADWELL_XEON_D:
+ case INTEL_FAM6_BROADWELL_GT3E:
+ case INTEL_FAM6_BROADWELL_X:
+
+ case INTEL_FAM6_ATOM_SILVERMONT1:
+ case INTEL_FAM6_ATOM_SILVERMONT2:
+ case INTEL_FAM6_ATOM_AIRMONT:
if (idx == PERF_MSR_SMI)
return true;
break;
- case 78: /* 14nm Skylake Mobile */
- case 94: /* 14nm Skylake Desktop */
+ case INTEL_FAM6_SKYLAKE_MOBILE:
+ case INTEL_FAM6_SKYLAKE_DESKTOP:
+ case INTEL_FAM6_SKYLAKE_X:
+ case INTEL_FAM6_KABYLAKE_MOBILE:
+ case INTEL_FAM6_KABYLAKE_DESKTOP:
if (idx == PERF_MSR_SMI || idx == PERF_MSR_PPERF)
return true;
break;
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 8bd764df815d..8c4a47706296 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -668,6 +668,14 @@ static struct perf_pmu_events_attr event_attr_##v = { \
.event_str = str, \
};
+#define EVENT_ATTR_STR_HT(_name, v, noht, ht) \
+static struct perf_pmu_events_ht_attr event_attr_##v = { \
+ .attr = __ATTR(_name, 0444, events_ht_sysfs_show, NULL),\
+ .id = 0, \
+ .event_str_noht = noht, \
+ .event_str_ht = ht, \
+}
+
extern struct x86_pmu x86_pmu __read_mostly;
static inline bool x86_pmu_has_lbr_callstack(void)
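
For reference, an invocation such as the topdown-total-slots one in core.c expands, roughly, to a static perf_pmu_events_ht_attr whose string is chosen at show time by events_ht_sysfs_show() based on topology_max_smt_threads(). A hedged sketch of the expansion (modulo exact __ATTR() details):

/*
 * Roughly what
 *   EVENT_ATTR_STR_HT(topdown-total-slots, td_total_slots,
 *                     "event=0x3c,umask=0x0",
 *                     "event=0x3c,umask=0x0,any=1");
 * expands to:
 */
static struct perf_pmu_events_ht_attr event_attr_td_total_slots = {
	.attr		= __ATTR(topdown-total-slots, 0444,
				 events_ht_sysfs_show, NULL),
	.id		= 0,
	.event_str_noht	= "event=0x3c,umask=0x0",
	.event_str_ht	= "event=0x3c,umask=0x0,any=1",
};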
@@ -803,6 +811,8 @@ struct attribute **merge_attr(struct attribute **a, struct attribute **b);
ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr,
char *page);
+ssize_t events_ht_sysfs_show(struct device *dev, struct device_attribute *attr,
+ char *page);
#ifdef CONFIG_CPU_SUP_AMD
@@ -892,6 +902,8 @@ void intel_ds_init(void);
void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in);
+u64 lbr_from_signext_quirk_wr(u64 val);
+
void intel_pmu_lbr_reset(void);
void intel_pmu_lbr_enable(struct perf_event *event);
diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h
index 3e8674288198..a58b99811105 100644
--- a/arch/x86/include/asm/atomic.h
+++ b/arch/x86/include/asm/atomic.h
@@ -171,6 +171,16 @@ static __always_inline int atomic_sub_return(int i, atomic_t *v)
#define atomic_inc_return(v) (atomic_add_return(1, v))
#define atomic_dec_return(v) (atomic_sub_return(1, v))
+static __always_inline int atomic_fetch_add(int i, atomic_t *v)
+{
+ return xadd(&v->counter, i);
+}
+
+static __always_inline int atomic_fetch_sub(int i, atomic_t *v)
+{
+ return xadd(&v->counter, -i);
+}
+
static __always_inline int atomic_cmpxchg(atomic_t *v, int old, int new)
{
return cmpxchg(&v->counter, old, new);
@@ -190,10 +200,29 @@ static inline void atomic_##op(int i, atomic_t *v) \
: "memory"); \
}
-ATOMIC_OP(and)
-ATOMIC_OP(or)
-ATOMIC_OP(xor)
+#define ATOMIC_FETCH_OP(op, c_op) \
+static inline int atomic_fetch_##op(int i, atomic_t *v) \
+{ \
+ int old, val = atomic_read(v); \
+ for (;;) { \
+ old = atomic_cmpxchg(v, val, val c_op i); \
+ if (old == val) \
+ break; \
+ val = old; \
+ } \
+ return old; \
+}
+
+#define ATOMIC_OPS(op, c_op) \
+ ATOMIC_OP(op) \
+ ATOMIC_FETCH_OP(op, c_op)
+
+ATOMIC_OPS(and, &)
+ATOMIC_OPS(or , |)
+ATOMIC_OPS(xor, ^)
+#undef ATOMIC_OPS
+#undef ATOMIC_FETCH_OP
#undef ATOMIC_OP
/**
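
The fetch variants return the value the counter held before the operation, which the plain atomic_or()/atomic_and()/atomic_xor() helpers do not; a common hedged use is an atomic test-and-set of a flag bit (names are illustrative):

#define EXAMPLE_FLAG_ACTIVE	0x1

/* Set the flag and report whether it was already set. */
static bool example_mark_active(atomic_t *flags)
{
	int old = atomic_fetch_or(EXAMPLE_FLAG_ACTIVE, flags);

	return old & EXAMPLE_FLAG_ACTIVE;
}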
diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h
index a984111135b1..71d7705fb303 100644
--- a/arch/x86/include/asm/atomic64_32.h
+++ b/arch/x86/include/asm/atomic64_32.h
@@ -320,10 +320,29 @@ static inline void atomic64_##op(long long i, atomic64_t *v) \
c = old; \
}
-ATOMIC64_OP(and, &)
-ATOMIC64_OP(or, |)
-ATOMIC64_OP(xor, ^)
+#define ATOMIC64_FETCH_OP(op, c_op) \
+static inline long long atomic64_fetch_##op(long long i, atomic64_t *v) \
+{ \
+ long long old, c = 0; \
+ while ((old = atomic64_cmpxchg(v, c, c c_op i)) != c) \
+ c = old; \
+ return old; \
+}
+
+ATOMIC64_FETCH_OP(add, +)
+
+#define atomic64_fetch_sub(i, v) atomic64_fetch_add(-(i), (v))
+
+#define ATOMIC64_OPS(op, c_op) \
+ ATOMIC64_OP(op, c_op) \
+ ATOMIC64_FETCH_OP(op, c_op)
+
+ATOMIC64_OPS(and, &)
+ATOMIC64_OPS(or, |)
+ATOMIC64_OPS(xor, ^)
+#undef ATOMIC64_OPS
+#undef ATOMIC64_FETCH_OP
#undef ATOMIC64_OP
#endif /* _ASM_X86_ATOMIC64_32_H */
diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h
index 037351022f54..70eed0e14553 100644
--- a/arch/x86/include/asm/atomic64_64.h
+++ b/arch/x86/include/asm/atomic64_64.h
@@ -158,6 +158,16 @@ static inline long atomic64_sub_return(long i, atomic64_t *v)
return atomic64_add_return(-i, v);
}
+static inline long atomic64_fetch_add(long i, atomic64_t *v)
+{
+ return xadd(&v->counter, i);
+}
+
+static inline long atomic64_fetch_sub(long i, atomic64_t *v)
+{
+ return xadd(&v->counter, -i);
+}
+
#define atomic64_inc_return(v) (atomic64_add_return(1, (v)))
#define atomic64_dec_return(v) (atomic64_sub_return(1, (v)))
@@ -229,10 +239,29 @@ static inline void atomic64_##op(long i, atomic64_t *v) \
: "memory"); \
}
-ATOMIC64_OP(and)
-ATOMIC64_OP(or)
-ATOMIC64_OP(xor)
+#define ATOMIC64_FETCH_OP(op, c_op) \
+static inline long atomic64_fetch_##op(long i, atomic64_t *v) \
+{ \
+ long old, val = atomic64_read(v); \
+ for (;;) { \
+ old = atomic64_cmpxchg(v, val, val c_op i); \
+ if (old == val) \
+ break; \
+ val = old; \
+ } \
+ return old; \
+}
+
+#define ATOMIC64_OPS(op, c_op) \
+ ATOMIC64_OP(op) \
+ ATOMIC64_FETCH_OP(op, c_op)
+
+ATOMIC64_OPS(and, &)
+ATOMIC64_OPS(or, |)
+ATOMIC64_OPS(xor, ^)
+#undef ATOMIC64_OPS
+#undef ATOMIC64_FETCH_OP
#undef ATOMIC64_OP
#endif /* _ASM_X86_ATOMIC64_64_H */
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 4a413485f9eb..c64b1e9c5d1a 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -301,10 +301,6 @@
#define X86_BUG_FXSAVE_LEAK X86_BUG(6) /* FXSAVE leaks FOP/FIP/FOP */
#define X86_BUG_CLFLUSH_MONITOR X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */
#define X86_BUG_SYSRET_SS_ATTRS X86_BUG(8) /* SYSRET doesn't fix up SS attrs */
-#define X86_BUG_NULL_SEG X86_BUG(9) /* Nulling a selector preserves the base */
-#define X86_BUG_SWAPGS_FENCE X86_BUG(10) /* SWAPGS without input dep on GS */
-
-
#ifdef CONFIG_X86_32
/*
* 64-bit kernels don't use X86_BUG_ESPFIX. Make the define conditional
@@ -312,5 +308,7 @@
*/
#define X86_BUG_ESPFIX X86_BUG(9) /* "" IRET to 16-bit SS corrupts ESP/RSP high bits */
#endif
+#define X86_BUG_NULL_SEG X86_BUG(10) /* Nulling a selector preserves the base */
+#define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* SWAPGS without input dep on GS */
#endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 78d1e7467eae..55b4596ef688 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -41,10 +41,9 @@ extern unsigned long asmlinkage efi_call_phys(void *, ...);
/*
* Wrap all the virtual calls in a way that forces the parameters on the stack.
*/
-#define arch_efi_call_virt(f, args...) \
+#define arch_efi_call_virt(p, f, args...) \
({ \
- ((efi_##f##_t __attribute__((regparm(0)))*) \
- efi.systab->runtime->f)(args); \
+ ((efi_##f##_t __attribute__((regparm(0)))*) p->f)(args); \
})
#define efi_ioremap(addr, size, type, attr) ioremap_cache(addr, size)
@@ -81,8 +80,8 @@ struct efi_scratch {
} \
})
-#define arch_efi_call_virt(f, args...) \
- efi_call((void *)efi.systab->runtime->f, args) \
+#define arch_efi_call_virt(p, f, args...) \
+ efi_call((void *)p->f, args) \
#define arch_efi_call_virt_teardown() \
({ \
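
With the extra parameter, callers pass the runtime services table explicitly instead of the macro reaching for efi.systab->runtime itself. A hedged example of a call site under the new three-argument form (the wrapper function is illustrative; the generic efi_call_virt() machinery is the real user):

static efi_status_t example_get_time(efi_runtime_services_t *rt,
				     efi_time_t *tm)
{
	/* rt is whatever runtime table the caller wants to go through. */
	return arch_efi_call_virt(rt, get_time, tm, NULL);
}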
diff --git a/arch/x86/include/asm/mutex_32.h b/arch/x86/include/asm/mutex_32.h
index 85e6cda45a02..e9355a84fc67 100644
--- a/arch/x86/include/asm/mutex_32.h
+++ b/arch/x86/include/asm/mutex_32.h
@@ -101,7 +101,7 @@ static inline int __mutex_fastpath_trylock(atomic_t *count,
int (*fail_fn)(atomic_t *))
{
/* cmpxchg because it never induces a false contention state. */
- if (likely(atomic_cmpxchg(count, 1, 0) == 1))
+ if (likely(atomic_read(count) == 1 && atomic_cmpxchg(count, 1, 0) == 1))
return 1;
return 0;
diff --git a/arch/x86/include/asm/mutex_64.h b/arch/x86/include/asm/mutex_64.h
index 07537a44216e..d9850758464e 100644
--- a/arch/x86/include/asm/mutex_64.h
+++ b/arch/x86/include/asm/mutex_64.h
@@ -118,10 +118,10 @@ do { \
static inline int __mutex_fastpath_trylock(atomic_t *count,
int (*fail_fn)(atomic_t *))
{
- if (likely(atomic_cmpxchg(count, 1, 0) == 1))
+ if (likely(atomic_read(count) == 1 && atomic_cmpxchg(count, 1, 0) == 1))
return 1;
- else
- return 0;
+
+ return 0;
}
#endif /* _ASM_X86_MUTEX_64_H */
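
The added atomic_read() makes the trylock fast path check the count with a plain load first, so an already-locked mutex no longer pulls the cache line into exclusive state just to fail the cmpxchg. The same check-then-cmpxchg pattern in isolation, as a hedged sketch:

static inline bool example_trylock(atomic_t *count)
{
	if (atomic_read(count) != 1)
		return false;		/* contended: avoid dirtying the line */

	return atomic_cmpxchg(count, 1, 0) == 1;
}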
diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h
index fdcc04020636..7c1c89598688 100644
--- a/arch/x86/include/asm/pvclock.h
+++ b/arch/x86/include/asm/pvclock.h
@@ -69,29 +69,22 @@ static inline u64 pvclock_scale_delta(u64 delta, u32 mul_frac, int shift)
}
static __always_inline
-u64 pvclock_get_nsec_offset(const struct pvclock_vcpu_time_info *src)
-{
- u64 delta = rdtsc_ordered() - src->tsc_timestamp;
- return pvclock_scale_delta(delta, src->tsc_to_system_mul,
- src->tsc_shift);
-}
-
-static __always_inline
unsigned __pvclock_read_cycles(const struct pvclock_vcpu_time_info *src,
cycle_t *cycles, u8 *flags)
{
unsigned version;
- cycle_t ret, offset;
- u8 ret_flags;
+ cycle_t offset;
+ u64 delta;
version = src->version;
+ /* Make the latest version visible */
+ smp_rmb();
- offset = pvclock_get_nsec_offset(src);
- ret = src->system_time + offset;
- ret_flags = src->flags;
-
- *cycles = ret;
- *flags = ret_flags;
+ delta = rdtsc_ordered() - src->tsc_timestamp;
+ offset = pvclock_scale_delta(delta, src->tsc_to_system_mul,
+ src->tsc_shift);
+ *cycles = src->system_time + offset;
+ *flags = src->flags;
return version;
}
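
The reader above is one half of a seqcount-style protocol: the hypervisor bumps src->version around updates, and the smp_rmb() keeps the field reads from being ordered before the version load. A hedged sketch of the retry loop a caller builds around __pvclock_read_cycles() (flag handling omitted, function name illustrative):

static cycle_t example_pvclock_read(const struct pvclock_vcpu_time_info *src)
{
	unsigned version;
	cycle_t cycles;
	u8 flags;

	do {
		version = __pvclock_read_cycles(src, &cycles, &flags);
		/* An odd version means an update was in progress; retry. */
	} while ((src->version & 1) || version != src->version);

	return cycles;
}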
diff --git a/arch/x86/include/asm/rwsem.h b/arch/x86/include/asm/rwsem.h
index 453744c1d347..089ced4edbbc 100644
--- a/arch/x86/include/asm/rwsem.h
+++ b/arch/x86/include/asm/rwsem.h
@@ -213,23 +213,5 @@ static inline void __downgrade_write(struct rw_semaphore *sem)
: "memory", "cc");
}
-/*
- * implement atomic add functionality
- */
-static inline void rwsem_atomic_add(long delta, struct rw_semaphore *sem)
-{
- asm volatile(LOCK_PREFIX _ASM_ADD "%1,%0"
- : "+m" (sem->count)
- : "er" (delta));
-}
-
-/*
- * implement exchange and add functionality
- */
-static inline long rwsem_atomic_update(long delta, struct rw_semaphore *sem)
-{
- return delta + xadd(&sem->count, delta);
-}
-
#endif /* __KERNEL__ */
#endif /* _ASM_X86_RWSEM_H */
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 7f991bd5031b..e346572841a0 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -129,6 +129,14 @@ extern const struct cpumask *cpu_coregroup_mask(int cpu);
extern unsigned int __max_logical_packages;
#define topology_max_packages() (__max_logical_packages)
+
+extern int __max_smt_threads;
+
+static inline int topology_max_smt_threads(void)
+{
+ return __max_smt_threads;
+}
+
int topology_update_package_map(unsigned int apicid, unsigned int cpu);
extern int topology_phys_to_logical_pkg(unsigned int pkg);
#else
@@ -136,6 +144,7 @@ extern int topology_phys_to_logical_pkg(unsigned int pkg);
static inline int
topology_update_package_map(unsigned int apicid, unsigned int cpu) { return 0; }
static inline int topology_phys_to_logical_pkg(unsigned int pkg) { return 0; }
+static inline int topology_max_smt_threads(void) { return 1; }
#endif
static inline void arch_fix_phys_package_id(int num, u32 slot)
diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
index a147e676fc7b..e991d5c8bb3a 100644
--- a/arch/x86/kernel/amd_nb.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -71,8 +71,8 @@ int amd_cache_northbridges(void)
while ((misc = next_northbridge(misc, amd_nb_misc_ids)) != NULL)
i++;
- if (i == 0)
- return 0;
+ if (!i)
+ return -ENODEV;
nb = kzalloc(i * sizeof(struct amd_northbridge), GFP_KERNEL);
if (!nb)
diff --git a/arch/x86/kernel/cpu/mcheck/mce-apei.c b/arch/x86/kernel/cpu/mcheck/mce-apei.c
index 34c89a3e8260..83f1a98d37db 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-apei.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-apei.c
@@ -46,7 +46,7 @@ void apei_mce_report_mem_error(int severity, struct cper_sec_mem_err *mem_err)
return;
mce_setup(&m);
- m.bank = 1;
+ m.bank = -1;
/* Fake a memory read error with unknown channel */
m.status = MCI_STATUS_VAL | MCI_STATUS_EN | MCI_STATUS_ADDRV | 0x9f;
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 92e5e37d97bf..58af6300992d 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -425,7 +425,7 @@ static u64 mce_rdmsrl(u32 msr)
}
if (rdmsrl_safe(msr, &v)) {
- WARN_ONCE(1, "mce: Unable to read msr %d!\n", msr);
+ WARN_ONCE(1, "mce: Unable to read MSR 0x%x!\n", msr);
/*
* Return zero in case the access faulted. This should
* not happen normally but can happen if the CPU does
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 10b0661651e0..7b7f3be783d4 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -93,7 +93,7 @@ const char * const amd_df_mcablock_names[] = {
EXPORT_SYMBOL_GPL(amd_df_mcablock_names);
static DEFINE_PER_CPU(struct threshold_bank **, threshold_banks);
-static DEFINE_PER_CPU(unsigned char, bank_map); /* see which banks are on */
+static DEFINE_PER_CPU(unsigned int, bank_map); /* see which banks are on */
static void amd_threshold_interrupt(void);
static void amd_deferred_error_interrupt(void);
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c
index bca14c899137..57b71373bae3 100644
--- a/arch/x86/kernel/early-quirks.c
+++ b/arch/x86/kernel/early-quirks.c
@@ -11,7 +11,11 @@
#include <linux/pci.h>
#include <linux/acpi.h>
+#include <linux/delay.h>
+#include <linux/dmi.h>
#include <linux/pci_ids.h>
+#include <linux/bcma/bcma.h>
+#include <linux/bcma/bcma_regs.h>
#include <drm/i915_drm.h>
#include <asm/pci-direct.h>
#include <asm/dma.h>
@@ -21,6 +25,9 @@
#include <asm/iommu.h>
#include <asm/gart.h>
#include <asm/irq_remapping.h>
+#include <asm/early_ioremap.h>
+
+#define dev_err(msg) pr_err("pci 0000:%02x:%02x.%d: %s", bus, slot, func, msg)
static void __init fix_hypertransport_config(int num, int slot, int func)
{
@@ -76,6 +83,13 @@ static void __init nvidia_bugs(int num, int slot, int func)
#ifdef CONFIG_ACPI
#ifdef CONFIG_X86_IO_APIC
/*
+ * Only applies to Nvidia root ports (bus 0) and not to
+ * Nvidia graphics cards with PCI ports on secondary buses.
+ */
+ if (num)
+ return;
+
+ /*
* All timer overrides on Nvidia are
* wrong unless HPET is enabled.
* Unfortunately that's not true on many Asus boards.
@@ -590,6 +604,61 @@ static void __init force_disable_hpet(int num, int slot, int func)
#endif
}
+#define BCM4331_MMIO_SIZE 16384
+#define BCM4331_PM_CAP 0x40
+#define bcma_aread32(reg) ioread32(mmio + 1 * BCMA_CORE_SIZE + reg)
+#define bcma_awrite32(reg, val) iowrite32(val, mmio + 1 * BCMA_CORE_SIZE + reg)
+
+static void __init apple_airport_reset(int bus, int slot, int func)
+{
+ void __iomem *mmio;
+ u16 pmcsr;
+ u64 addr;
+ int i;
+
+ if (!dmi_match(DMI_SYS_VENDOR, "Apple Inc."))
+ return;
+
+ /* Card may have been put into PCI_D3hot by grub quirk */
+ pmcsr = read_pci_config_16(bus, slot, func, BCM4331_PM_CAP + PCI_PM_CTRL);
+
+ if ((pmcsr & PCI_PM_CTRL_STATE_MASK) != PCI_D0) {
+ pmcsr &= ~PCI_PM_CTRL_STATE_MASK;
+ write_pci_config_16(bus, slot, func, BCM4331_PM_CAP + PCI_PM_CTRL, pmcsr);
+ mdelay(10);
+
+ pmcsr = read_pci_config_16(bus, slot, func, BCM4331_PM_CAP + PCI_PM_CTRL);
+ if ((pmcsr & PCI_PM_CTRL_STATE_MASK) != PCI_D0) {
+ dev_err("Cannot power up Apple AirPort card\n");
+ return;
+ }
+ }
+
+ addr = read_pci_config(bus, slot, func, PCI_BASE_ADDRESS_0);
+ addr |= (u64)read_pci_config(bus, slot, func, PCI_BASE_ADDRESS_1) << 32;
+ addr &= PCI_BASE_ADDRESS_MEM_MASK;
+
+ mmio = early_ioremap(addr, BCM4331_MMIO_SIZE);
+ if (!mmio) {
+ dev_err("Cannot iomap Apple AirPort card\n");
+ return;
+ }
+
+ pr_info("Resetting Apple AirPort card (left enabled by EFI)\n");
+
+ for (i = 0; bcma_aread32(BCMA_RESET_ST) && i < 30; i++)
+ udelay(10);
+
+ bcma_awrite32(BCMA_RESET_CTL, BCMA_RESET_CTL_RESET);
+ bcma_aread32(BCMA_RESET_CTL);
+ udelay(1);
+
+ bcma_awrite32(BCMA_RESET_CTL, 0);
+ bcma_aread32(BCMA_RESET_CTL);
+ udelay(10);
+
+ early_iounmap(mmio, BCM4331_MMIO_SIZE);
+}
#define QFLAG_APPLY_ONCE 0x1
#define QFLAG_APPLIED 0x2
@@ -603,12 +672,6 @@ struct chipset {
void (*f)(int num, int slot, int func);
};
-/*
- * Only works for devices on the root bus. If you add any devices
- * not on bus 0 readd another loop level in early_quirks(). But
- * be careful because at least the Nvidia quirk here relies on
- * only matching on bus 0.
- */
static struct chipset early_qrk[] __initdata = {
{ PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID,
PCI_CLASS_BRIDGE_PCI, PCI_ANY_ID, QFLAG_APPLY_ONCE, nvidia_bugs },
@@ -638,9 +701,13 @@ static struct chipset early_qrk[] __initdata = {
*/
{ PCI_VENDOR_ID_INTEL, 0x0f00,
PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, force_disable_hpet},
+ { PCI_VENDOR_ID_BROADCOM, 0x4331,
+ PCI_CLASS_NETWORK_OTHER, PCI_ANY_ID, 0, apple_airport_reset},
{}
};
+static void __init early_pci_scan_bus(int bus);
+
/**
* check_dev_quirk - apply early quirks to a given PCI device
* @num: bus number
@@ -649,7 +716,7 @@ static struct chipset early_qrk[] __initdata = {
*
* Check the vendor & device ID against the early quirks table.
*
- * If the device is single function, let early_quirks() know so we don't
+ * If the device is single function, let early_pci_scan_bus() know so we don't
* poke at this device again.
*/
static int __init check_dev_quirk(int num, int slot, int func)
@@ -658,6 +725,7 @@ static int __init check_dev_quirk(int num, int slot, int func)
u16 vendor;
u16 device;
u8 type;
+ u8 sec;
int i;
class = read_pci_config_16(num, slot, func, PCI_CLASS_DEVICE);
@@ -685,25 +753,36 @@ static int __init check_dev_quirk(int num, int slot, int func)
type = read_pci_config_byte(num, slot, func,
PCI_HEADER_TYPE);
+
+ if ((type & 0x7f) == PCI_HEADER_TYPE_BRIDGE) {
+ sec = read_pci_config_byte(num, slot, func, PCI_SECONDARY_BUS);
+ if (sec > num)
+ early_pci_scan_bus(sec);
+ }
+
if (!(type & 0x80))
return -1;
return 0;
}
-void __init early_quirks(void)
+static void __init early_pci_scan_bus(int bus)
{
int slot, func;
- if (!early_pci_allowed())
- return;
-
/* Poor man's PCI discovery */
- /* Only scan the root bus */
for (slot = 0; slot < 32; slot++)
for (func = 0; func < 8; func++) {
/* Only probe function 0 on single fn devices */
- if (check_dev_quirk(0, slot, func))
+ if (check_dev_quirk(bus, slot, func))
break;
}
}
+
+void __init early_quirks(void)
+{
+ if (!early_pci_allowed())
+ return;
+
+ early_pci_scan_bus(0);
+}
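The early-quirks rework splits the walk into early_pci_scan_bus() and recurses into the secondary bus of any PCI-to-PCI bridge it finds, so a quirk such as apple_airport_reset can match devices that do not sit on bus 0. The overall shape, as a stand-alone sketch with stubbed config-space helpers (read_cfg() and handle_dev() are hypothetical, not the kernel accessors):

/* Sketch of the recursive early scan; config-space access is stubbed out. */
static int read_cfg(int bus, int slot, int func, int reg)
{
    (void)bus; (void)slot; (void)func; (void)reg;
    return 0;                       /* stub */
}

static void handle_dev(int bus, int slot, int func)
{
    (void)bus; (void)slot; (void)func;      /* quirk table lookup would go here */
}

static void scan_bus(int bus)
{
    int slot, func;

    for (slot = 0; slot < 32; slot++)
        for (func = 0; func < 8; func++) {
            int type = read_cfg(bus, slot, func, 0x0e);     /* header type */

            handle_dev(bus, slot, func);

            /* Header type 1 is a bridge: descend into its secondary bus. */
            if ((type & 0x7f) == 1) {
                int sec = read_cfg(bus, slot, func, 0x19);

                if (sec > bus)
                    scan_bus(sec);
            }

            /* Bit 7 clear on function 0: single-function device, stop. */
            if (func == 0 && !(type & 0x80))
                break;
        }
}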
diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c
index 99bfc025111d..06c58ce46762 100644
--- a/arch/x86/kernel/pvclock.c
+++ b/arch/x86/kernel/pvclock.c
@@ -61,11 +61,16 @@ void pvclock_resume(void)
u8 pvclock_read_flags(struct pvclock_vcpu_time_info *src)
{
unsigned version;
- cycle_t ret;
u8 flags;
do {
- version = __pvclock_read_cycles(src, &ret, &flags);
+ version = src->version;
+ /* Make the latest version visible */
+ smp_rmb();
+
+ flags = src->flags;
+ /* Make sure that the version double-check is last. */
+ smp_rmb();
} while ((src->version & 1) || version != src->version);
return flags & valid_flags;
@@ -80,6 +85,8 @@ cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src)
do {
version = __pvclock_read_cycles(src, &ret, &flags);
+ /* Make sure that the version double-check is last. */
+ smp_rmb();
} while ((src->version & 1) || version != src->version);
if (unlikely((flags & PVCLOCK_GUEST_STOPPED) != 0)) {
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index fafe8b923cac..2ed0ec1353f8 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -105,6 +105,9 @@ static unsigned int max_physical_pkg_id __read_mostly;
unsigned int __max_logical_packages __read_mostly;
EXPORT_SYMBOL(__max_logical_packages);
+/* Maximum number of SMT threads on any online core */
+int __max_smt_threads __read_mostly;
+
static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip)
{
unsigned long flags;
@@ -493,7 +496,7 @@ void set_cpu_sibling_map(int cpu)
bool has_mp = has_smt || boot_cpu_data.x86_max_cores > 1;
struct cpuinfo_x86 *c = &cpu_data(cpu);
struct cpuinfo_x86 *o;
- int i;
+ int i, threads;
cpumask_set_cpu(cpu, cpu_sibling_setup_mask);
@@ -550,6 +553,10 @@ void set_cpu_sibling_map(int cpu)
if (match_die(c, o) && !topology_same_node(c, o))
primarily_use_numa_for_topology();
}
+
+ threads = cpumask_weight(topology_sibling_cpumask(cpu));
+ if (threads > __max_smt_threads)
+ __max_smt_threads = threads;
}
/* maps the cpu to the sched domain representing multi-core */
@@ -1441,6 +1448,21 @@ __init void prefill_possible_map(void)
#ifdef CONFIG_HOTPLUG_CPU
+/* Recompute SMT state for all CPUs on offline */
+static void recompute_smt_state(void)
+{
+ int max_threads, cpu;
+
+ max_threads = 0;
+ for_each_online_cpu (cpu) {
+ int threads = cpumask_weight(topology_sibling_cpumask(cpu));
+
+ if (threads > max_threads)
+ max_threads = threads;
+ }
+ __max_smt_threads = max_threads;
+}
+
static void remove_siblinginfo(int cpu)
{
int sibling;
@@ -1465,6 +1487,7 @@ static void remove_siblinginfo(int cpu)
c->phys_proc_id = 0;
c->cpu_core_id = 0;
cpumask_clear_cpu(cpu, cpu_sibling_setup_mask);
+ recompute_smt_state();
}
static void remove_cpu_from_maps(int cpu)
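Together with the topology.h hunk earlier, these smpboot changes track the largest SMT sibling count seen while bringing CPUs up and recompute it from scratch when a CPU goes away, since offlining can shrink the maximum. The recompute step is just a max over the online CPUs; a sketch with a hypothetical per-CPU sibling-count array standing in for cpumask_weight(topology_sibling_cpumask(cpu)):

/* Illustrative only: online[] and sibling_threads[] model the cpumasks. */
static int recompute_max_smt(const int *online, const int *sibling_threads,
                             int nr_cpus)
{
    int cpu, max_threads = 0;

    for (cpu = 0; cpu < nr_cpus; cpu++)
        if (online[cpu] && sibling_threads[cpu] > max_threads)
            max_threads = sibling_threads[cpu];

    return max_threads;     /* the value stored in __max_smt_threads */
}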
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index bbb5b283ff63..a397200281c1 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1310,7 +1310,8 @@ void wait_lapic_expire(struct kvm_vcpu *vcpu)
/* __delay is delay_tsc whenever the hardware has TSC, thus always. */
if (guest_tsc < tsc_deadline)
- __delay(tsc_deadline - guest_tsc);
+ __delay(min(tsc_deadline - guest_tsc,
+ nsec_to_cycles(vcpu, lapic_timer_advance_ns)));
}
static void start_apic_timer(struct kvm_lapic *apic)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 003618e324ce..64a79f271276 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -6671,7 +6671,13 @@ static int get_vmx_mem_address(struct kvm_vcpu *vcpu,
/* Checks for #GP/#SS exceptions. */
exn = false;
- if (is_protmode(vcpu)) {
+ if (is_long_mode(vcpu)) {
+ /* Long mode: #GP(0)/#SS(0) if the memory address is in a
+ * non-canonical form. This is the only check on the memory
+ * destination for long mode!
+ */
+ exn = is_noncanonical_address(*ret);
+ } else if (is_protmode(vcpu)) {
/* Protected mode: apply checks for segment validity in the
* following order:
* - segment type check (#GP(0) may be thrown)
@@ -6688,17 +6694,10 @@ static int get_vmx_mem_address(struct kvm_vcpu *vcpu,
* execute-only code segment
*/
exn = ((s.type & 0xa) == 8);
- }
- if (exn) {
- kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
- return 1;
- }
- if (is_long_mode(vcpu)) {
- /* Long mode: #GP(0)/#SS(0) if the memory address is in a
- * non-canonical form. This is an only check for long mode.
- */
- exn = is_noncanonical_address(*ret);
- } else if (is_protmode(vcpu)) {
+ if (exn) {
+ kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
+ return 1;
+ }
/* Protected mode: #GP(0)/#SS(0) if the segment is unusable.
*/
exn = (s.unusable != 0);
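With this reordering, long mode keeps a single check on the operand: the address must be canonical, i.e. bits 63..47 must be copies of bit 47 when 48-bit virtual addresses are in use. A small sketch of that test (assuming the 48-bit layout; wider 57-bit addressing would need the corresponding shift):

#include <stdbool.h>
#include <stdint.h>

/* Canonical for 48-bit VAs: sign-extending from bit 47 reproduces the address. */
static bool is_noncanonical_48(uint64_t addr)
{
    return (uint64_t)((int64_t)(addr << 16) >> 16) != addr;
}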
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 902d9da12392..7da5dd2057a9 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1244,12 +1244,6 @@ static atomic_t kvm_guest_has_master_clock = ATOMIC_INIT(0);
static DEFINE_PER_CPU(unsigned long, cpu_tsc_khz);
static unsigned long max_tsc_khz;
-static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec)
-{
- return pvclock_scale_delta(nsec, vcpu->arch.virtual_tsc_mult,
- vcpu->arch.virtual_tsc_shift);
-}
-
static u32 adjust_tsc_khz(u32 khz, s32 ppm)
{
u64 v = (u64)khz * (1000000 + ppm);
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 7ce3634ab5fe..a82ca466b62e 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -2,6 +2,7 @@
#define ARCH_X86_KVM_X86_H
#include <linux/kvm_host.h>
+#include <asm/pvclock.h>
#include "kvm_cache_regs.h"
#define MSR_IA32_CR_PAT_DEFAULT 0x0007040600070406ULL
@@ -195,6 +196,12 @@ extern unsigned int lapic_timer_advance_ns;
extern struct static_key kvm_no_apic_vcpu;
+static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec)
+{
+ return pvclock_scale_delta(nsec, vcpu->arch.virtual_tsc_mult,
+ vcpu->arch.virtual_tsc_shift);
+}
+
/* Same "calling convention" as do_div:
* - divide (n << 32) by base
* - put result in n
diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
index 1b1110fa0057..0493c17b8a51 100644
--- a/arch/x86/mm/kasan_init_64.c
+++ b/arch/x86/mm/kasan_init_64.c
@@ -54,8 +54,8 @@ static int kasan_die_handler(struct notifier_block *self,
void *data)
{
if (val == DIE_GPF) {
- pr_emerg("CONFIG_KASAN_INLINE enabled");
- pr_emerg("GPF could be caused by NULL-ptr deref or user memory access");
+ pr_emerg("CONFIG_KASAN_INLINE enabled\n");
+ pr_emerg("GPF could be caused by NULL-ptr deref or user memory access\n");
}
return NOTIFY_OK;
}
diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c
index b2a4e2a61f6b..3cd69832d7f4 100644
--- a/arch/x86/pci/acpi.c
+++ b/arch/x86/pci/acpi.c
@@ -396,6 +396,7 @@ int __init pci_acpi_init(void)
return -ENODEV;
printk(KERN_INFO "PCI: Using ACPI for IRQ routing\n");
+ acpi_irq_penalty_init();
pcibios_enable_irq = acpi_pci_irq_enable;
pcibios_disable_irq = acpi_pci_irq_disable;
x86_init.pci.init_irq = x86_init_noop;
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index f93545e7dc54..d898b334ff46 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -98,21 +98,6 @@ static efi_status_t __init phys_efi_set_virtual_address_map(
return status;
}
-void efi_get_time(struct timespec *now)
-{
- efi_status_t status;
- efi_time_t eft;
- efi_time_cap_t cap;
-
- status = efi.get_time(&eft, &cap);
- if (status != EFI_SUCCESS)
- pr_err("Oops: efitime: can't read time!\n");
-
- now->tv_sec = mktime(eft.year, eft.month, eft.day, eft.hour,
- eft.minute, eft.second);
- now->tv_nsec = 0;
-}
-
void __init efi_find_mirror(void)
{
efi_memory_desc_t *md;
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index b226b3f497f1..5cb4301c4dcf 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -466,22 +466,17 @@ extern efi_status_t efi64_thunk(u32, ...);
#define efi_thunk(f, ...) \
({ \
efi_status_t __s; \
- unsigned long flags; \
- u32 func; \
+ unsigned long __flags; \
+ u32 __func; \
\
- efi_sync_low_kernel_mappings(); \
- local_irq_save(flags); \
+ local_irq_save(__flags); \
+ arch_efi_call_virt_setup(); \
\
- efi_scratch.prev_cr3 = read_cr3(); \
- write_cr3((unsigned long)efi_scratch.efi_pgt); \
- __flush_tlb_all(); \
+ __func = runtime_service32(f); \
+ __s = efi64_thunk(__func, __VA_ARGS__); \
\
- func = runtime_service32(f); \
- __s = efi64_thunk(func, __VA_ARGS__); \
- \
- write_cr3(efi_scratch.prev_cr3); \
- __flush_tlb_all(); \
- local_irq_restore(flags); \
+ arch_efi_call_virt_teardown(); \
+ local_irq_restore(__flags); \
\
__s; \
})
diff --git a/arch/x86/platform/uv/bios_uv.c b/arch/x86/platform/uv/bios_uv.c
index 815fec6e05e2..66b2166ea4a1 100644
--- a/arch/x86/platform/uv/bios_uv.c
+++ b/arch/x86/platform/uv/bios_uv.c
@@ -40,8 +40,7 @@ s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5)
*/
return BIOS_STATUS_UNIMPLEMENTED;
- ret = efi_call((void *)__va(tab->function), (u64)which,
- a1, a2, a3, a4, a5);
+ ret = efi_call_virt_pointer(tab, function, (u64)which, a1, a2, a3, a4, a5);
return ret;
}
EXPORT_SYMBOL_GPL(uv_bios_call);
diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c
index 009947d419a6..f2b5e6a5cf95 100644
--- a/arch/x86/power/hibernate_64.c
+++ b/arch/x86/power/hibernate_64.c
@@ -19,6 +19,7 @@
#include <asm/mtrr.h>
#include <asm/sections.h>
#include <asm/suspend.h>
+#include <asm/tlbflush.h>
/* Defined in hibernate_asm_64.S */
extern asmlinkage __visible int restore_image(void);
@@ -28,6 +29,7 @@ extern asmlinkage __visible int restore_image(void);
* kernel's text (this value is passed in the image header).
*/
unsigned long restore_jump_address __visible;
+unsigned long jump_address_phys;
/*
* Value of the cr3 register from before the hibernation (this value is passed
@@ -37,7 +39,43 @@ unsigned long restore_cr3 __visible;
pgd_t *temp_level4_pgt __visible;
-void *relocated_restore_code __visible;
+unsigned long relocated_restore_code __visible;
+
+static int set_up_temporary_text_mapping(void)
+{
+ pmd_t *pmd;
+ pud_t *pud;
+
+ /*
+ * The new mapping only has to cover the page containing the image
+ * kernel's entry point (jump_address_phys), because the switch over to
+ * it is carried out by relocated code running from a page allocated
+ * specifically for this purpose and covered by the identity mapping, so
+ * the temporary kernel text mapping is only needed for the final jump.
+ * Moreover, in that mapping the virtual address of the image kernel's
+ * entry point must be the same as its virtual address in the image
+ * kernel (restore_jump_address), so the image kernel's
+ * restore_registers() code doesn't find itself in a different area of
+ * the virtual address space after switching over to the original page
+ * tables used by the image kernel.
+ */
+ pud = (pud_t *)get_safe_page(GFP_ATOMIC);
+ if (!pud)
+ return -ENOMEM;
+
+ pmd = (pmd_t *)get_safe_page(GFP_ATOMIC);
+ if (!pmd)
+ return -ENOMEM;
+
+ set_pmd(pmd + pmd_index(restore_jump_address),
+ __pmd((jump_address_phys & PMD_MASK) | __PAGE_KERNEL_LARGE_EXEC));
+ set_pud(pud + pud_index(restore_jump_address),
+ __pud(__pa(pmd) | _KERNPG_TABLE));
+ set_pgd(temp_level4_pgt + pgd_index(restore_jump_address),
+ __pgd(__pa(pud) | _KERNPG_TABLE));
+
+ return 0;
+}
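set_up_temporary_text_mapping() builds a minimal PGD -> PUD -> 2 MiB PMD branch that covers only the page holding the image kernel's entry point, indexing each level with the usual x86-64 virtual-address split. For reference, a short sketch of that index arithmetic, assuming the common 4-level, 9-bits-per-level, 4 KiB-page layout:

#include <stdint.h>

#define PMD_SHIFT    21     /* 2 MiB */
#define PUD_SHIFT    30     /* 1 GiB */
#define PGDIR_SHIFT  39     /* 512 GiB */
#define PTRS_PER_TBL 512

static unsigned int pgd_index_of(uint64_t va) { return (va >> PGDIR_SHIFT) & (PTRS_PER_TBL - 1); }
static unsigned int pud_index_of(uint64_t va) { return (va >> PUD_SHIFT) & (PTRS_PER_TBL - 1); }
static unsigned int pmd_index_of(uint64_t va) { return (va >> PMD_SHIFT) & (PTRS_PER_TBL - 1); }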
static void *alloc_pgt_page(void *context)
{
@@ -59,9 +97,10 @@ static int set_up_temporary_mappings(void)
if (!temp_level4_pgt)
return -ENOMEM;
- /* It is safe to reuse the original kernel mapping */
- set_pgd(temp_level4_pgt + pgd_index(__START_KERNEL_map),
- init_level4_pgt[pgd_index(__START_KERNEL_map)]);
+ /* Prepare a temporary mapping for the kernel text */
+ result = set_up_temporary_text_mapping();
+ if (result)
+ return result;
/* Set up the direct mapping from scratch */
for (i = 0; i < nr_pfn_mapped; i++) {
@@ -78,19 +117,50 @@ static int set_up_temporary_mappings(void)
return 0;
}
+static int relocate_restore_code(void)
+{
+ pgd_t *pgd;
+ pud_t *pud;
+
+ relocated_restore_code = get_safe_page(GFP_ATOMIC);
+ if (!relocated_restore_code)
+ return -ENOMEM;
+
+ memcpy((void *)relocated_restore_code, &core_restore_code, PAGE_SIZE);
+
+ /* Make the page containing the relocated code executable */
+ pgd = (pgd_t *)__va(read_cr3()) + pgd_index(relocated_restore_code);
+ pud = pud_offset(pgd, relocated_restore_code);
+ if (pud_large(*pud)) {
+ set_pud(pud, __pud(pud_val(*pud) & ~_PAGE_NX));
+ } else {
+ pmd_t *pmd = pmd_offset(pud, relocated_restore_code);
+
+ if (pmd_large(*pmd)) {
+ set_pmd(pmd, __pmd(pmd_val(*pmd) & ~_PAGE_NX));
+ } else {
+ pte_t *pte = pte_offset_kernel(pmd, relocated_restore_code);
+
+ set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_NX));
+ }
+ }
+ __flush_tlb_all();
+
+ return 0;
+}
+
int swsusp_arch_resume(void)
{
int error;
/* We have got enough memory and from now on we cannot recover */
- if ((error = set_up_temporary_mappings()))
+ error = set_up_temporary_mappings();
+ if (error)
return error;
- relocated_restore_code = (void *)get_safe_page(GFP_ATOMIC);
- if (!relocated_restore_code)
- return -ENOMEM;
- memcpy(relocated_restore_code, &core_restore_code,
- &restore_registers - &core_restore_code);
+ error = relocate_restore_code();
+ if (error)
+ return error;
restore_image();
return 0;
@@ -109,11 +179,12 @@ int pfn_is_nosave(unsigned long pfn)
struct restore_data_record {
unsigned long jump_address;
+ unsigned long jump_address_phys;
unsigned long cr3;
unsigned long magic;
};
-#define RESTORE_MAGIC 0x0123456789ABCDEFUL
+#define RESTORE_MAGIC 0x123456789ABCDEF0UL
/**
* arch_hibernation_header_save - populate the architecture specific part
@@ -126,7 +197,8 @@ int arch_hibernation_header_save(void *addr, unsigned int max_size)
if (max_size < sizeof(struct restore_data_record))
return -EOVERFLOW;
- rdr->jump_address = restore_jump_address;
+ rdr->jump_address = (unsigned long)&restore_registers;
+ rdr->jump_address_phys = __pa_symbol(&restore_registers);
rdr->cr3 = restore_cr3;
rdr->magic = RESTORE_MAGIC;
return 0;
@@ -142,6 +214,7 @@ int arch_hibernation_header_restore(void *addr)
struct restore_data_record *rdr = addr;
restore_jump_address = rdr->jump_address;
+ jump_address_phys = rdr->jump_address_phys;
restore_cr3 = rdr->cr3;
return (rdr->magic == RESTORE_MAGIC) ? 0 : -EINVAL;
}
diff --git a/arch/x86/power/hibernate_asm_64.S b/arch/x86/power/hibernate_asm_64.S
index 4400a43b9e28..3177c2bc26f6 100644
--- a/arch/x86/power/hibernate_asm_64.S
+++ b/arch/x86/power/hibernate_asm_64.S
@@ -44,9 +44,6 @@ ENTRY(swsusp_arch_suspend)
pushfq
popq pt_regs_flags(%rax)
- /* save the address of restore_registers */
- movq $restore_registers, %rax
- movq %rax, restore_jump_address(%rip)
/* save cr3 */
movq %cr3, %rax
movq %rax, restore_cr3(%rip)
@@ -57,31 +54,34 @@ ENTRY(swsusp_arch_suspend)
ENDPROC(swsusp_arch_suspend)
ENTRY(restore_image)
- /* switch to temporary page tables */
- movq $__PAGE_OFFSET, %rdx
- movq temp_level4_pgt(%rip), %rax
- subq %rdx, %rax
- movq %rax, %cr3
- /* Flush TLB */
- movq mmu_cr4_features(%rip), %rax
- movq %rax, %rdx
- andq $~(X86_CR4_PGE), %rdx
- movq %rdx, %cr4; # turn off PGE
- movq %cr3, %rcx; # flush TLB
- movq %rcx, %cr3;
- movq %rax, %cr4; # turn PGE back on
-
/* prepare to jump to the image kernel */
- movq restore_jump_address(%rip), %rax
- movq restore_cr3(%rip), %rbx
+ movq restore_jump_address(%rip), %r8
+ movq restore_cr3(%rip), %r9
+
+ /* prepare to switch to temporary page tables */
+ movq temp_level4_pgt(%rip), %rax
+ movq mmu_cr4_features(%rip), %rbx
/* prepare to copy image data to their original locations */
movq restore_pblist(%rip), %rdx
+
+ /* jump to relocated restore code */
movq relocated_restore_code(%rip), %rcx
jmpq *%rcx
/* code below has been relocated to a safe page */
ENTRY(core_restore_code)
+ /* switch to temporary page tables */
+ movq $__PAGE_OFFSET, %rcx
+ subq %rcx, %rax
+ movq %rax, %cr3
+ /* flush TLB */
+ movq %rbx, %rcx
+ andq $~(X86_CR4_PGE), %rcx
+ movq %rcx, %cr4; # turn off PGE
+ movq %cr3, %rcx; # flush TLB
+ movq %rcx, %cr3;
+ movq %rbx, %cr4; # turn PGE back on
.Lloop:
testq %rdx, %rdx
jz .Ldone
@@ -96,24 +96,17 @@ ENTRY(core_restore_code)
/* progress to the next pbe */
movq pbe_next(%rdx), %rdx
jmp .Lloop
+
.Ldone:
/* jump to the restore_registers address from the image header */
- jmpq *%rax
- /*
- * NOTE: This assumes that the boot kernel's text mapping covers the
- * image kernel's page containing restore_registers and the address of
- * this page is the same as in the image kernel's text mapping (it
- * should always be true, because the text mapping is linear, starting
- * from 0, and is supposed to cover the entire kernel text for every
- * kernel).
- *
- * code below belongs to the image kernel
- */
+ jmpq *%r8
+ /* code below belongs to the image kernel */
+ .align PAGE_SIZE
ENTRY(restore_registers)
FRAME_BEGIN
/* go back to the original page tables */
- movq %rbx, %cr3
+ movq %r9, %cr3
/* Flush TLB, including "global" things (vmalloc) */
movq mmu_cr4_features(%rip), %rax
diff --git a/arch/x86/ras/mce_amd_inj.c b/arch/x86/ras/mce_amd_inj.c
index e69f4701a076..1104515d5ad2 100644
--- a/arch/x86/ras/mce_amd_inj.c
+++ b/arch/x86/ras/mce_amd_inj.c
@@ -241,6 +241,31 @@ static void toggle_nb_mca_mst_cpu(u16 nid)
__func__, PCI_FUNC(F3->devfn), NBCFG);
}
+static void prepare_msrs(void *info)
+{
+ struct mce i_mce = *(struct mce *)info;
+ u8 b = i_mce.bank;
+
+ wrmsrl(MSR_IA32_MCG_STATUS, i_mce.mcgstatus);
+
+ if (boot_cpu_has(X86_FEATURE_SMCA)) {
+ if (i_mce.inject_flags == DFR_INT_INJ) {
+ wrmsrl(MSR_AMD64_SMCA_MCx_DESTAT(b), i_mce.status);
+ wrmsrl(MSR_AMD64_SMCA_MCx_DEADDR(b), i_mce.addr);
+ } else {
+ wrmsrl(MSR_AMD64_SMCA_MCx_STATUS(b), i_mce.status);
+ wrmsrl(MSR_AMD64_SMCA_MCx_ADDR(b), i_mce.addr);
+ }
+
+ wrmsrl(MSR_AMD64_SMCA_MCx_MISC(b), i_mce.misc);
+ } else {
+ wrmsrl(MSR_IA32_MCx_STATUS(b), i_mce.status);
+ wrmsrl(MSR_IA32_MCx_ADDR(b), i_mce.addr);
+ wrmsrl(MSR_IA32_MCx_MISC(b), i_mce.misc);
+ }
+
+}
+
static void do_inject(void)
{
u64 mcg_status = 0;
@@ -287,36 +312,9 @@ static void do_inject(void)
toggle_hw_mce_inject(cpu, true);
- wrmsr_on_cpu(cpu, MSR_IA32_MCG_STATUS,
- (u32)mcg_status, (u32)(mcg_status >> 32));
-
- if (boot_cpu_has(X86_FEATURE_SMCA)) {
- if (inj_type == DFR_INT_INJ) {
- wrmsr_on_cpu(cpu, MSR_AMD64_SMCA_MCx_DESTAT(b),
- (u32)i_mce.status, (u32)(i_mce.status >> 32));
-
- wrmsr_on_cpu(cpu, MSR_AMD64_SMCA_MCx_DEADDR(b),
- (u32)i_mce.addr, (u32)(i_mce.addr >> 32));
- } else {
- wrmsr_on_cpu(cpu, MSR_AMD64_SMCA_MCx_STATUS(b),
- (u32)i_mce.status, (u32)(i_mce.status >> 32));
-
- wrmsr_on_cpu(cpu, MSR_AMD64_SMCA_MCx_ADDR(b),
- (u32)i_mce.addr, (u32)(i_mce.addr >> 32));
- }
-
- wrmsr_on_cpu(cpu, MSR_AMD64_SMCA_MCx_MISC(b),
- (u32)i_mce.misc, (u32)(i_mce.misc >> 32));
- } else {
- wrmsr_on_cpu(cpu, MSR_IA32_MCx_STATUS(b),
- (u32)i_mce.status, (u32)(i_mce.status >> 32));
-
- wrmsr_on_cpu(cpu, MSR_IA32_MCx_ADDR(b),
- (u32)i_mce.addr, (u32)(i_mce.addr >> 32));
-
- wrmsr_on_cpu(cpu, MSR_IA32_MCx_MISC(b),
- (u32)i_mce.misc, (u32)(i_mce.misc >> 32));
- }
+ i_mce.mcgstatus = mcg_status;
+ i_mce.inject_flags = inj_type;
+ smp_call_function_single(cpu, prepare_msrs, &i_mce, 0);
toggle_hw_mce_inject(cpu, false);
diff --git a/arch/xtensa/include/asm/atomic.h b/arch/xtensa/include/asm/atomic.h
index fd8017ce298a..e7a23f2a519a 100644
--- a/arch/xtensa/include/asm/atomic.h
+++ b/arch/xtensa/include/asm/atomic.h
@@ -98,6 +98,26 @@ static inline int atomic_##op##_return(int i, atomic_t * v) \
return result; \
}
+#define ATOMIC_FETCH_OP(op) \
+static inline int atomic_fetch_##op(int i, atomic_t * v) \
+{ \
+ unsigned long tmp; \
+ int result; \
+ \
+ __asm__ __volatile__( \
+ "1: l32i %1, %3, 0\n" \
+ " wsr %1, scompare1\n" \
+ " " #op " %0, %1, %2\n" \
+ " s32c1i %0, %3, 0\n" \
+ " bne %0, %1, 1b\n" \
+ : "=&a" (result), "=&a" (tmp) \
+ : "a" (i), "a" (v) \
+ : "memory" \
+ ); \
+ \
+ return result; \
+}
+
#else /* XCHAL_HAVE_S32C1I */
#define ATOMIC_OP(op) \
@@ -138,18 +158,42 @@ static inline int atomic_##op##_return(int i, atomic_t * v) \
return vval; \
}
+#define ATOMIC_FETCH_OP(op) \
+static inline int atomic_fetch_##op(int i, atomic_t * v) \
+{ \
+ unsigned int tmp, vval; \
+ \
+ __asm__ __volatile__( \
+ " rsil a15,"__stringify(TOPLEVEL)"\n" \
+ " l32i %0, %3, 0\n" \
+ " " #op " %1, %0, %2\n" \
+ " s32i %1, %3, 0\n" \
+ " wsr a15, ps\n" \
+ " rsync\n" \
+ : "=&a" (vval), "=&a" (tmp) \
+ : "a" (i), "a" (v) \
+ : "a15", "memory" \
+ ); \
+ \
+ return vval; \
+}
+
#endif /* XCHAL_HAVE_S32C1I */
-#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op)
+#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op) ATOMIC_OP_RETURN(op)
ATOMIC_OPS(add)
ATOMIC_OPS(sub)
-ATOMIC_OP(and)
-ATOMIC_OP(or)
-ATOMIC_OP(xor)
+#undef ATOMIC_OPS
+#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op)
+
+ATOMIC_OPS(and)
+ATOMIC_OPS(or)
+ATOMIC_OPS(xor)
#undef ATOMIC_OPS
+#undef ATOMIC_FETCH_OP
#undef ATOMIC_OP_RETURN
#undef ATOMIC_OP
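The new ATOMIC_FETCH_OP template generates atomic_fetch_add/sub/and/or/xor, which return the value the variable held before the operation, while the existing ATOMIC_OP_RETURN variants return the value after it. A tiny C11 contrast of the two return conventions (illustrative names, not the kernel API):

#include <stdatomic.h>

static int sketch_fetch_add(atomic_int *v, int i)
{
    return atomic_fetch_add(v, i);          /* old value */
}

static int sketch_add_return(atomic_int *v, int i)
{
    return atomic_fetch_add(v, i) + i;      /* new value */
}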
diff --git a/arch/xtensa/include/asm/spinlock.h b/arch/xtensa/include/asm/spinlock.h
index 1d95fa5dcd10..a36221cf6363 100644
--- a/arch/xtensa/include/asm/spinlock.h
+++ b/arch/xtensa/include/asm/spinlock.h
@@ -11,6 +11,9 @@
#ifndef _XTENSA_SPINLOCK_H
#define _XTENSA_SPINLOCK_H
+#include <asm/barrier.h>
+#include <asm/processor.h>
+
/*
* spinlock
*
@@ -29,8 +32,11 @@
*/
#define arch_spin_is_locked(x) ((x)->slock != 0)
-#define arch_spin_unlock_wait(lock) \
- do { while (arch_spin_is_locked(lock)) cpu_relax(); } while (0)
+
+static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
+{
+ smp_cond_load_acquire(&lock->slock, !VAL);
+}
#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
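arch_spin_unlock_wait() now spins with smp_cond_load_acquire(&lock->slock, !VAL): wait for slock to read as zero and give the final load acquire ordering, instead of the previous unordered arch_spin_is_locked() loop. Roughly, in user-space C11 terms (sketch only; the kernel primitive also relaxes the CPU between polls):

#include <stdatomic.h>

static void spin_unlock_wait_sketch(const atomic_uint *slock)
{
    /* Acquire on every poll is stronger than strictly needed, but it
     * guarantees the required ordering on the final, zero-returning load. */
    while (atomic_load_explicit(slock, memory_order_acquire))
        ;   /* cpu_relax() in the kernel; plain spin here */
}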
diff --git a/arch/xtensa/platforms/xt2000/setup.c b/arch/xtensa/platforms/xt2000/setup.c
index 5f4bd71971d6..4904c5c16918 100644
--- a/arch/xtensa/platforms/xt2000/setup.c
+++ b/arch/xtensa/platforms/xt2000/setup.c
@@ -113,7 +113,6 @@ void platform_heartbeat(void)
}
//#define RS_TABLE_SIZE 2
-//#define STD_COM_FLAGS (UPF_BOOT_AUTOCONF|UPF_SKIP_TEST)
#define _SERIAL_PORT(_base,_irq) \
{ \