diff options
Diffstat (limited to 'arch')
165 files changed, 3227 insertions, 1080 deletions
diff --git a/arch/alpha/include/asm/atomic.h b/arch/alpha/include/asm/atomic.h index 572b228c44c7..498933a7df97 100644 --- a/arch/alpha/include/asm/atomic.h +++ b/arch/alpha/include/asm/atomic.h @@ -46,10 +46,9 @@ static __inline__ void atomic_##op(int i, atomic_t * v) \ } \ #define ATOMIC_OP_RETURN(op, asm_op) \ -static inline int atomic_##op##_return(int i, atomic_t *v) \ +static inline int atomic_##op##_return_relaxed(int i, atomic_t *v) \ { \ long temp, result; \ - smp_mb(); \ __asm__ __volatile__( \ "1: ldl_l %0,%1\n" \ " " #asm_op " %0,%3,%2\n" \ @@ -61,7 +60,23 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ ".previous" \ :"=&r" (temp), "=m" (v->counter), "=&r" (result) \ :"Ir" (i), "m" (v->counter) : "memory"); \ - smp_mb(); \ + return result; \ +} + +#define ATOMIC_FETCH_OP(op, asm_op) \ +static inline int atomic_fetch_##op##_relaxed(int i, atomic_t *v) \ +{ \ + long temp, result; \ + __asm__ __volatile__( \ + "1: ldl_l %2,%1\n" \ + " " #asm_op " %2,%3,%0\n" \ + " stl_c %0,%1\n" \ + " beq %0,2f\n" \ + ".subsection 2\n" \ + "2: br 1b\n" \ + ".previous" \ + :"=&r" (temp), "=m" (v->counter), "=&r" (result) \ + :"Ir" (i), "m" (v->counter) : "memory"); \ return result; \ } @@ -82,10 +97,9 @@ static __inline__ void atomic64_##op(long i, atomic64_t * v) \ } \ #define ATOMIC64_OP_RETURN(op, asm_op) \ -static __inline__ long atomic64_##op##_return(long i, atomic64_t * v) \ +static __inline__ long atomic64_##op##_return_relaxed(long i, atomic64_t * v) \ { \ long temp, result; \ - smp_mb(); \ __asm__ __volatile__( \ "1: ldq_l %0,%1\n" \ " " #asm_op " %0,%3,%2\n" \ @@ -97,34 +111,77 @@ static __inline__ long atomic64_##op##_return(long i, atomic64_t * v) \ ".previous" \ :"=&r" (temp), "=m" (v->counter), "=&r" (result) \ :"Ir" (i), "m" (v->counter) : "memory"); \ - smp_mb(); \ + return result; \ +} + +#define ATOMIC64_FETCH_OP(op, asm_op) \ +static __inline__ long atomic64_fetch_##op##_relaxed(long i, atomic64_t * v) \ +{ \ + long temp, result; \ + __asm__ __volatile__( \ + "1: ldq_l %2,%1\n" \ + " " #asm_op " %2,%3,%0\n" \ + " stq_c %0,%1\n" \ + " beq %0,2f\n" \ + ".subsection 2\n" \ + "2: br 1b\n" \ + ".previous" \ + :"=&r" (temp), "=m" (v->counter), "=&r" (result) \ + :"Ir" (i), "m" (v->counter) : "memory"); \ return result; \ } #define ATOMIC_OPS(op) \ ATOMIC_OP(op, op##l) \ ATOMIC_OP_RETURN(op, op##l) \ + ATOMIC_FETCH_OP(op, op##l) \ ATOMIC64_OP(op, op##q) \ - ATOMIC64_OP_RETURN(op, op##q) + ATOMIC64_OP_RETURN(op, op##q) \ + ATOMIC64_FETCH_OP(op, op##q) ATOMIC_OPS(add) ATOMIC_OPS(sub) +#define atomic_add_return_relaxed atomic_add_return_relaxed +#define atomic_sub_return_relaxed atomic_sub_return_relaxed +#define atomic_fetch_add_relaxed atomic_fetch_add_relaxed +#define atomic_fetch_sub_relaxed atomic_fetch_sub_relaxed + +#define atomic64_add_return_relaxed atomic64_add_return_relaxed +#define atomic64_sub_return_relaxed atomic64_sub_return_relaxed +#define atomic64_fetch_add_relaxed atomic64_fetch_add_relaxed +#define atomic64_fetch_sub_relaxed atomic64_fetch_sub_relaxed + #define atomic_andnot atomic_andnot #define atomic64_andnot atomic64_andnot -ATOMIC_OP(and, and) -ATOMIC_OP(andnot, bic) -ATOMIC_OP(or, bis) -ATOMIC_OP(xor, xor) -ATOMIC64_OP(and, and) -ATOMIC64_OP(andnot, bic) -ATOMIC64_OP(or, bis) -ATOMIC64_OP(xor, xor) +#undef ATOMIC_OPS +#define ATOMIC_OPS(op, asm) \ + ATOMIC_OP(op, asm) \ + ATOMIC_FETCH_OP(op, asm) \ + ATOMIC64_OP(op, asm) \ + ATOMIC64_FETCH_OP(op, asm) + +ATOMIC_OPS(and, and) +ATOMIC_OPS(andnot, bic) +ATOMIC_OPS(or, bis) +ATOMIC_OPS(xor, xor) + +#define atomic_fetch_and_relaxed atomic_fetch_and_relaxed +#define atomic_fetch_andnot_relaxed atomic_fetch_andnot_relaxed +#define atomic_fetch_or_relaxed atomic_fetch_or_relaxed +#define atomic_fetch_xor_relaxed atomic_fetch_xor_relaxed + +#define atomic64_fetch_and_relaxed atomic64_fetch_and_relaxed +#define atomic64_fetch_andnot_relaxed atomic64_fetch_andnot_relaxed +#define atomic64_fetch_or_relaxed atomic64_fetch_or_relaxed +#define atomic64_fetch_xor_relaxed atomic64_fetch_xor_relaxed #undef ATOMIC_OPS +#undef ATOMIC64_FETCH_OP #undef ATOMIC64_OP_RETURN #undef ATOMIC64_OP +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP diff --git a/arch/alpha/include/asm/rwsem.h b/arch/alpha/include/asm/rwsem.h index 0131a7058778..77873d0ad293 100644 --- a/arch/alpha/include/asm/rwsem.h +++ b/arch/alpha/include/asm/rwsem.h @@ -25,8 +25,8 @@ static inline void __down_read(struct rw_semaphore *sem) { long oldcount; #ifndef CONFIG_SMP - oldcount = sem->count; - sem->count += RWSEM_ACTIVE_READ_BIAS; + oldcount = sem->count.counter; + sem->count.counter += RWSEM_ACTIVE_READ_BIAS; #else long temp; __asm__ __volatile__( @@ -52,13 +52,13 @@ static inline int __down_read_trylock(struct rw_semaphore *sem) { long old, new, res; - res = sem->count; + res = atomic_long_read(&sem->count); do { new = res + RWSEM_ACTIVE_READ_BIAS; if (new <= 0) break; old = res; - res = cmpxchg(&sem->count, old, new); + res = atomic_long_cmpxchg(&sem->count, old, new); } while (res != old); return res >= 0 ? 1 : 0; } @@ -67,8 +67,8 @@ static inline long ___down_write(struct rw_semaphore *sem) { long oldcount; #ifndef CONFIG_SMP - oldcount = sem->count; - sem->count += RWSEM_ACTIVE_WRITE_BIAS; + oldcount = sem->count.counter; + sem->count.counter += RWSEM_ACTIVE_WRITE_BIAS; #else long temp; __asm__ __volatile__( @@ -106,7 +106,7 @@ static inline int __down_write_killable(struct rw_semaphore *sem) */ static inline int __down_write_trylock(struct rw_semaphore *sem) { - long ret = cmpxchg(&sem->count, RWSEM_UNLOCKED_VALUE, + long ret = atomic_long_cmpxchg(&sem->count, RWSEM_UNLOCKED_VALUE, RWSEM_ACTIVE_WRITE_BIAS); if (ret == RWSEM_UNLOCKED_VALUE) return 1; @@ -117,8 +117,8 @@ static inline void __up_read(struct rw_semaphore *sem) { long oldcount; #ifndef CONFIG_SMP - oldcount = sem->count; - sem->count -= RWSEM_ACTIVE_READ_BIAS; + oldcount = sem->count.counter; + sem->count.counter -= RWSEM_ACTIVE_READ_BIAS; #else long temp; __asm__ __volatile__( @@ -142,8 +142,8 @@ static inline void __up_write(struct rw_semaphore *sem) { long count; #ifndef CONFIG_SMP - sem->count -= RWSEM_ACTIVE_WRITE_BIAS; - count = sem->count; + sem->count.counter -= RWSEM_ACTIVE_WRITE_BIAS; + count = sem->count.counter; #else long temp; __asm__ __volatile__( @@ -171,8 +171,8 @@ static inline void __downgrade_write(struct rw_semaphore *sem) { long oldcount; #ifndef CONFIG_SMP - oldcount = sem->count; - sem->count -= RWSEM_WAITING_BIAS; + oldcount = sem->count.counter; + sem->count.counter -= RWSEM_WAITING_BIAS; #else long temp; __asm__ __volatile__( @@ -191,47 +191,5 @@ static inline void __downgrade_write(struct rw_semaphore *sem) rwsem_downgrade_wake(sem); } -static inline void rwsem_atomic_add(long val, struct rw_semaphore *sem) -{ -#ifndef CONFIG_SMP - sem->count += val; -#else - long temp; - __asm__ __volatile__( - "1: ldq_l %0,%1\n" - " addq %0,%2,%0\n" - " stq_c %0,%1\n" - " beq %0,2f\n" - ".subsection 2\n" - "2: br 1b\n" - ".previous" - :"=&r" (temp), "=m" (sem->count) - :"Ir" (val), "m" (sem->count)); -#endif -} - -static inline long rwsem_atomic_update(long val, struct rw_semaphore *sem) -{ -#ifndef CONFIG_SMP - sem->count += val; - return sem->count; -#else - long ret, temp; - __asm__ __volatile__( - "1: ldq_l %0,%1\n" - " addq %0,%3,%2\n" - " addq %0,%3,%0\n" - " stq_c %2,%1\n" - " beq %2,2f\n" - ".subsection 2\n" - "2: br 1b\n" - ".previous" - :"=&r" (ret), "=m" (sem->count), "=&r" (temp) - :"Ir" (val), "m" (sem->count)); - - return ret; -#endif -} - #endif /* __KERNEL__ */ #endif /* _ALPHA_RWSEM_H */ diff --git a/arch/alpha/include/asm/spinlock.h b/arch/alpha/include/asm/spinlock.h index fed9c6f44c19..a40b9fc0c6c3 100644 --- a/arch/alpha/include/asm/spinlock.h +++ b/arch/alpha/include/asm/spinlock.h @@ -3,6 +3,8 @@ #include <linux/kernel.h> #include <asm/current.h> +#include <asm/barrier.h> +#include <asm/processor.h> /* * Simple spin lock operations. There are two variants, one clears IRQ's @@ -13,8 +15,11 @@ #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) #define arch_spin_is_locked(x) ((x)->lock != 0) -#define arch_spin_unlock_wait(x) \ - do { cpu_relax(); } while ((x)->lock) + +static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) +{ + smp_cond_load_acquire(&lock->lock, !VAL); +} static inline int arch_spin_value_unlocked(arch_spinlock_t lock) { diff --git a/arch/arc/Makefile b/arch/arc/Makefile index d4df6be66d58..85814e74677d 100644 --- a/arch/arc/Makefile +++ b/arch/arc/Makefile @@ -66,8 +66,6 @@ endif endif -cflags-$(CONFIG_ARC_DW2_UNWIND) += -fasynchronous-unwind-tables - # By default gcc 4.8 generates dwarf4 which kernel unwinder can't grok ifeq ($(atleast_gcc48),y) cflags-$(CONFIG_ARC_DW2_UNWIND) += -gdwarf-2 diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h index dd683995bc9d..4e3c1b6b0806 100644 --- a/arch/arc/include/asm/atomic.h +++ b/arch/arc/include/asm/atomic.h @@ -67,6 +67,33 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ return val; \ } +#define ATOMIC_FETCH_OP(op, c_op, asm_op) \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + unsigned int val, orig; \ + \ + /* \ + * Explicit full memory barrier needed before/after as \ + * LLOCK/SCOND thmeselves don't provide any such semantics \ + */ \ + smp_mb(); \ + \ + __asm__ __volatile__( \ + "1: llock %[orig], [%[ctr]] \n" \ + " " #asm_op " %[val], %[orig], %[i] \n" \ + " scond %[val], [%[ctr]] \n" \ + " \n" \ + : [val] "=&r" (val), \ + [orig] "=&r" (orig) \ + : [ctr] "r" (&v->counter), \ + [i] "ir" (i) \ + : "cc"); \ + \ + smp_mb(); \ + \ + return orig; \ +} + #else /* !CONFIG_ARC_HAS_LLSC */ #ifndef CONFIG_SMP @@ -129,25 +156,44 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ return temp; \ } +#define ATOMIC_FETCH_OP(op, c_op, asm_op) \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + unsigned long flags; \ + unsigned long orig; \ + \ + /* \ + * spin lock/unlock provides the needed smp_mb() before/after \ + */ \ + atomic_ops_lock(flags); \ + orig = v->counter; \ + v->counter c_op i; \ + atomic_ops_unlock(flags); \ + \ + return orig; \ +} + #endif /* !CONFIG_ARC_HAS_LLSC */ #define ATOMIC_OPS(op, c_op, asm_op) \ ATOMIC_OP(op, c_op, asm_op) \ - ATOMIC_OP_RETURN(op, c_op, asm_op) + ATOMIC_OP_RETURN(op, c_op, asm_op) \ + ATOMIC_FETCH_OP(op, c_op, asm_op) ATOMIC_OPS(add, +=, add) ATOMIC_OPS(sub, -=, sub) #define atomic_andnot atomic_andnot -ATOMIC_OP(and, &=, and) -ATOMIC_OP(andnot, &= ~, bic) -ATOMIC_OP(or, |=, or) -ATOMIC_OP(xor, ^=, xor) +#undef ATOMIC_OPS +#define ATOMIC_OPS(op, c_op, asm_op) \ + ATOMIC_OP(op, c_op, asm_op) \ + ATOMIC_FETCH_OP(op, c_op, asm_op) -#undef SCOND_FAIL_RETRY_VAR_DEF -#undef SCOND_FAIL_RETRY_ASM -#undef SCOND_FAIL_RETRY_VARS +ATOMIC_OPS(and, &=, and) +ATOMIC_OPS(andnot, &= ~, bic) +ATOMIC_OPS(or, |=, or) +ATOMIC_OPS(xor, ^=, xor) #else /* CONFIG_ARC_PLAT_EZNPS */ @@ -208,22 +254,51 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ return temp; \ } +#define ATOMIC_FETCH_OP(op, c_op, asm_op) \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + unsigned int temp = i; \ + \ + /* Explicit full memory barrier needed before/after */ \ + smp_mb(); \ + \ + __asm__ __volatile__( \ + " mov r2, %0\n" \ + " mov r3, %1\n" \ + " .word %2\n" \ + " mov %0, r2" \ + : "+r"(temp) \ + : "r"(&v->counter), "i"(asm_op) \ + : "r2", "r3", "memory"); \ + \ + smp_mb(); \ + \ + return temp; \ +} + #define ATOMIC_OPS(op, c_op, asm_op) \ ATOMIC_OP(op, c_op, asm_op) \ - ATOMIC_OP_RETURN(op, c_op, asm_op) + ATOMIC_OP_RETURN(op, c_op, asm_op) \ + ATOMIC_FETCH_OP(op, c_op, asm_op) ATOMIC_OPS(add, +=, CTOP_INST_AADD_DI_R2_R2_R3) #define atomic_sub(i, v) atomic_add(-(i), (v)) #define atomic_sub_return(i, v) atomic_add_return(-(i), (v)) -ATOMIC_OP(and, &=, CTOP_INST_AAND_DI_R2_R2_R3) +#undef ATOMIC_OPS +#define ATOMIC_OPS(op, c_op, asm_op) \ + ATOMIC_OP(op, c_op, asm_op) \ + ATOMIC_FETCH_OP(op, c_op, asm_op) + +ATOMIC_OPS(and, &=, CTOP_INST_AAND_DI_R2_R2_R3) #define atomic_andnot(mask, v) atomic_and(~(mask), (v)) -ATOMIC_OP(or, |=, CTOP_INST_AOR_DI_R2_R2_R3) -ATOMIC_OP(xor, ^=, CTOP_INST_AXOR_DI_R2_R2_R3) +ATOMIC_OPS(or, |=, CTOP_INST_AOR_DI_R2_R2_R3) +ATOMIC_OPS(xor, ^=, CTOP_INST_AXOR_DI_R2_R2_R3) #endif /* CONFIG_ARC_PLAT_EZNPS */ #undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP diff --git a/arch/arc/include/asm/spinlock.h b/arch/arc/include/asm/spinlock.h index cded4a9b5438..233d5ffe6ec7 100644 --- a/arch/arc/include/asm/spinlock.h +++ b/arch/arc/include/asm/spinlock.h @@ -15,8 +15,11 @@ #define arch_spin_is_locked(x) ((x)->slock != __ARCH_SPIN_LOCK_UNLOCKED__) #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) -#define arch_spin_unlock_wait(x) \ - do { while (arch_spin_is_locked(x)) cpu_relax(); } while (0) + +static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) +{ + smp_cond_load_acquire(&lock->slock, !VAL); +} #ifdef CONFIG_ARC_HAS_LLSC diff --git a/arch/arc/kernel/stacktrace.c b/arch/arc/kernel/stacktrace.c index e0efff15a5ae..b9192a653b7e 100644 --- a/arch/arc/kernel/stacktrace.c +++ b/arch/arc/kernel/stacktrace.c @@ -142,7 +142,7 @@ arc_unwind_core(struct task_struct *tsk, struct pt_regs *regs, * prelogue is setup (callee regs saved and then fp set and not other * way around */ - pr_warn("CONFIG_ARC_DW2_UNWIND needs to be enabled\n"); + pr_warn_once("CONFIG_ARC_DW2_UNWIND needs to be enabled\n"); return 0; #endif diff --git a/arch/arm/boot/dts/armada-385-linksys.dtsi b/arch/arm/boot/dts/armada-385-linksys.dtsi index 8450944b28e6..22f7a13e20b4 100644 --- a/arch/arm/boot/dts/armada-385-linksys.dtsi +++ b/arch/arm/boot/dts/armada-385-linksys.dtsi @@ -58,8 +58,8 @@ soc { ranges = <MBUS_ID(0xf0, 0x01) 0 0xf1000000 0x100000 MBUS_ID(0x01, 0x1d) 0 0xfff00000 0x100000 - MBUS_ID(0x09, 0x09) 0 0xf1100000 0x10000 - MBUS_ID(0x09, 0x05) 0 0xf1110000 0x10000>; + MBUS_ID(0x09, 0x19) 0 0xf1100000 0x10000 + MBUS_ID(0x09, 0x15) 0 0xf1110000 0x10000>; internal-regs { diff --git a/arch/arm/boot/dts/sama5d2.dtsi b/arch/arm/boot/dts/sama5d2.dtsi index 2827e7ab5ebc..5dd2734e67ba 100644 --- a/arch/arm/boot/dts/sama5d2.dtsi +++ b/arch/arm/boot/dts/sama5d2.dtsi @@ -232,7 +232,7 @@ }; usb1: ohci@00400000 { - compatible = "atmel,at91rm9200-ohci", "usb-ohci"; + compatible = "atmel,sama5d2-ohci", "usb-ohci"; reg = <0x00400000 0x100000>; interrupts = <41 IRQ_TYPE_LEVEL_HIGH 2>; clocks = <&uhphs_clk>, <&uhphs_clk>, <&uhpck>; diff --git a/arch/arm/boot/dts/sun4i-a10.dtsi b/arch/arm/boot/dts/sun4i-a10.dtsi index a03e56fb5dbc..ca58eb279d55 100644 --- a/arch/arm/boot/dts/sun4i-a10.dtsi +++ b/arch/arm/boot/dts/sun4i-a10.dtsi @@ -65,8 +65,9 @@ compatible = "allwinner,simple-framebuffer", "simple-framebuffer"; allwinner,pipeline = "de_be0-lcd0-hdmi"; - clocks = <&pll5 1>, <&ahb_gates 36>, <&ahb_gates 43>, - <&ahb_gates 44>, <&dram_gates 26>; + clocks = <&pll3>, <&pll5 1>, <&ahb_gates 36>, + <&ahb_gates 43>, <&ahb_gates 44>, + <&dram_gates 26>; status = "disabled"; }; @@ -74,8 +75,9 @@ compatible = "allwinner,simple-framebuffer", "simple-framebuffer"; allwinner,pipeline = "de_fe0-de_be0-lcd0-hdmi"; - clocks = <&pll5 1>, <&ahb_gates 36>, <&ahb_gates 43>, - <&ahb_gates 44>, <&ahb_gates 46>, + clocks = <&pll3>, <&pll5 1>, <&ahb_gates 36>, + <&ahb_gates 43>, <&ahb_gates 44>, + <&ahb_gates 46>, <&dram_gates 25>, <&dram_gates 26>; status = "disabled"; }; @@ -84,9 +86,9 @@ compatible = "allwinner,simple-framebuffer", "simple-framebuffer"; allwinner,pipeline = "de_fe0-de_be0-lcd0"; - clocks = <&pll5 1>, <&ahb_gates 36>, <&ahb_gates 44>, - <&ahb_gates 46>, <&dram_gates 25>, - <&dram_gates 26>; + clocks = <&pll3>, <&pll5 1>, <&ahb_gates 36>, + <&ahb_gates 44>, <&ahb_gates 46>, + <&dram_gates 25>, <&dram_gates 26>; status = "disabled"; }; @@ -94,8 +96,9 @@ compatible = "allwinner,simple-framebuffer", "simple-framebuffer"; allwinner,pipeline = "de_fe0-de_be0-lcd0-tve0"; - clocks = <&pll5 1>, <&ahb_gates 34>, <&ahb_gates 36>, - <&ahb_gates 44>, <&ahb_gates 46>, + clocks = <&pll3>, <&pll5 1>, <&ahb_gates 34>, + <&ahb_gates 36>, <&ahb_gates 44>, + <&ahb_gates 46>, <&dram_gates 5>, <&dram_gates 25>, <&dram_gates 26>; status = "disabled"; }; diff --git a/arch/arm/boot/dts/sun5i-a10s.dtsi b/arch/arm/boot/dts/sun5i-a10s.dtsi index bddd0de88af6..367f33012493 100644 --- a/arch/arm/boot/dts/sun5i-a10s.dtsi +++ b/arch/arm/boot/dts/sun5i-a10s.dtsi @@ -65,8 +65,8 @@ compatible = "allwinner,simple-framebuffer", "simple-framebuffer"; allwinner,pipeline = "de_be0-lcd0-hdmi"; - clocks = <&pll5 1>, <&ahb_gates 36>, <&ahb_gates 43>, - <&ahb_gates 44>; + clocks = <&pll3>, <&pll5 1>, <&ahb_gates 36>, + <&ahb_gates 43>, <&ahb_gates 44>; status = "disabled"; }; @@ -74,7 +74,8 @@ compatible = "allwinner,simple-framebuffer", "simple-framebuffer"; allwinner,pipeline = "de_be0-lcd0"; - clocks = <&pll5 1>, <&ahb_gates 36>, <&ahb_gates 44>; + clocks = <&pll3>, <&pll5 1>, <&ahb_gates 36>, + <&ahb_gates 44>; status = "disabled"; }; @@ -82,8 +83,8 @@ compatible = "allwinner,simple-framebuffer", "simple-framebuffer"; allwinner,pipeline = "de_be0-lcd0-tve0"; - clocks = <&pll5 1>, <&ahb_gates 34>, <&ahb_gates 36>, - <&ahb_gates 44>; + clocks = <&pll3>, <&pll5 1>, <&ahb_gates 34>, + <&ahb_gates 36>, <&ahb_gates 44>; status = "disabled"; }; }; diff --git a/arch/arm/boot/dts/sun5i-r8-chip.dts b/arch/arm/boot/dts/sun5i-r8-chip.dts index a8d8b4582397..f694482bdeb6 100644 --- a/arch/arm/boot/dts/sun5i-r8-chip.dts +++ b/arch/arm/boot/dts/sun5i-r8-chip.dts @@ -52,7 +52,7 @@ / { model = "NextThing C.H.I.P."; - compatible = "nextthing,chip", "allwinner,sun5i-r8"; + compatible = "nextthing,chip", "allwinner,sun5i-r8", "allwinner,sun5i-a13"; aliases { i2c0 = &i2c0; diff --git a/arch/arm/boot/dts/sun7i-a20.dtsi b/arch/arm/boot/dts/sun7i-a20.dtsi index febdf4c72fb0..2c34bbbb9570 100644 --- a/arch/arm/boot/dts/sun7i-a20.dtsi +++ b/arch/arm/boot/dts/sun7i-a20.dtsi @@ -67,8 +67,9 @@ compatible = "allwinner,simple-framebuffer", "simple-framebuffer"; allwinner,pipeline = "de_be0-lcd0-hdmi"; - clocks = <&pll5 1>, <&ahb_gates 36>, <&ahb_gates 43>, - <&ahb_gates 44>, <&dram_gates 26>; + clocks = <&pll3>, <&pll5 1>, <&ahb_gates 36>, + <&ahb_gates 43>, <&ahb_gates 44>, + <&dram_gates 26>; status = "disabled"; }; @@ -76,8 +77,8 @@ compatible = "allwinner,simple-framebuffer", "simple-framebuffer"; allwinner,pipeline = "de_be0-lcd0"; - clocks = <&pll5 1>, <&ahb_gates 36>, <&ahb_gates 44>, - <&dram_gates 26>; + clocks = <&pll3>, <&pll5 1>, <&ahb_gates 36>, + <&ahb_gates 44>, <&dram_gates 26>; status = "disabled"; }; @@ -85,7 +86,7 @@ compatible = "allwinner,simple-framebuffer", "simple-framebuffer"; allwinner,pipeline = "de_be0-lcd0-tve0"; - clocks = <&pll5 1>, + clocks = <&pll3>, <&pll5 1>, <&ahb_gates 34>, <&ahb_gates 36>, <&ahb_gates 44>, <&dram_gates 5>, <&dram_gates 26>; status = "disabled"; @@ -231,6 +232,7 @@ pll3x2: pll3x2_clk { #clock-cells = <0>; compatible = "fixed-factor-clock"; + clocks = <&pll3>; clock-div = <1>; clock-mult = <2>; clock-output-names = "pll3-2x"; @@ -272,6 +274,7 @@ pll7x2: pll7x2_clk { #clock-cells = <0>; compatible = "fixed-factor-clock"; + clocks = <&pll7>; clock-div = <1>; clock-mult = <2>; clock-output-names = "pll7-2x"; diff --git a/arch/arm/boot/dts/tegra30-beaver.dts b/arch/arm/boot/dts/tegra30-beaver.dts index 1eca3b28ac64..b6da15d823a6 100644 --- a/arch/arm/boot/dts/tegra30-beaver.dts +++ b/arch/arm/boot/dts/tegra30-beaver.dts @@ -1843,7 +1843,7 @@ ldo5_reg: ldo5 { regulator-name = "vddio_sdmmc,avdd_vdac"; - regulator-min-microvolt = <3300000>; + regulator-min-microvolt = <1800000>; regulator-max-microvolt = <3300000>; regulator-always-on; }; @@ -1914,6 +1914,7 @@ sdhci@78000000 { status = "okay"; + vqmmc-supply = <&ldo5_reg>; cd-gpios = <&gpio TEGRA_GPIO(I, 5) GPIO_ACTIVE_LOW>; wp-gpios = <&gpio TEGRA_GPIO(T, 3) GPIO_ACTIVE_HIGH>; power-gpios = <&gpio TEGRA_GPIO(D, 7) GPIO_ACTIVE_HIGH>; diff --git a/arch/arm/include/asm/atomic.h b/arch/arm/include/asm/atomic.h index 9e10c4567eb4..66d0e215a773 100644 --- a/arch/arm/include/asm/atomic.h +++ b/arch/arm/include/asm/atomic.h @@ -77,8 +77,36 @@ static inline int atomic_##op##_return_relaxed(int i, atomic_t *v) \ return result; \ } +#define ATOMIC_FETCH_OP(op, c_op, asm_op) \ +static inline int atomic_fetch_##op##_relaxed(int i, atomic_t *v) \ +{ \ + unsigned long tmp; \ + int result, val; \ + \ + prefetchw(&v->counter); \ + \ + __asm__ __volatile__("@ atomic_fetch_" #op "\n" \ +"1: ldrex %0, [%4]\n" \ +" " #asm_op " %1, %0, %5\n" \ +" strex %2, %1, [%4]\n" \ +" teq %2, #0\n" \ +" bne 1b" \ + : "=&r" (result), "=&r" (val), "=&r" (tmp), "+Qo" (v->counter) \ + : "r" (&v->counter), "Ir" (i) \ + : "cc"); \ + \ + return result; \ +} + #define atomic_add_return_relaxed atomic_add_return_relaxed #define atomic_sub_return_relaxed atomic_sub_return_relaxed +#define atomic_fetch_add_relaxed atomic_fetch_add_relaxed +#define atomic_fetch_sub_relaxed atomic_fetch_sub_relaxed + +#define atomic_fetch_and_relaxed atomic_fetch_and_relaxed +#define atomic_fetch_andnot_relaxed atomic_fetch_andnot_relaxed +#define atomic_fetch_or_relaxed atomic_fetch_or_relaxed +#define atomic_fetch_xor_relaxed atomic_fetch_xor_relaxed static inline int atomic_cmpxchg_relaxed(atomic_t *ptr, int old, int new) { @@ -159,6 +187,20 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ return val; \ } +#define ATOMIC_FETCH_OP(op, c_op, asm_op) \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + unsigned long flags; \ + int val; \ + \ + raw_local_irq_save(flags); \ + val = v->counter; \ + v->counter c_op i; \ + raw_local_irq_restore(flags); \ + \ + return val; \ +} + static inline int atomic_cmpxchg(atomic_t *v, int old, int new) { int ret; @@ -187,19 +229,26 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u) #define ATOMIC_OPS(op, c_op, asm_op) \ ATOMIC_OP(op, c_op, asm_op) \ - ATOMIC_OP_RETURN(op, c_op, asm_op) + ATOMIC_OP_RETURN(op, c_op, asm_op) \ + ATOMIC_FETCH_OP(op, c_op, asm_op) ATOMIC_OPS(add, +=, add) ATOMIC_OPS(sub, -=, sub) #define atomic_andnot atomic_andnot -ATOMIC_OP(and, &=, and) -ATOMIC_OP(andnot, &= ~, bic) -ATOMIC_OP(or, |=, orr) -ATOMIC_OP(xor, ^=, eor) +#undef ATOMIC_OPS +#define ATOMIC_OPS(op, c_op, asm_op) \ + ATOMIC_OP(op, c_op, asm_op) \ + ATOMIC_FETCH_OP(op, c_op, asm_op) + +ATOMIC_OPS(and, &=, and) +ATOMIC_OPS(andnot, &= ~, bic) +ATOMIC_OPS(or, |=, orr) +ATOMIC_OPS(xor, ^=, eor) #undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP @@ -317,24 +366,61 @@ atomic64_##op##_return_relaxed(long long i, atomic64_t *v) \ return result; \ } +#define ATOMIC64_FETCH_OP(op, op1, op2) \ +static inline long long \ +atomic64_fetch_##op##_relaxed(long long i, atomic64_t *v) \ +{ \ + long long result, val; \ + unsigned long tmp; \ + \ + prefetchw(&v->counter); \ + \ + __asm__ __volatile__("@ atomic64_fetch_" #op "\n" \ +"1: ldrexd %0, %H0, [%4]\n" \ +" " #op1 " %Q1, %Q0, %Q5\n" \ +" " #op2 " %R1, %R0, %R5\n" \ +" strexd %2, %1, %H1, [%4]\n" \ +" teq %2, #0\n" \ +" bne 1b" \ + : "=&r" (result), "=&r" (val), "=&r" (tmp), "+Qo" (v->counter) \ + : "r" (&v->counter), "r" (i) \ + : "cc"); \ + \ + return result; \ +} + #define ATOMIC64_OPS(op, op1, op2) \ ATOMIC64_OP(op, op1, op2) \ - ATOMIC64_OP_RETURN(op, op1, op2) + ATOMIC64_OP_RETURN(op, op1, op2) \ + ATOMIC64_FETCH_OP(op, op1, op2) ATOMIC64_OPS(add, adds, adc) ATOMIC64_OPS(sub, subs, sbc) #define atomic64_add_return_relaxed atomic64_add_return_relaxed #define atomic64_sub_return_relaxed atomic64_sub_return_relaxed +#define atomic64_fetch_add_relaxed atomic64_fetch_add_relaxed +#define atomic64_fetch_sub_relaxed atomic64_fetch_sub_relaxed + +#undef ATOMIC64_OPS +#define ATOMIC64_OPS(op, op1, op2) \ + ATOMIC64_OP(op, op1, op2) \ + ATOMIC64_FETCH_OP(op, op1, op2) #define atomic64_andnot atomic64_andnot -ATOMIC64_OP(and, and, and) -ATOMIC64_OP(andnot, bic, bic) -ATOMIC64_OP(or, orr, orr) -ATOMIC64_OP(xor, eor, eor) +ATOMIC64_OPS(and, and, and) +ATOMIC64_OPS(andnot, bic, bic) +ATOMIC64_OPS(or, orr, orr) +ATOMIC64_OPS(xor, eor, eor) + +#define atomic64_fetch_and_relaxed atomic64_fetch_and_relaxed +#define atomic64_fetch_andnot_relaxed atomic64_fetch_andnot_relaxed +#define atomic64_fetch_or_relaxed atomic64_fetch_or_relaxed +#define atomic64_fetch_xor_relaxed atomic64_fetch_xor_relaxed #undef ATOMIC64_OPS +#undef ATOMIC64_FETCH_OP #undef ATOMIC64_OP_RETURN #undef ATOMIC64_OP diff --git a/arch/arm/include/asm/efi.h b/arch/arm/include/asm/efi.h index a708fa1f0905..766bf9b78160 100644 --- a/arch/arm/include/asm/efi.h +++ b/arch/arm/include/asm/efi.h @@ -28,10 +28,10 @@ int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md); #define arch_efi_call_virt_setup() efi_virtmap_load() #define arch_efi_call_virt_teardown() efi_virtmap_unload() -#define arch_efi_call_virt(f, args...) \ +#define arch_efi_call_virt(p, f, args...) \ ({ \ efi_##f##_t *__f; \ - __f = efi.systab->runtime->f; \ + __f = p->f; \ __f(args); \ }) diff --git a/arch/arm/include/asm/spinlock.h b/arch/arm/include/asm/spinlock.h index 0fa418463f49..4bec45442072 100644 --- a/arch/arm/include/asm/spinlock.h +++ b/arch/arm/include/asm/spinlock.h @@ -6,6 +6,8 @@ #endif #include <linux/prefetch.h> +#include <asm/barrier.h> +#include <asm/processor.h> /* * sev and wfe are ARMv6K extensions. Uniprocessor ARMv6 may not have the K @@ -50,8 +52,21 @@ static inline void dsb_sev(void) * memory. */ -#define arch_spin_unlock_wait(lock) \ - do { while (arch_spin_is_locked(lock)) cpu_relax(); } while (0) +static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) +{ + u16 owner = READ_ONCE(lock->tickets.owner); + + for (;;) { + arch_spinlock_t tmp = READ_ONCE(*lock); + + if (tmp.tickets.owner == tmp.tickets.next || + tmp.tickets.owner != owner) + break; + + wfe(); + } + smp_acquire__after_ctrl_dep(); +} #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h index df3f60cb1168..a2b3eb313a25 100644 --- a/arch/arm/include/uapi/asm/kvm.h +++ b/arch/arm/include/uapi/asm/kvm.h @@ -139,8 +139,8 @@ struct kvm_arch_memory_slot { #define ARM_CP15_REG64(...) __ARM_CP15_REG64(__VA_ARGS__) #define KVM_REG_ARM_TIMER_CTL ARM_CP15_REG32(0, 14, 3, 1) -#define KVM_REG_ARM_TIMER_CNT ARM_CP15_REG64(1, 14) -#define KVM_REG_ARM_TIMER_CVAL ARM_CP15_REG64(3, 14) +#define KVM_REG_ARM_TIMER_CNT ARM_CP15_REG64(1, 14) +#define KVM_REG_ARM_TIMER_CVAL ARM_CP15_REG64(3, 14) /* Normal registers are mapped as coprocessor 16. */ #define KVM_REG_ARM_CORE (0x0010 << KVM_REG_ARM_COPROC_SHIFT) diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 893941ec98dc..f1bde7c4e736 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -263,6 +263,7 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) kvm_timer_vcpu_terminate(vcpu); kvm_vgic_vcpu_destroy(vcpu); kvm_pmu_vcpu_destroy(vcpu); + kvm_vcpu_uninit(vcpu); kmem_cache_free(kvm_vcpu_cache, vcpu); } diff --git a/arch/arm/mach-mvebu/Makefile b/arch/arm/mach-mvebu/Makefile index ecf9e0c3b107..e53c6cfcab51 100644 --- a/arch/arm/mach-mvebu/Makefile +++ b/arch/arm/mach-mvebu/Makefile @@ -7,9 +7,15 @@ CFLAGS_pmsu.o := -march=armv7-a obj-$(CONFIG_MACH_MVEBU_ANY) += system-controller.o mvebu-soc-id.o ifeq ($(CONFIG_MACH_MVEBU_V7),y) -obj-y += cpu-reset.o board-v7.o coherency.o coherency_ll.o pmsu.o pmsu_ll.o pm.o pm-board.o +obj-y += cpu-reset.o board-v7.o coherency.o coherency_ll.o pmsu.o pmsu_ll.o + +obj-$(CONFIG_PM) += pm.o pm-board.o obj-$(CONFIG_SMP) += platsmp.o headsmp.o platsmp-a9.o headsmp-a9.o endif obj-$(CONFIG_MACH_DOVE) += dove.o -obj-$(CONFIG_MACH_KIRKWOOD) += kirkwood.o kirkwood-pm.o + +ifeq ($(CONFIG_MACH_KIRKWOOD),y) +obj-y += kirkwood.o +obj-$(CONFIG_PM) += kirkwood-pm.o +endif diff --git a/arch/arm/mach-mvebu/coherency.c b/arch/arm/mach-mvebu/coherency.c index 7e989d61159c..e80f0dde2189 100644 --- a/arch/arm/mach-mvebu/coherency.c +++ b/arch/arm/mach-mvebu/coherency.c @@ -162,22 +162,16 @@ exit: } /* - * This ioremap hook is used on Armada 375/38x to ensure that PCIe - * memory areas are mapped as MT_UNCACHED instead of MT_DEVICE. This - * is needed as a workaround for a deadlock issue between the PCIe - * interface and the cache controller. + * This ioremap hook is used on Armada 375/38x to ensure that all MMIO + * areas are mapped as MT_UNCACHED instead of MT_DEVICE. This is + * needed for the HW I/O coherency mechanism to work properly without + * deadlock. */ static void __iomem * -armada_pcie_wa_ioremap_caller(phys_addr_t phys_addr, size_t size, - unsigned int mtype, void *caller) +armada_wa_ioremap_caller(phys_addr_t phys_addr, size_t size, + unsigned int mtype, void *caller) { - struct resource pcie_mem; - - mvebu_mbus_get_pcie_mem_aperture(&pcie_mem); - - if (pcie_mem.start <= phys_addr && (phys_addr + size) <= pcie_mem.end) - mtype = MT_UNCACHED; - + mtype = MT_UNCACHED; return __arm_ioremap_caller(phys_addr, size, mtype, caller); } @@ -186,7 +180,8 @@ static void __init armada_375_380_coherency_init(struct device_node *np) struct device_node *cache_dn; coherency_cpu_base = of_iomap(np, 0); - arch_ioremap_caller = armada_pcie_wa_ioremap_caller; + arch_ioremap_caller = armada_wa_ioremap_caller; + pci_ioremap_set_mem_type(MT_UNCACHED); /* * We should switch the PL310 to I/O coherency mode only if diff --git a/arch/arm64/include/asm/atomic.h b/arch/arm64/include/asm/atomic.h index f3a3586a421c..c0235e0ff849 100644 --- a/arch/arm64/include/asm/atomic.h +++ b/arch/arm64/include/asm/atomic.h @@ -76,6 +76,36 @@ #define atomic_dec_return_release(v) atomic_sub_return_release(1, (v)) #define atomic_dec_return(v) atomic_sub_return(1, (v)) +#define atomic_fetch_add_relaxed atomic_fetch_add_relaxed +#define atomic_fetch_add_acquire atomic_fetch_add_acquire +#define atomic_fetch_add_release atomic_fetch_add_release +#define atomic_fetch_add atomic_fetch_add + +#define atomic_fetch_sub_relaxed atomic_fetch_sub_relaxed +#define atomic_fetch_sub_acquire atomic_fetch_sub_acquire +#define atomic_fetch_sub_release atomic_fetch_sub_release +#define atomic_fetch_sub atomic_fetch_sub + +#define atomic_fetch_and_relaxed atomic_fetch_and_relaxed +#define atomic_fetch_and_acquire atomic_fetch_and_acquire +#define atomic_fetch_and_release atomic_fetch_and_release +#define atomic_fetch_and atomic_fetch_and + +#define atomic_fetch_andnot_relaxed atomic_fetch_andnot_relaxed +#define atomic_fetch_andnot_acquire atomic_fetch_andnot_acquire +#define atomic_fetch_andnot_release atomic_fetch_andnot_release +#define atomic_fetch_andnot atomic_fetch_andnot + +#define atomic_fetch_or_relaxed atomic_fetch_or_relaxed +#define atomic_fetch_or_acquire atomic_fetch_or_acquire +#define atomic_fetch_or_release atomic_fetch_or_release +#define atomic_fetch_or atomic_fetch_or + +#define atomic_fetch_xor_relaxed atomic_fetch_xor_relaxed +#define atomic_fetch_xor_acquire atomic_fetch_xor_acquire +#define atomic_fetch_xor_release atomic_fetch_xor_release +#define atomic_fetch_xor atomic_fetch_xor + #define atomic_xchg_relaxed(v, new) xchg_relaxed(&((v)->counter), (new)) #define atomic_xchg_acquire(v, new) xchg_acquire(&((v)->counter), (new)) #define atomic_xchg_release(v, new) xchg_release(&((v)->counter), (new)) @@ -125,6 +155,36 @@ #define atomic64_dec_return_release(v) atomic64_sub_return_release(1, (v)) #define atomic64_dec_return(v) atomic64_sub_return(1, (v)) +#define atomic64_fetch_add_relaxed atomic64_fetch_add_relaxed +#define atomic64_fetch_add_acquire atomic64_fetch_add_acquire +#define atomic64_fetch_add_release atomic64_fetch_add_release +#define atomic64_fetch_add atomic64_fetch_add + +#define atomic64_fetch_sub_relaxed atomic64_fetch_sub_relaxed +#define atomic64_fetch_sub_acquire atomic64_fetch_sub_acquire +#define atomic64_fetch_sub_release atomic64_fetch_sub_release +#define atomic64_fetch_sub atomic64_fetch_sub + +#define atomic64_fetch_and_relaxed atomic64_fetch_and_relaxed +#define atomic64_fetch_and_acquire atomic64_fetch_and_acquire +#define atomic64_fetch_and_release atomic64_fetch_and_release +#define atomic64_fetch_and atomic64_fetch_and + +#define atomic64_fetch_andnot_relaxed atomic64_fetch_andnot_relaxed +#define atomic64_fetch_andnot_acquire atomic64_fetch_andnot_acquire +#define atomic64_fetch_andnot_release atomic64_fetch_andnot_release +#define atomic64_fetch_andnot atomic64_fetch_andnot + +#define atomic64_fetch_or_relaxed atomic64_fetch_or_relaxed +#define atomic64_fetch_or_acquire atomic64_fetch_or_acquire +#define atomic64_fetch_or_release atomic64_fetch_or_release +#define atomic64_fetch_or atomic64_fetch_or + +#define atomic64_fetch_xor_relaxed atomic64_fetch_xor_relaxed +#define atomic64_fetch_xor_acquire atomic64_fetch_xor_acquire +#define atomic64_fetch_xor_release atomic64_fetch_xor_release +#define atomic64_fetch_xor atomic64_fetch_xor + #define atomic64_xchg_relaxed atomic_xchg_relaxed #define atomic64_xchg_acquire atomic_xchg_acquire #define atomic64_xchg_release atomic_xchg_release diff --git a/arch/arm64/include/asm/atomic_ll_sc.h b/arch/arm64/include/asm/atomic_ll_sc.h index f61c84f6ba02..f819fdcff1ac 100644 --- a/arch/arm64/include/asm/atomic_ll_sc.h +++ b/arch/arm64/include/asm/atomic_ll_sc.h @@ -77,26 +77,57 @@ __LL_SC_PREFIX(atomic_##op##_return##name(int i, atomic_t *v)) \ } \ __LL_SC_EXPORT(atomic_##op##_return##name); +#define ATOMIC_FETCH_OP(name, mb, acq, rel, cl, op, asm_op) \ +__LL_SC_INLINE int \ +__LL_SC_PREFIX(atomic_fetch_##op##name(int i, atomic_t *v)) \ +{ \ + unsigned long tmp; \ + int val, result; \ + \ + asm volatile("// atomic_fetch_" #op #name "\n" \ +" prfm pstl1strm, %3\n" \ +"1: ld" #acq "xr %w0, %3\n" \ +" " #asm_op " %w1, %w0, %w4\n" \ +" st" #rel "xr %w2, %w1, %3\n" \ +" cbnz %w2, 1b\n" \ +" " #mb \ + : "=&r" (result), "=&r" (val), "=&r" (tmp), "+Q" (v->counter) \ + : "Ir" (i) \ + : cl); \ + \ + return result; \ +} \ +__LL_SC_EXPORT(atomic_fetch_##op##name); + #define ATOMIC_OPS(...) \ ATOMIC_OP(__VA_ARGS__) \ - ATOMIC_OP_RETURN( , dmb ish, , l, "memory", __VA_ARGS__) - -#define ATOMIC_OPS_RLX(...) \ - ATOMIC_OPS(__VA_ARGS__) \ + ATOMIC_OP_RETURN( , dmb ish, , l, "memory", __VA_ARGS__)\ ATOMIC_OP_RETURN(_relaxed, , , , , __VA_ARGS__)\ ATOMIC_OP_RETURN(_acquire, , a, , "memory", __VA_ARGS__)\ - ATOMIC_OP_RETURN(_release, , , l, "memory", __VA_ARGS__) + ATOMIC_OP_RETURN(_release, , , l, "memory", __VA_ARGS__)\ + ATOMIC_FETCH_OP ( , dmb ish, , l, "memory", __VA_ARGS__)\ + ATOMIC_FETCH_OP (_relaxed, , , , , __VA_ARGS__)\ + ATOMIC_FETCH_OP (_acquire, , a, , "memory", __VA_ARGS__)\ + ATOMIC_FETCH_OP (_release, , , l, "memory", __VA_ARGS__) -ATOMIC_OPS_RLX(add, add) -ATOMIC_OPS_RLX(sub, sub) +ATOMIC_OPS(add, add) +ATOMIC_OPS(sub, sub) + +#undef ATOMIC_OPS +#define ATOMIC_OPS(...) \ + ATOMIC_OP(__VA_ARGS__) \ + ATOMIC_FETCH_OP ( , dmb ish, , l, "memory", __VA_ARGS__)\ + ATOMIC_FETCH_OP (_relaxed, , , , , __VA_ARGS__)\ + ATOMIC_FETCH_OP (_acquire, , a, , "memory", __VA_ARGS__)\ + ATOMIC_FETCH_OP (_release, , , l, "memory", __VA_ARGS__) -ATOMIC_OP(and, and) -ATOMIC_OP(andnot, bic) -ATOMIC_OP(or, orr) -ATOMIC_OP(xor, eor) +ATOMIC_OPS(and, and) +ATOMIC_OPS(andnot, bic) +ATOMIC_OPS(or, orr) +ATOMIC_OPS(xor, eor) -#undef ATOMIC_OPS_RLX #undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP @@ -140,26 +171,57 @@ __LL_SC_PREFIX(atomic64_##op##_return##name(long i, atomic64_t *v)) \ } \ __LL_SC_EXPORT(atomic64_##op##_return##name); +#define ATOMIC64_FETCH_OP(name, mb, acq, rel, cl, op, asm_op) \ +__LL_SC_INLINE long \ +__LL_SC_PREFIX(atomic64_fetch_##op##name(long i, atomic64_t *v)) \ +{ \ + long result, val; \ + unsigned long tmp; \ + \ + asm volatile("// atomic64_fetch_" #op #name "\n" \ +" prfm pstl1strm, %3\n" \ +"1: ld" #acq "xr %0, %3\n" \ +" " #asm_op " %1, %0, %4\n" \ +" st" #rel "xr %w2, %1, %3\n" \ +" cbnz %w2, 1b\n" \ +" " #mb \ + : "=&r" (result), "=&r" (val), "=&r" (tmp), "+Q" (v->counter) \ + : "Ir" (i) \ + : cl); \ + \ + return result; \ +} \ +__LL_SC_EXPORT(atomic64_fetch_##op##name); + #define ATOMIC64_OPS(...) \ ATOMIC64_OP(__VA_ARGS__) \ - ATOMIC64_OP_RETURN(, dmb ish, , l, "memory", __VA_ARGS__) - -#define ATOMIC64_OPS_RLX(...) \ - ATOMIC64_OPS(__VA_ARGS__) \ + ATOMIC64_OP_RETURN(, dmb ish, , l, "memory", __VA_ARGS__) \ ATOMIC64_OP_RETURN(_relaxed,, , , , __VA_ARGS__) \ ATOMIC64_OP_RETURN(_acquire,, a, , "memory", __VA_ARGS__) \ - ATOMIC64_OP_RETURN(_release,, , l, "memory", __VA_ARGS__) + ATOMIC64_OP_RETURN(_release,, , l, "memory", __VA_ARGS__) \ + ATOMIC64_FETCH_OP (, dmb ish, , l, "memory", __VA_ARGS__) \ + ATOMIC64_FETCH_OP (_relaxed,, , , , __VA_ARGS__) \ + ATOMIC64_FETCH_OP (_acquire,, a, , "memory", __VA_ARGS__) \ + ATOMIC64_FETCH_OP (_release,, , l, "memory", __VA_ARGS__) -ATOMIC64_OPS_RLX(add, add) -ATOMIC64_OPS_RLX(sub, sub) +ATOMIC64_OPS(add, add) +ATOMIC64_OPS(sub, sub) + +#undef ATOMIC64_OPS +#define ATOMIC64_OPS(...) \ + ATOMIC64_OP(__VA_ARGS__) \ + ATOMIC64_FETCH_OP (, dmb ish, , l, "memory", __VA_ARGS__) \ + ATOMIC64_FETCH_OP (_relaxed,, , , , __VA_ARGS__) \ + ATOMIC64_FETCH_OP (_acquire,, a, , "memory", __VA_ARGS__) \ + ATOMIC64_FETCH_OP (_release,, , l, "memory", __VA_ARGS__) -ATOMIC64_OP(and, and) -ATOMIC64_OP(andnot, bic) -ATOMIC64_OP(or, orr) -ATOMIC64_OP(xor, eor) +ATOMIC64_OPS(and, and) +ATOMIC64_OPS(andnot, bic) +ATOMIC64_OPS(or, orr) +ATOMIC64_OPS(xor, eor) -#undef ATOMIC64_OPS_RLX #undef ATOMIC64_OPS +#undef ATOMIC64_FETCH_OP #undef ATOMIC64_OP_RETURN #undef ATOMIC64_OP diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h index 39c1d340fec5..b5890be8f257 100644 --- a/arch/arm64/include/asm/atomic_lse.h +++ b/arch/arm64/include/asm/atomic_lse.h @@ -26,54 +26,57 @@ #endif #define __LL_SC_ATOMIC(op) __LL_SC_CALL(atomic_##op) - -static inline void atomic_andnot(int i, atomic_t *v) -{ - register int w0 asm ("w0") = i; - register atomic_t *x1 asm ("x1") = v; - - asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC(andnot), - " stclr %w[i], %[v]\n") - : [i] "+r" (w0), [v] "+Q" (v->counter) - : "r" (x1) - : __LL_SC_CLOBBERS); +#define ATOMIC_OP(op, asm_op) \ +static inline void atomic_##op(int i, atomic_t *v) \ +{ \ + register int w0 asm ("w0") = i; \ + register atomic_t *x1 asm ("x1") = v; \ + \ + asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC(op), \ +" " #asm_op " %w[i], %[v]\n") \ + : [i] "+r" (w0), [v] "+Q" (v->counter) \ + : "r" (x1) \ + : __LL_SC_CLOBBERS); \ } -static inline void atomic_or(int i, atomic_t *v) -{ - register int w0 asm ("w0") = i; - register atomic_t *x1 asm ("x1") = v; +ATOMIC_OP(andnot, stclr) +ATOMIC_OP(or, stset) +ATOMIC_OP(xor, steor) +ATOMIC_OP(add, stadd) - asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC(or), - " stset %w[i], %[v]\n") - : [i] "+r" (w0), [v] "+Q" (v->counter) - : "r" (x1) - : __LL_SC_CLOBBERS); -} +#undef ATOMIC_OP -static inline void atomic_xor(int i, atomic_t *v) -{ - register int w0 asm ("w0") = i; - register atomic_t *x1 asm ("x1") = v; - - asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC(xor), - " steor %w[i], %[v]\n") - : [i] "+r" (w0), [v] "+Q" (v->counter) - : "r" (x1) - : __LL_SC_CLOBBERS); +#define ATOMIC_FETCH_OP(name, mb, op, asm_op, cl...) \ +static inline int atomic_fetch_##op##name(int i, atomic_t *v) \ +{ \ + register int w0 asm ("w0") = i; \ + register atomic_t *x1 asm ("x1") = v; \ + \ + asm volatile(ARM64_LSE_ATOMIC_INSN( \ + /* LL/SC */ \ + __LL_SC_ATOMIC(fetch_##op##name), \ + /* LSE atomics */ \ +" " #asm_op #mb " %w[i], %w[i], %[v]") \ + : [i] "+r" (w0), [v] "+Q" (v->counter) \ + : "r" (x1) \ + : __LL_SC_CLOBBERS, ##cl); \ + \ + return w0; \ } -static inline void atomic_add(int i, atomic_t *v) -{ - register int w0 asm ("w0") = i; - register atomic_t *x1 asm ("x1") = v; +#define ATOMIC_FETCH_OPS(op, asm_op) \ + ATOMIC_FETCH_OP(_relaxed, , op, asm_op) \ + ATOMIC_FETCH_OP(_acquire, a, op, asm_op, "memory") \ + ATOMIC_FETCH_OP(_release, l, op, asm_op, "memory") \ + ATOMIC_FETCH_OP( , al, op, asm_op, "memory") - asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC(add), - " stadd %w[i], %[v]\n") - : [i] "+r" (w0), [v] "+Q" (v->counter) - : "r" (x1) - : __LL_SC_CLOBBERS); -} +ATOMIC_FETCH_OPS(andnot, ldclr) +ATOMIC_FETCH_OPS(or, ldset) +ATOMIC_FETCH_OPS(xor, ldeor) +ATOMIC_FETCH_OPS(add, ldadd) + +#undef ATOMIC_FETCH_OP +#undef ATOMIC_FETCH_OPS #define ATOMIC_OP_ADD_RETURN(name, mb, cl...) \ static inline int atomic_add_return##name(int i, atomic_t *v) \ @@ -119,6 +122,33 @@ static inline void atomic_and(int i, atomic_t *v) : __LL_SC_CLOBBERS); } +#define ATOMIC_FETCH_OP_AND(name, mb, cl...) \ +static inline int atomic_fetch_and##name(int i, atomic_t *v) \ +{ \ + register int w0 asm ("w0") = i; \ + register atomic_t *x1 asm ("x1") = v; \ + \ + asm volatile(ARM64_LSE_ATOMIC_INSN( \ + /* LL/SC */ \ + " nop\n" \ + __LL_SC_ATOMIC(fetch_and##name), \ + /* LSE atomics */ \ + " mvn %w[i], %w[i]\n" \ + " ldclr" #mb " %w[i], %w[i], %[v]") \ + : [i] "+r" (w0), [v] "+Q" (v->counter) \ + : "r" (x1) \ + : __LL_SC_CLOBBERS, ##cl); \ + \ + return w0; \ +} + +ATOMIC_FETCH_OP_AND(_relaxed, ) +ATOMIC_FETCH_OP_AND(_acquire, a, "memory") +ATOMIC_FETCH_OP_AND(_release, l, "memory") +ATOMIC_FETCH_OP_AND( , al, "memory") + +#undef ATOMIC_FETCH_OP_AND + static inline void atomic_sub(int i, atomic_t *v) { register int w0 asm ("w0") = i; @@ -164,57 +194,87 @@ ATOMIC_OP_SUB_RETURN(_release, l, "memory") ATOMIC_OP_SUB_RETURN( , al, "memory") #undef ATOMIC_OP_SUB_RETURN -#undef __LL_SC_ATOMIC - -#define __LL_SC_ATOMIC64(op) __LL_SC_CALL(atomic64_##op) - -static inline void atomic64_andnot(long i, atomic64_t *v) -{ - register long x0 asm ("x0") = i; - register atomic64_t *x1 asm ("x1") = v; - asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC64(andnot), - " stclr %[i], %[v]\n") - : [i] "+r" (x0), [v] "+Q" (v->counter) - : "r" (x1) - : __LL_SC_CLOBBERS); +#define ATOMIC_FETCH_OP_SUB(name, mb, cl...) \ +static inline int atomic_fetch_sub##name(int i, atomic_t *v) \ +{ \ + register int w0 asm ("w0") = i; \ + register atomic_t *x1 asm ("x1") = v; \ + \ + asm volatile(ARM64_LSE_ATOMIC_INSN( \ + /* LL/SC */ \ + " nop\n" \ + __LL_SC_ATOMIC(fetch_sub##name), \ + /* LSE atomics */ \ + " neg %w[i], %w[i]\n" \ + " ldadd" #mb " %w[i], %w[i], %[v]") \ + : [i] "+r" (w0), [v] "+Q" (v->counter) \ + : "r" (x1) \ + : __LL_SC_CLOBBERS, ##cl); \ + \ + return w0; \ } -static inline void atomic64_or(long i, atomic64_t *v) -{ - register long x0 asm ("x0") = i; - register atomic64_t *x1 asm ("x1") = v; +ATOMIC_FETCH_OP_SUB(_relaxed, ) +ATOMIC_FETCH_OP_SUB(_acquire, a, "memory") +ATOMIC_FETCH_OP_SUB(_release, l, "memory") +ATOMIC_FETCH_OP_SUB( , al, "memory") - asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC64(or), - " stset %[i], %[v]\n") - : [i] "+r" (x0), [v] "+Q" (v->counter) - : "r" (x1) - : __LL_SC_CLOBBERS); +#undef ATOMIC_FETCH_OP_SUB +#undef __LL_SC_ATOMIC + +#define __LL_SC_ATOMIC64(op) __LL_SC_CALL(atomic64_##op) +#define ATOMIC64_OP(op, asm_op) \ +static inline void atomic64_##op(long i, atomic64_t *v) \ +{ \ + register long x0 asm ("x0") = i; \ + register atomic64_t *x1 asm ("x1") = v; \ + \ + asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC64(op), \ +" " #asm_op " %[i], %[v]\n") \ + : [i] "+r" (x0), [v] "+Q" (v->counter) \ + : "r" (x1) \ + : __LL_SC_CLOBBERS); \ } -static inline void atomic64_xor(long i, atomic64_t *v) -{ - register long x0 asm ("x0") = i; - register atomic64_t *x1 asm ("x1") = v; +ATOMIC64_OP(andnot, stclr) +ATOMIC64_OP(or, stset) +ATOMIC64_OP(xor, steor) +ATOMIC64_OP(add, stadd) - asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC64(xor), - " steor %[i], %[v]\n") - : [i] "+r" (x0), [v] "+Q" (v->counter) - : "r" (x1) - : __LL_SC_CLOBBERS); +#undef ATOMIC64_OP + +#define ATOMIC64_FETCH_OP(name, mb, op, asm_op, cl...) \ +static inline long atomic64_fetch_##op##name(long i, atomic64_t *v) \ +{ \ + register long x0 asm ("x0") = i; \ + register atomic64_t *x1 asm ("x1") = v; \ + \ + asm volatile(ARM64_LSE_ATOMIC_INSN( \ + /* LL/SC */ \ + __LL_SC_ATOMIC64(fetch_##op##name), \ + /* LSE atomics */ \ +" " #asm_op #mb " %[i], %[i], %[v]") \ + : [i] "+r" (x0), [v] "+Q" (v->counter) \ + : "r" (x1) \ + : __LL_SC_CLOBBERS, ##cl); \ + \ + return x0; \ } -static inline void atomic64_add(long i, atomic64_t *v) -{ - register long x0 asm ("x0") = i; - register atomic64_t *x1 asm ("x1") = v; +#define ATOMIC64_FETCH_OPS(op, asm_op) \ + ATOMIC64_FETCH_OP(_relaxed, , op, asm_op) \ + ATOMIC64_FETCH_OP(_acquire, a, op, asm_op, "memory") \ + ATOMIC64_FETCH_OP(_release, l, op, asm_op, "memory") \ + ATOMIC64_FETCH_OP( , al, op, asm_op, "memory") - asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC64(add), - " stadd %[i], %[v]\n") - : [i] "+r" (x0), [v] "+Q" (v->counter) - : "r" (x1) - : __LL_SC_CLOBBERS); -} +ATOMIC64_FETCH_OPS(andnot, ldclr) +ATOMIC64_FETCH_OPS(or, ldset) +ATOMIC64_FETCH_OPS(xor, ldeor) +ATOMIC64_FETCH_OPS(add, ldadd) + +#undef ATOMIC64_FETCH_OP +#undef ATOMIC64_FETCH_OPS #define ATOMIC64_OP_ADD_RETURN(name, mb, cl...) \ static inline long atomic64_add_return##name(long i, atomic64_t *v) \ @@ -260,6 +320,33 @@ static inline void atomic64_and(long i, atomic64_t *v) : __LL_SC_CLOBBERS); } +#define ATOMIC64_FETCH_OP_AND(name, mb, cl...) \ +static inline long atomic64_fetch_and##name(long i, atomic64_t *v) \ +{ \ + register long x0 asm ("w0") = i; \ + register atomic64_t *x1 asm ("x1") = v; \ + \ + asm volatile(ARM64_LSE_ATOMIC_INSN( \ + /* LL/SC */ \ + " nop\n" \ + __LL_SC_ATOMIC64(fetch_and##name), \ + /* LSE atomics */ \ + " mvn %[i], %[i]\n" \ + " ldclr" #mb " %[i], %[i], %[v]") \ + : [i] "+r" (x0), [v] "+Q" (v->counter) \ + : "r" (x1) \ + : __LL_SC_CLOBBERS, ##cl); \ + \ + return x0; \ +} + +ATOMIC64_FETCH_OP_AND(_relaxed, ) +ATOMIC64_FETCH_OP_AND(_acquire, a, "memory") +ATOMIC64_FETCH_OP_AND(_release, l, "memory") +ATOMIC64_FETCH_OP_AND( , al, "memory") + +#undef ATOMIC64_FETCH_OP_AND + static inline void atomic64_sub(long i, atomic64_t *v) { register long x0 asm ("x0") = i; @@ -306,6 +393,33 @@ ATOMIC64_OP_SUB_RETURN( , al, "memory") #undef ATOMIC64_OP_SUB_RETURN +#define ATOMIC64_FETCH_OP_SUB(name, mb, cl...) \ +static inline long atomic64_fetch_sub##name(long i, atomic64_t *v) \ +{ \ + register long x0 asm ("w0") = i; \ + register atomic64_t *x1 asm ("x1") = v; \ + \ + asm volatile(ARM64_LSE_ATOMIC_INSN( \ + /* LL/SC */ \ + " nop\n" \ + __LL_SC_ATOMIC64(fetch_sub##name), \ + /* LSE atomics */ \ + " neg %[i], %[i]\n" \ + " ldadd" #mb " %[i], %[i], %[v]") \ + : [i] "+r" (x0), [v] "+Q" (v->counter) \ + : "r" (x1) \ + : __LL_SC_CLOBBERS, ##cl); \ + \ + return x0; \ +} + +ATOMIC64_FETCH_OP_SUB(_relaxed, ) +ATOMIC64_FETCH_OP_SUB(_acquire, a, "memory") +ATOMIC64_FETCH_OP_SUB(_release, l, "memory") +ATOMIC64_FETCH_OP_SUB( , al, "memory") + +#undef ATOMIC64_FETCH_OP_SUB + static inline long atomic64_dec_if_positive(atomic64_t *v) { register long x0 asm ("x0") = (long)v; diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h index dae5c49618db..4eea7f618dce 100644 --- a/arch/arm64/include/asm/barrier.h +++ b/arch/arm64/include/asm/barrier.h @@ -91,6 +91,19 @@ do { \ __u.__val; \ }) +#define smp_cond_load_acquire(ptr, cond_expr) \ +({ \ + typeof(ptr) __PTR = (ptr); \ + typeof(*ptr) VAL; \ + for (;;) { \ + VAL = smp_load_acquire(__PTR); \ + if (cond_expr) \ + break; \ + __cmpwait_relaxed(__PTR, VAL); \ + } \ + VAL; \ +}) + #include <asm-generic/barrier.h> #endif /* __ASSEMBLY__ */ diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h index 510c7b404454..bd86a79491bc 100644 --- a/arch/arm64/include/asm/cmpxchg.h +++ b/arch/arm64/include/asm/cmpxchg.h @@ -224,4 +224,55 @@ __CMPXCHG_GEN(_mb) __ret; \ }) +#define __CMPWAIT_CASE(w, sz, name) \ +static inline void __cmpwait_case_##name(volatile void *ptr, \ + unsigned long val) \ +{ \ + unsigned long tmp; \ + \ + asm volatile( \ + " ldxr" #sz "\t%" #w "[tmp], %[v]\n" \ + " eor %" #w "[tmp], %" #w "[tmp], %" #w "[val]\n" \ + " cbnz %" #w "[tmp], 1f\n" \ + " wfe\n" \ + "1:" \ + : [tmp] "=&r" (tmp), [v] "+Q" (*(unsigned long *)ptr) \ + : [val] "r" (val)); \ +} + +__CMPWAIT_CASE(w, b, 1); +__CMPWAIT_CASE(w, h, 2); +__CMPWAIT_CASE(w, , 4); +__CMPWAIT_CASE( , , 8); + +#undef __CMPWAIT_CASE + +#define __CMPWAIT_GEN(sfx) \ +static inline void __cmpwait##sfx(volatile void *ptr, \ + unsigned long val, \ + int size) \ +{ \ + switch (size) { \ + case 1: \ + return __cmpwait_case##sfx##_1(ptr, (u8)val); \ + case 2: \ + return __cmpwait_case##sfx##_2(ptr, (u16)val); \ + case 4: \ + return __cmpwait_case##sfx##_4(ptr, val); \ + case 8: \ + return __cmpwait_case##sfx##_8(ptr, val); \ + default: \ + BUILD_BUG(); \ + } \ + \ + unreachable(); \ +} + +__CMPWAIT_GEN() + +#undef __CMPWAIT_GEN + +#define __cmpwait_relaxed(ptr, val) \ + __cmpwait((ptr), (unsigned long)(val), sizeof(*(ptr))) + #endif /* __ASM_CMPXCHG_H */ diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 87e1985f3be8..9d9fd4b9a72e 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -80,12 +80,14 @@ #define APM_CPU_PART_POTENZA 0x000 #define CAVIUM_CPU_PART_THUNDERX 0x0A1 +#define CAVIUM_CPU_PART_THUNDERX_81XX 0x0A2 #define BRCM_CPU_PART_VULCAN 0x516 #define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53) #define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57) #define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX) +#define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX) #ifndef __ASSEMBLY__ diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h index 622db3c6474e..bd887663689b 100644 --- a/arch/arm64/include/asm/efi.h +++ b/arch/arm64/include/asm/efi.h @@ -23,10 +23,10 @@ int efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md); efi_virtmap_load(); \ }) -#define arch_efi_call_virt(f, args...) \ +#define arch_efi_call_virt(p, f, args...) \ ({ \ efi_##f##_t *__f; \ - __f = efi.systab->runtime->f; \ + __f = p->f; \ __f(args); \ }) diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index a307eb6e7fa8..7f94755089e2 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -117,6 +117,8 @@ struct pt_regs { }; u64 orig_x0; u64 syscallno; + u64 orig_addr_limit; + u64 unused; // maintain 16 byte alignment }; #define arch_has_single_step() (1) diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index f8e5d47f0880..2f4ba774488a 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -60,6 +60,7 @@ int main(void) DEFINE(S_PC, offsetof(struct pt_regs, pc)); DEFINE(S_ORIG_X0, offsetof(struct pt_regs, orig_x0)); DEFINE(S_SYSCALLNO, offsetof(struct pt_regs, syscallno)); + DEFINE(S_ORIG_ADDR_LIMIT, offsetof(struct pt_regs, orig_addr_limit)); DEFINE(S_FRAME_SIZE, sizeof(struct pt_regs)); BLANK(); DEFINE(MM_CONTEXT_ID, offsetof(struct mm_struct, context.id.counter)); diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index d42789499f17..af716b65110d 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -98,6 +98,12 @@ const struct arm64_cpu_capabilities arm64_errata[] = { MIDR_RANGE(MIDR_THUNDERX, 0x00, (1 << MIDR_VARIANT_SHIFT) | 1), }, + { + /* Cavium ThunderX, T81 pass 1.0 */ + .desc = "Cavium erratum 27456", + .capability = ARM64_WORKAROUND_CAVIUM_27456, + MIDR_RANGE(MIDR_THUNDERX_81XX, 0x00, 0x00), + }, #endif { } diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 12e8d2bcb3f9..6c3b7345a6c4 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -28,6 +28,7 @@ #include <asm/errno.h> #include <asm/esr.h> #include <asm/irq.h> +#include <asm/memory.h> #include <asm/thread_info.h> #include <asm/unistd.h> @@ -97,7 +98,14 @@ mov x29, xzr // fp pointed to user-space .else add x21, sp, #S_FRAME_SIZE - .endif + get_thread_info tsk + /* Save the task's original addr_limit and set USER_DS (TASK_SIZE_64) */ + ldr x20, [tsk, #TI_ADDR_LIMIT] + str x20, [sp, #S_ORIG_ADDR_LIMIT] + mov x20, #TASK_SIZE_64 + str x20, [tsk, #TI_ADDR_LIMIT] + ALTERNATIVE(nop, SET_PSTATE_UAO(0), ARM64_HAS_UAO, CONFIG_ARM64_UAO) + .endif /* \el == 0 */ mrs x22, elr_el1 mrs x23, spsr_el1 stp lr, x21, [sp, #S_LR] @@ -128,6 +136,14 @@ .endm .macro kernel_exit, el + .if \el != 0 + /* Restore the task's original addr_limit. */ + ldr x20, [sp, #S_ORIG_ADDR_LIMIT] + str x20, [tsk, #TI_ADDR_LIMIT] + + /* No need to restore UAO, it will be restored from SPSR_EL1 */ + .endif + ldp x21, x22, [sp, #S_PC] // load ELR, SPSR .if \el == 0 ct_user_enter @@ -406,7 +422,6 @@ el1_irq: bl trace_hardirqs_off #endif - get_thread_info tsk irq_handler #ifdef CONFIG_PREEMPT diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 013e2cbe7924..b1166d1e5955 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -280,7 +280,8 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, } if (permission_fault(esr) && (addr < USER_DS)) { - if (get_fs() == KERNEL_DS) + /* regs->orig_addr_limit may be 0 if we entered from EL0 */ + if (regs->orig_addr_limit == KERNEL_DS) die("Accessing user space memory with fs=KERNEL_DS", regs, esr); if (!search_exception_tables(regs->pc)) diff --git a/arch/avr32/include/asm/atomic.h b/arch/avr32/include/asm/atomic.h index d74fd8ce980a..3d5ce38a6f0b 100644 --- a/arch/avr32/include/asm/atomic.h +++ b/arch/avr32/include/asm/atomic.h @@ -41,21 +41,49 @@ static inline int __atomic_##op##_return(int i, atomic_t *v) \ return result; \ } +#define ATOMIC_FETCH_OP(op, asm_op, asm_con) \ +static inline int __atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + int result, val; \ + \ + asm volatile( \ + "/* atomic_fetch_" #op " */\n" \ + "1: ssrf 5\n" \ + " ld.w %0, %3\n" \ + " mov %1, %0\n" \ + " " #asm_op " %1, %4\n" \ + " stcond %2, %1\n" \ + " brne 1b" \ + : "=&r" (result), "=&r" (val), "=o" (v->counter) \ + : "m" (v->counter), #asm_con (i) \ + : "cc"); \ + \ + return result; \ +} + ATOMIC_OP_RETURN(sub, sub, rKs21) ATOMIC_OP_RETURN(add, add, r) +ATOMIC_FETCH_OP (sub, sub, rKs21) +ATOMIC_FETCH_OP (add, add, r) -#define ATOMIC_OP(op, asm_op) \ +#define ATOMIC_OPS(op, asm_op) \ ATOMIC_OP_RETURN(op, asm_op, r) \ static inline void atomic_##op(int i, atomic_t *v) \ { \ (void)__atomic_##op##_return(i, v); \ +} \ +ATOMIC_FETCH_OP(op, asm_op, r) \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + return __atomic_fetch_##op(i, v); \ } -ATOMIC_OP(and, and) -ATOMIC_OP(or, or) -ATOMIC_OP(xor, eor) +ATOMIC_OPS(and, and) +ATOMIC_OPS(or, or) +ATOMIC_OPS(xor, eor) -#undef ATOMIC_OP +#undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN /* @@ -87,6 +115,14 @@ static inline int atomic_add_return(int i, atomic_t *v) return __atomic_add_return(i, v); } +static inline int atomic_fetch_add(int i, atomic_t *v) +{ + if (IS_21BIT_CONST(i)) + return __atomic_fetch_sub(-i, v); + + return __atomic_fetch_add(i, v); +} + /* * atomic_sub_return - subtract the atomic variable * @i: integer value to subtract @@ -102,6 +138,14 @@ static inline int atomic_sub_return(int i, atomic_t *v) return __atomic_add_return(-i, v); } +static inline int atomic_fetch_sub(int i, atomic_t *v) +{ + if (IS_21BIT_CONST(i)) + return __atomic_fetch_sub(i, v); + + return __atomic_fetch_add(-i, v); +} + /* * __atomic_add_unless - add unless the number is a given value * @v: pointer of type atomic_t diff --git a/arch/blackfin/include/asm/atomic.h b/arch/blackfin/include/asm/atomic.h index 1c1c42330c99..63c7deceeeb6 100644 --- a/arch/blackfin/include/asm/atomic.h +++ b/arch/blackfin/include/asm/atomic.h @@ -17,6 +17,7 @@ asmlinkage int __raw_uncached_fetch_asm(const volatile int *ptr); asmlinkage int __raw_atomic_add_asm(volatile int *ptr, int value); +asmlinkage int __raw_atomic_xadd_asm(volatile int *ptr, int value); asmlinkage int __raw_atomic_and_asm(volatile int *ptr, int value); asmlinkage int __raw_atomic_or_asm(volatile int *ptr, int value); @@ -28,10 +29,17 @@ asmlinkage int __raw_atomic_test_asm(const volatile int *ptr, int value); #define atomic_add_return(i, v) __raw_atomic_add_asm(&(v)->counter, i) #define atomic_sub_return(i, v) __raw_atomic_add_asm(&(v)->counter, -(i)) +#define atomic_fetch_add(i, v) __raw_atomic_xadd_asm(&(v)->counter, i) +#define atomic_fetch_sub(i, v) __raw_atomic_xadd_asm(&(v)->counter, -(i)) + #define atomic_or(i, v) (void)__raw_atomic_or_asm(&(v)->counter, i) #define atomic_and(i, v) (void)__raw_atomic_and_asm(&(v)->counter, i) #define atomic_xor(i, v) (void)__raw_atomic_xor_asm(&(v)->counter, i) +#define atomic_fetch_or(i, v) __raw_atomic_or_asm(&(v)->counter, i) +#define atomic_fetch_and(i, v) __raw_atomic_and_asm(&(v)->counter, i) +#define atomic_fetch_xor(i, v) __raw_atomic_xor_asm(&(v)->counter, i) + #endif #include <asm-generic/atomic.h> diff --git a/arch/blackfin/include/asm/spinlock.h b/arch/blackfin/include/asm/spinlock.h index 490c7caa02d9..c58f4a83ed6f 100644 --- a/arch/blackfin/include/asm/spinlock.h +++ b/arch/blackfin/include/asm/spinlock.h @@ -12,6 +12,8 @@ #else #include <linux/atomic.h> +#include <asm/processor.h> +#include <asm/barrier.h> asmlinkage int __raw_spin_is_locked_asm(volatile int *ptr); asmlinkage void __raw_spin_lock_asm(volatile int *ptr); @@ -48,8 +50,7 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock) static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) { - while (arch_spin_is_locked(lock)) - cpu_relax(); + smp_cond_load_acquire(&lock->lock, !VAL); } static inline int arch_read_can_lock(arch_rwlock_t *rw) diff --git a/arch/blackfin/kernel/bfin_ksyms.c b/arch/blackfin/kernel/bfin_ksyms.c index a401c27b69b4..68096e8f787f 100644 --- a/arch/blackfin/kernel/bfin_ksyms.c +++ b/arch/blackfin/kernel/bfin_ksyms.c @@ -84,6 +84,7 @@ EXPORT_SYMBOL(insl_16); #ifdef CONFIG_SMP EXPORT_SYMBOL(__raw_atomic_add_asm); +EXPORT_SYMBOL(__raw_atomic_xadd_asm); EXPORT_SYMBOL(__raw_atomic_and_asm); EXPORT_SYMBOL(__raw_atomic_or_asm); EXPORT_SYMBOL(__raw_atomic_xor_asm); diff --git a/arch/blackfin/mach-bf561/atomic.S b/arch/blackfin/mach-bf561/atomic.S index 26fccb5568b9..1e2989c5d6b2 100644 --- a/arch/blackfin/mach-bf561/atomic.S +++ b/arch/blackfin/mach-bf561/atomic.S @@ -607,6 +607,28 @@ ENDPROC(___raw_atomic_add_asm) /* * r0 = ptr + * r1 = value + * + * ADD a signed value to a 32bit word and return the old value atomically. + * Clobbers: r3:0, p1:0 + */ +ENTRY(___raw_atomic_xadd_asm) + p1 = r0; + r3 = r1; + [--sp] = rets; + call _get_core_lock; + r3 = [p1]; + r2 = r3 + r2; + [p1] = r2; + r1 = p1; + call _put_core_lock; + r0 = r3; + rets = [sp++]; + rts; +ENDPROC(___raw_atomic_add_asm) + +/* + * r0 = ptr * r1 = mask * * AND the mask bits from a 32bit word and return the old 32bit value @@ -618,10 +640,9 @@ ENTRY(___raw_atomic_and_asm) r3 = r1; [--sp] = rets; call _get_core_lock; - r2 = [p1]; - r3 = r2 & r3; - [p1] = r3; - r3 = r2; + r3 = [p1]; + r2 = r2 & r3; + [p1] = r2; r1 = p1; call _put_core_lock; r0 = r3; @@ -642,10 +663,9 @@ ENTRY(___raw_atomic_or_asm) r3 = r1; [--sp] = rets; call _get_core_lock; - r2 = [p1]; - r3 = r2 | r3; - [p1] = r3; - r3 = r2; + r3 = [p1]; + r2 = r2 | r3; + [p1] = r2; r1 = p1; call _put_core_lock; r0 = r3; @@ -666,10 +686,9 @@ ENTRY(___raw_atomic_xor_asm) r3 = r1; [--sp] = rets; call _get_core_lock; - r2 = [p1]; - r3 = r2 ^ r3; - [p1] = r3; - r3 = r2; + r3 = [p1]; + r2 = r2 ^ r3; + [p1] = r2; r1 = p1; call _put_core_lock; r0 = r3; diff --git a/arch/frv/include/asm/atomic.h b/arch/frv/include/asm/atomic.h index 64f02d451aa8..1c2a5e264fc7 100644 --- a/arch/frv/include/asm/atomic.h +++ b/arch/frv/include/asm/atomic.h @@ -60,16 +60,6 @@ static inline int atomic_add_negative(int i, atomic_t *v) return atomic_add_return(i, v) < 0; } -static inline void atomic_add(int i, atomic_t *v) -{ - atomic_add_return(i, v); -} - -static inline void atomic_sub(int i, atomic_t *v) -{ - atomic_sub_return(i, v); -} - static inline void atomic_inc(atomic_t *v) { atomic_inc_return(v); @@ -136,16 +126,6 @@ static inline long long atomic64_add_negative(long long i, atomic64_t *v) return atomic64_add_return(i, v) < 0; } -static inline void atomic64_add(long long i, atomic64_t *v) -{ - atomic64_add_return(i, v); -} - -static inline void atomic64_sub(long long i, atomic64_t *v) -{ - atomic64_sub_return(i, v); -} - static inline void atomic64_inc(atomic64_t *v) { atomic64_inc_return(v); @@ -182,11 +162,19 @@ static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u) } #define ATOMIC_OP(op) \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + return __atomic32_fetch_##op(i, &v->counter); \ +} \ static inline void atomic_##op(int i, atomic_t *v) \ { \ (void)__atomic32_fetch_##op(i, &v->counter); \ } \ \ +static inline long long atomic64_fetch_##op(long long i, atomic64_t *v) \ +{ \ + return __atomic64_fetch_##op(i, &v->counter); \ +} \ static inline void atomic64_##op(long long i, atomic64_t *v) \ { \ (void)__atomic64_fetch_##op(i, &v->counter); \ @@ -195,6 +183,8 @@ static inline void atomic64_##op(long long i, atomic64_t *v) \ ATOMIC_OP(or) ATOMIC_OP(and) ATOMIC_OP(xor) +ATOMIC_OP(add) +ATOMIC_OP(sub) #undef ATOMIC_OP diff --git a/arch/frv/include/asm/atomic_defs.h b/arch/frv/include/asm/atomic_defs.h index 36e126d2f801..d4912c88b829 100644 --- a/arch/frv/include/asm/atomic_defs.h +++ b/arch/frv/include/asm/atomic_defs.h @@ -162,6 +162,8 @@ ATOMIC_EXPORT(__atomic64_fetch_##op); ATOMIC_FETCH_OP(or) ATOMIC_FETCH_OP(and) ATOMIC_FETCH_OP(xor) +ATOMIC_FETCH_OP(add) +ATOMIC_FETCH_OP(sub) ATOMIC_OP_RETURN(add) ATOMIC_OP_RETURN(sub) diff --git a/arch/frv/include/asm/serial.h b/arch/frv/include/asm/serial.h index bce0d0d07e60..614c6d76789a 100644 --- a/arch/frv/include/asm/serial.h +++ b/arch/frv/include/asm/serial.h @@ -12,7 +12,3 @@ * the base baud is derived from the clock speed and so is variable */ #define BASE_BAUD 0 - -#define STD_COM_FLAGS UPF_BOOT_AUTOCONF - -#define SERIAL_PORT_DFNS diff --git a/arch/h8300/include/asm/atomic.h b/arch/h8300/include/asm/atomic.h index 4435a445ae7e..349a47a918db 100644 --- a/arch/h8300/include/asm/atomic.h +++ b/arch/h8300/include/asm/atomic.h @@ -28,6 +28,19 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ return ret; \ } +#define ATOMIC_FETCH_OP(op, c_op) \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + h8300flags flags; \ + int ret; \ + \ + flags = arch_local_irq_save(); \ + ret = v->counter; \ + v->counter c_op i; \ + arch_local_irq_restore(flags); \ + return ret; \ +} + #define ATOMIC_OP(op, c_op) \ static inline void atomic_##op(int i, atomic_t *v) \ { \ @@ -41,17 +54,21 @@ static inline void atomic_##op(int i, atomic_t *v) \ ATOMIC_OP_RETURN(add, +=) ATOMIC_OP_RETURN(sub, -=) -ATOMIC_OP(and, &=) -ATOMIC_OP(or, |=) -ATOMIC_OP(xor, ^=) +#define ATOMIC_OPS(op, c_op) \ + ATOMIC_OP(op, c_op) \ + ATOMIC_FETCH_OP(op, c_op) + +ATOMIC_OPS(and, &=) +ATOMIC_OPS(or, |=) +ATOMIC_OPS(xor, ^=) +ATOMIC_OPS(add, +=) +ATOMIC_OPS(sub, -=) +#undef ATOMIC_OPS #undef ATOMIC_OP_RETURN #undef ATOMIC_OP -#define atomic_add(i, v) (void)atomic_add_return(i, v) #define atomic_add_negative(a, v) (atomic_add_return((a), (v)) < 0) - -#define atomic_sub(i, v) (void)atomic_sub_return(i, v) #define atomic_sub_and_test(i, v) (atomic_sub_return(i, v) == 0) #define atomic_inc_return(v) atomic_add_return(1, v) diff --git a/arch/hexagon/include/asm/atomic.h b/arch/hexagon/include/asm/atomic.h index 55696c4100d4..a62ba368b27d 100644 --- a/arch/hexagon/include/asm/atomic.h +++ b/arch/hexagon/include/asm/atomic.h @@ -110,7 +110,7 @@ static inline void atomic_##op(int i, atomic_t *v) \ ); \ } \ -#define ATOMIC_OP_RETURN(op) \ +#define ATOMIC_OP_RETURN(op) \ static inline int atomic_##op##_return(int i, atomic_t *v) \ { \ int output; \ @@ -127,16 +127,37 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ return output; \ } -#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) +#define ATOMIC_FETCH_OP(op) \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + int output, val; \ + \ + __asm__ __volatile__ ( \ + "1: %0 = memw_locked(%2);\n" \ + " %1 = "#op "(%0,%3);\n" \ + " memw_locked(%2,P3)=%1;\n" \ + " if !P3 jump 1b;\n" \ + : "=&r" (output), "=&r" (val) \ + : "r" (&v->counter), "r" (i) \ + : "memory", "p3" \ + ); \ + return output; \ +} + +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) ATOMIC_FETCH_OP(op) ATOMIC_OPS(add) ATOMIC_OPS(sub) -ATOMIC_OP(and) -ATOMIC_OP(or) -ATOMIC_OP(xor) +#undef ATOMIC_OPS +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op) + +ATOMIC_OPS(and) +ATOMIC_OPS(or) +ATOMIC_OPS(xor) #undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP diff --git a/arch/hexagon/include/asm/spinlock.h b/arch/hexagon/include/asm/spinlock.h index 12ca4ebc0338..a1c55788c5d6 100644 --- a/arch/hexagon/include/asm/spinlock.h +++ b/arch/hexagon/include/asm/spinlock.h @@ -23,6 +23,8 @@ #define _ASM_SPINLOCK_H #include <asm/irqflags.h> +#include <asm/barrier.h> +#include <asm/processor.h> /* * This file is pulled in for SMP builds. @@ -176,8 +178,12 @@ static inline unsigned int arch_spin_trylock(arch_spinlock_t *lock) * SMP spinlocks are intended to allow only a single CPU at the lock */ #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) -#define arch_spin_unlock_wait(lock) \ - do {while (arch_spin_is_locked(lock)) cpu_relax(); } while (0) + +static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) +{ + smp_cond_load_acquire(&lock->lock, !VAL); +} + #define arch_spin_is_locked(x) ((x)->lock != 0) #define arch_read_lock_flags(lock, flags) arch_read_lock(lock) diff --git a/arch/ia64/include/asm/atomic.h b/arch/ia64/include/asm/atomic.h index 8dfb5f6f6c35..f565ad376142 100644 --- a/arch/ia64/include/asm/atomic.h +++ b/arch/ia64/include/asm/atomic.h @@ -42,8 +42,27 @@ ia64_atomic_##op (int i, atomic_t *v) \ return new; \ } -ATOMIC_OP(add, +) -ATOMIC_OP(sub, -) +#define ATOMIC_FETCH_OP(op, c_op) \ +static __inline__ int \ +ia64_atomic_fetch_##op (int i, atomic_t *v) \ +{ \ + __s32 old, new; \ + CMPXCHG_BUGCHECK_DECL \ + \ + do { \ + CMPXCHG_BUGCHECK(v); \ + old = atomic_read(v); \ + new = old c_op i; \ + } while (ia64_cmpxchg(acq, v, old, new, sizeof(atomic_t)) != old); \ + return old; \ +} + +#define ATOMIC_OPS(op, c_op) \ + ATOMIC_OP(op, c_op) \ + ATOMIC_FETCH_OP(op, c_op) + +ATOMIC_OPS(add, +) +ATOMIC_OPS(sub, -) #define atomic_add_return(i,v) \ ({ \ @@ -69,14 +88,44 @@ ATOMIC_OP(sub, -) : ia64_atomic_sub(__ia64_asr_i, v); \ }) -ATOMIC_OP(and, &) -ATOMIC_OP(or, |) -ATOMIC_OP(xor, ^) +#define atomic_fetch_add(i,v) \ +({ \ + int __ia64_aar_i = (i); \ + (__builtin_constant_p(i) \ + && ( (__ia64_aar_i == 1) || (__ia64_aar_i == 4) \ + || (__ia64_aar_i == 8) || (__ia64_aar_i == 16) \ + || (__ia64_aar_i == -1) || (__ia64_aar_i == -4) \ + || (__ia64_aar_i == -8) || (__ia64_aar_i == -16))) \ + ? ia64_fetchadd(__ia64_aar_i, &(v)->counter, acq) \ + : ia64_atomic_fetch_add(__ia64_aar_i, v); \ +}) + +#define atomic_fetch_sub(i,v) \ +({ \ + int __ia64_asr_i = (i); \ + (__builtin_constant_p(i) \ + && ( (__ia64_asr_i == 1) || (__ia64_asr_i == 4) \ + || (__ia64_asr_i == 8) || (__ia64_asr_i == 16) \ + || (__ia64_asr_i == -1) || (__ia64_asr_i == -4) \ + || (__ia64_asr_i == -8) || (__ia64_asr_i == -16))) \ + ? ia64_fetchadd(-__ia64_asr_i, &(v)->counter, acq) \ + : ia64_atomic_fetch_sub(__ia64_asr_i, v); \ +}) + +ATOMIC_FETCH_OP(and, &) +ATOMIC_FETCH_OP(or, |) +ATOMIC_FETCH_OP(xor, ^) + +#define atomic_and(i,v) (void)ia64_atomic_fetch_and(i,v) +#define atomic_or(i,v) (void)ia64_atomic_fetch_or(i,v) +#define atomic_xor(i,v) (void)ia64_atomic_fetch_xor(i,v) -#define atomic_and(i,v) (void)ia64_atomic_and(i,v) -#define atomic_or(i,v) (void)ia64_atomic_or(i,v) -#define atomic_xor(i,v) (void)ia64_atomic_xor(i,v) +#define atomic_fetch_and(i,v) ia64_atomic_fetch_and(i,v) +#define atomic_fetch_or(i,v) ia64_atomic_fetch_or(i,v) +#define atomic_fetch_xor(i,v) ia64_atomic_fetch_xor(i,v) +#undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP #define ATOMIC64_OP(op, c_op) \ @@ -94,8 +143,27 @@ ia64_atomic64_##op (__s64 i, atomic64_t *v) \ return new; \ } -ATOMIC64_OP(add, +) -ATOMIC64_OP(sub, -) +#define ATOMIC64_FETCH_OP(op, c_op) \ +static __inline__ long \ +ia64_atomic64_fetch_##op (__s64 i, atomic64_t *v) \ +{ \ + __s64 old, new; \ + CMPXCHG_BUGCHECK_DECL \ + \ + do { \ + CMPXCHG_BUGCHECK(v); \ + old = atomic64_read(v); \ + new = old c_op i; \ + } while (ia64_cmpxchg(acq, v, old, new, sizeof(atomic64_t)) != old); \ + return old; \ +} + +#define ATOMIC64_OPS(op, c_op) \ + ATOMIC64_OP(op, c_op) \ + ATOMIC64_FETCH_OP(op, c_op) + +ATOMIC64_OPS(add, +) +ATOMIC64_OPS(sub, -) #define atomic64_add_return(i,v) \ ({ \ @@ -121,14 +189,44 @@ ATOMIC64_OP(sub, -) : ia64_atomic64_sub(__ia64_asr_i, v); \ }) -ATOMIC64_OP(and, &) -ATOMIC64_OP(or, |) -ATOMIC64_OP(xor, ^) +#define atomic64_fetch_add(i,v) \ +({ \ + long __ia64_aar_i = (i); \ + (__builtin_constant_p(i) \ + && ( (__ia64_aar_i == 1) || (__ia64_aar_i == 4) \ + || (__ia64_aar_i == 8) || (__ia64_aar_i == 16) \ + || (__ia64_aar_i == -1) || (__ia64_aar_i == -4) \ + || (__ia64_aar_i == -8) || (__ia64_aar_i == -16))) \ + ? ia64_fetchadd(__ia64_aar_i, &(v)->counter, acq) \ + : ia64_atomic64_fetch_add(__ia64_aar_i, v); \ +}) + +#define atomic64_fetch_sub(i,v) \ +({ \ + long __ia64_asr_i = (i); \ + (__builtin_constant_p(i) \ + && ( (__ia64_asr_i == 1) || (__ia64_asr_i == 4) \ + || (__ia64_asr_i == 8) || (__ia64_asr_i == 16) \ + || (__ia64_asr_i == -1) || (__ia64_asr_i == -4) \ + || (__ia64_asr_i == -8) || (__ia64_asr_i == -16))) \ + ? ia64_fetchadd(-__ia64_asr_i, &(v)->counter, acq) \ + : ia64_atomic64_fetch_sub(__ia64_asr_i, v); \ +}) + +ATOMIC64_FETCH_OP(and, &) +ATOMIC64_FETCH_OP(or, |) +ATOMIC64_FETCH_OP(xor, ^) + +#define atomic64_and(i,v) (void)ia64_atomic64_fetch_and(i,v) +#define atomic64_or(i,v) (void)ia64_atomic64_fetch_or(i,v) +#define atomic64_xor(i,v) (void)ia64_atomic64_fetch_xor(i,v) -#define atomic64_and(i,v) (void)ia64_atomic64_and(i,v) -#define atomic64_or(i,v) (void)ia64_atomic64_or(i,v) -#define atomic64_xor(i,v) (void)ia64_atomic64_xor(i,v) +#define atomic64_fetch_and(i,v) ia64_atomic64_fetch_and(i,v) +#define atomic64_fetch_or(i,v) ia64_atomic64_fetch_or(i,v) +#define atomic64_fetch_xor(i,v) ia64_atomic64_fetch_xor(i,v) +#undef ATOMIC64_OPS +#undef ATOMIC64_FETCH_OP #undef ATOMIC64_OP #define atomic_cmpxchg(v, old, new) (cmpxchg(&((v)->counter), old, new)) diff --git a/arch/ia64/include/asm/mutex.h b/arch/ia64/include/asm/mutex.h index f41e66d65e31..28cb819e0ff9 100644 --- a/arch/ia64/include/asm/mutex.h +++ b/arch/ia64/include/asm/mutex.h @@ -82,7 +82,7 @@ __mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *)) static inline int __mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *)) { - if (cmpxchg_acq(count, 1, 0) == 1) + if (atomic_read(count) == 1 && cmpxchg_acq(count, 1, 0) == 1) return 1; return 0; } diff --git a/arch/ia64/include/asm/rwsem.h b/arch/ia64/include/asm/rwsem.h index 8b23e070b844..8fa98dd303b4 100644 --- a/arch/ia64/include/asm/rwsem.h +++ b/arch/ia64/include/asm/rwsem.h @@ -40,7 +40,7 @@ static inline void __down_read (struct rw_semaphore *sem) { - long result = ia64_fetchadd8_acq((unsigned long *)&sem->count, 1); + long result = ia64_fetchadd8_acq((unsigned long *)&sem->count.counter, 1); if (result < 0) rwsem_down_read_failed(sem); @@ -55,9 +55,9 @@ ___down_write (struct rw_semaphore *sem) long old, new; do { - old = sem->count; + old = atomic_long_read(&sem->count); new = old + RWSEM_ACTIVE_WRITE_BIAS; - } while (cmpxchg_acq(&sem->count, old, new) != old); + } while (atomic_long_cmpxchg_acquire(&sem->count, old, new) != old); return old; } @@ -85,7 +85,7 @@ __down_write_killable (struct rw_semaphore *sem) static inline void __up_read (struct rw_semaphore *sem) { - long result = ia64_fetchadd8_rel((unsigned long *)&sem->count, -1); + long result = ia64_fetchadd8_rel((unsigned long *)&sem->count.counter, -1); if (result < 0 && (--result & RWSEM_ACTIVE_MASK) == 0) rwsem_wake(sem); @@ -100,9 +100,9 @@ __up_write (struct rw_semaphore *sem) long old, new; do { - old = sem->count; + old = atomic_long_read(&sem->count); new = old - RWSEM_ACTIVE_WRITE_BIAS; - } while (cmpxchg_rel(&sem->count, old, new) != old); + } while (atomic_long_cmpxchg_release(&sem->count, old, new) != old); if (new < 0 && (new & RWSEM_ACTIVE_MASK) == 0) rwsem_wake(sem); @@ -115,8 +115,8 @@ static inline int __down_read_trylock (struct rw_semaphore *sem) { long tmp; - while ((tmp = sem->count) >= 0) { - if (tmp == cmpxchg_acq(&sem->count, tmp, tmp+1)) { + while ((tmp = atomic_long_read(&sem->count)) >= 0) { + if (tmp == atomic_long_cmpxchg_acquire(&sem->count, tmp, tmp+1)) { return 1; } } @@ -129,8 +129,8 @@ __down_read_trylock (struct rw_semaphore *sem) static inline int __down_write_trylock (struct rw_semaphore *sem) { - long tmp = cmpxchg_acq(&sem->count, RWSEM_UNLOCKED_VALUE, - RWSEM_ACTIVE_WRITE_BIAS); + long tmp = atomic_long_cmpxchg_acquire(&sem->count, + RWSEM_UNLOCKED_VALUE, RWSEM_ACTIVE_WRITE_BIAS); return tmp == RWSEM_UNLOCKED_VALUE; } @@ -143,19 +143,12 @@ __downgrade_write (struct rw_semaphore *sem) long old, new; do { - old = sem->count; + old = atomic_long_read(&sem->count); new = old - RWSEM_WAITING_BIAS; - } while (cmpxchg_rel(&sem->count, old, new) != old); + } while (atomic_long_cmpxchg_release(&sem->count, old, new) != old); if (old < 0) rwsem_downgrade_wake(sem); } -/* - * Implement atomic add functionality. These used to be "inline" functions, but GCC v3.1 - * doesn't quite optimize this stuff right and ends up with bad calls to fetchandadd. - */ -#define rwsem_atomic_add(delta, sem) atomic64_add(delta, (atomic64_t *)(&(sem)->count)) -#define rwsem_atomic_update(delta, sem) atomic64_add_return(delta, (atomic64_t *)(&(sem)->count)) - #endif /* _ASM_IA64_RWSEM_H */ diff --git a/arch/ia64/include/asm/spinlock.h b/arch/ia64/include/asm/spinlock.h index 45698cd15b7b..ca9e76149a4a 100644 --- a/arch/ia64/include/asm/spinlock.h +++ b/arch/ia64/include/asm/spinlock.h @@ -15,6 +15,8 @@ #include <linux/atomic.h> #include <asm/intrinsics.h> +#include <asm/barrier.h> +#include <asm/processor.h> #define arch_spin_lock_init(x) ((x)->lock = 0) @@ -86,6 +88,8 @@ static __always_inline void __ticket_spin_unlock_wait(arch_spinlock_t *lock) return; cpu_relax(); } + + smp_acquire__after_ctrl_dep(); } static inline int __ticket_spin_is_locked(arch_spinlock_t *lock) diff --git a/arch/m32r/boot/compressed/m32r_sio.c b/arch/m32r/boot/compressed/m32r_sio.c index 01d877c6868f..cf3023dced49 100644 --- a/arch/m32r/boot/compressed/m32r_sio.c +++ b/arch/m32r/boot/compressed/m32r_sio.c @@ -8,12 +8,13 @@ #include <asm/processor.h> -static void putc(char c); +static void m32r_putc(char c); static int puts(const char *s) { char c; - while ((c = *s++)) putc(c); + while ((c = *s++)) + m32r_putc(c); return 0; } @@ -41,7 +42,7 @@ static int puts(const char *s) #define BOOT_SIO0TXB PLD_ESIO0TXB #endif -static void putc(char c) +static void m32r_putc(char c) { while ((*BOOT_SIO0STS & 0x3) != 0x3) cpu_relax(); @@ -61,7 +62,7 @@ static void putc(char c) #define SIO0TXB (volatile unsigned short *)(0x00efd000 + 30) #endif -static void putc(char c) +static void m32r_putc(char c) { while ((*SIO0STS & 0x1) == 0) cpu_relax(); diff --git a/arch/m32r/include/asm/atomic.h b/arch/m32r/include/asm/atomic.h index ea35160d632b..640cc1c7099f 100644 --- a/arch/m32r/include/asm/atomic.h +++ b/arch/m32r/include/asm/atomic.h @@ -89,16 +89,44 @@ static __inline__ int atomic_##op##_return(int i, atomic_t *v) \ return result; \ } -#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) +#define ATOMIC_FETCH_OP(op) \ +static __inline__ int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + unsigned long flags; \ + int result, val; \ + \ + local_irq_save(flags); \ + __asm__ __volatile__ ( \ + "# atomic_fetch_" #op " \n\t" \ + DCACHE_CLEAR("%0", "r4", "%2") \ + M32R_LOCK" %1, @%2; \n\t" \ + "mv %0, %1 \n\t" \ + #op " %1, %3; \n\t" \ + M32R_UNLOCK" %1, @%2; \n\t" \ + : "=&r" (result), "=&r" (val) \ + : "r" (&v->counter), "r" (i) \ + : "memory" \ + __ATOMIC_CLOBBER \ + ); \ + local_irq_restore(flags); \ + \ + return result; \ +} + +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) ATOMIC_FETCH_OP(op) ATOMIC_OPS(add) ATOMIC_OPS(sub) -ATOMIC_OP(and) -ATOMIC_OP(or) -ATOMIC_OP(xor) +#undef ATOMIC_OPS +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op) + +ATOMIC_OPS(and) +ATOMIC_OPS(or) +ATOMIC_OPS(xor) #undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP diff --git a/arch/m32r/include/asm/spinlock.h b/arch/m32r/include/asm/spinlock.h index fa13694eaae3..323c7fc953cd 100644 --- a/arch/m32r/include/asm/spinlock.h +++ b/arch/m32r/include/asm/spinlock.h @@ -13,6 +13,8 @@ #include <linux/atomic.h> #include <asm/dcache_clear.h> #include <asm/page.h> +#include <asm/barrier.h> +#include <asm/processor.h> /* * Your basic SMP spinlocks, allowing only a single CPU anywhere @@ -27,8 +29,11 @@ #define arch_spin_is_locked(x) (*(volatile int *)(&(x)->slock) <= 0) #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) -#define arch_spin_unlock_wait(x) \ - do { cpu_relax(); } while (arch_spin_is_locked(x)) + +static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) +{ + smp_cond_load_acquire(&lock->slock, VAL > 0); +} /** * arch_spin_trylock - Try spin lock and return a result diff --git a/arch/m68k/coldfire/head.S b/arch/m68k/coldfire/head.S index fa31be297b85..73d92ea0ce65 100644 --- a/arch/m68k/coldfire/head.S +++ b/arch/m68k/coldfire/head.S @@ -288,7 +288,7 @@ _clear_bss: #endif /* - * Assember start up done, start code proper. + * Assembler start up done, start code proper. */ jsr start_kernel /* start Linux kernel */ diff --git a/arch/m68k/coldfire/m5272.c b/arch/m68k/coldfire/m5272.c index c525e4c08f84..9abb1a441da0 100644 --- a/arch/m68k/coldfire/m5272.c +++ b/arch/m68k/coldfire/m5272.c @@ -111,7 +111,7 @@ void __init config_BSP(char *commandp, int size) /***************************************************************************/ /* - * Some 5272 based boards have the FEC ethernet diectly connected to + * Some 5272 based boards have the FEC ethernet directly connected to * an ethernet switch. In this case we need to use the fixed phy type, * and we need to declare it early in boot. */ diff --git a/arch/m68k/coldfire/pci.c b/arch/m68k/coldfire/pci.c index 821de928dc3f..6a640be48568 100644 --- a/arch/m68k/coldfire/pci.c +++ b/arch/m68k/coldfire/pci.c @@ -42,7 +42,7 @@ static unsigned long iospace; /* * We need to be carefull probing on bus 0 (directly connected to host - * bridge). We should only acccess the well defined possible devices in + * bridge). We should only access the well defined possible devices in * use, ignore aliases and the like. */ static unsigned char mcf_host_slot2sid[32] = { diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig index 3ee6976f6088..8f5b6f7dd136 100644 --- a/arch/m68k/configs/amiga_defconfig +++ b/arch/m68k/configs/amiga_defconfig @@ -9,6 +9,7 @@ CONFIG_LOG_BUF_SHIFT=16 # CONFIG_PID_NS is not set # CONFIG_NET_NS is not set CONFIG_BLK_DEV_INITRD=y +CONFIG_CC_OPTIMIZE_FOR_SIZE=y CONFIG_USERFAULTFD=y CONFIG_SLAB=y CONFIG_MODULES=y @@ -359,6 +360,7 @@ CONFIG_MACVTAP=m CONFIG_IPVLAN=m CONFIG_VXLAN=m CONFIG_GENEVE=m +CONFIG_GTP=m CONFIG_MACSEC=m CONFIG_NETCONSOLE=m CONFIG_NETCONSOLE_DYNAMIC=y @@ -553,7 +555,9 @@ CONFIG_TEST_STRING_HELPERS=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m CONFIG_TEST_BITMAP=m +CONFIG_TEST_UUID=m CONFIG_TEST_RHASHTABLE=m +CONFIG_TEST_HASH=m CONFIG_TEST_LKM=m CONFIG_TEST_USER_COPY=m CONFIG_TEST_BPF=m diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig index e96787ffcbce..31bded9c83d4 100644 --- a/arch/m68k/configs/apollo_defconfig +++ b/arch/m68k/configs/apollo_defconfig @@ -9,6 +9,7 @@ CONFIG_LOG_BUF_SHIFT=16 # CONFIG_PID_NS is not set # CONFIG_NET_NS is not set CONFIG_BLK_DEV_INITRD=y +CONFIG_CC_OPTIMIZE_FOR_SIZE=y CONFIG_USERFAULTFD=y CONFIG_SLAB=y CONFIG_MODULES=y @@ -341,6 +342,7 @@ CONFIG_MACVTAP=m CONFIG_IPVLAN=m CONFIG_VXLAN=m CONFIG_GENEVE=m +CONFIG_GTP=m CONFIG_MACSEC=m CONFIG_NETCONSOLE=m CONFIG_NETCONSOLE_DYNAMIC=y @@ -512,7 +514,9 @@ CONFIG_TEST_STRING_HELPERS=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m CONFIG_TEST_BITMAP=m +CONFIG_TEST_UUID=m CONFIG_TEST_RHASHTABLE=m +CONFIG_TEST_HASH=m CONFIG_TEST_LKM=m CONFIG_TEST_USER_COPY=m CONFIG_TEST_BPF=m diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig index 083fe6beac14..0d7739e04ae2 100644 --- a/arch/m68k/configs/atari_defconfig +++ b/arch/m68k/configs/atari_defconfig @@ -9,6 +9,7 @@ CONFIG_LOG_BUF_SHIFT=16 # CONFIG_PID_NS is not set # CONFIG_NET_NS is not set CONFIG_BLK_DEV_INITRD=y +CONFIG_CC_OPTIMIZE_FOR_SIZE=y CONFIG_USERFAULTFD=y CONFIG_SLAB=y CONFIG_MODULES=y @@ -350,6 +351,7 @@ CONFIG_MACVTAP=m CONFIG_IPVLAN=m CONFIG_VXLAN=m CONFIG_GENEVE=m +CONFIG_GTP=m CONFIG_MACSEC=m CONFIG_NETCONSOLE=m CONFIG_NETCONSOLE_DYNAMIC=y @@ -533,7 +535,9 @@ CONFIG_TEST_STRING_HELPERS=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m CONFIG_TEST_BITMAP=m +CONFIG_TEST_UUID=m CONFIG_TEST_RHASHTABLE=m +CONFIG_TEST_HASH=m CONFIG_TEST_LKM=m CONFIG_TEST_USER_COPY=m CONFIG_TEST_BPF=m diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig index 475130c06dcb..2cbb5c465fec 100644 --- a/arch/m68k/configs/bvme6000_defconfig +++ b/arch/m68k/configs/bvme6000_defconfig @@ -9,6 +9,7 @@ CONFIG_LOG_BUF_SHIFT=16 # CONFIG_PID_NS is not set # CONFIG_NET_NS is not set CONFIG_BLK_DEV_INITRD=y +CONFIG_CC_OPTIMIZE_FOR_SIZE=y CONFIG_USERFAULTFD=y CONFIG_SLAB=y CONFIG_MODULES=y @@ -340,6 +341,7 @@ CONFIG_MACVTAP=m CONFIG_IPVLAN=m CONFIG_VXLAN=m CONFIG_GENEVE=m +CONFIG_GTP=m CONFIG_MACSEC=m CONFIG_NETCONSOLE=m CONFIG_NETCONSOLE_DYNAMIC=y @@ -504,7 +506,9 @@ CONFIG_TEST_STRING_HELPERS=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m CONFIG_TEST_BITMAP=m +CONFIG_TEST_UUID=m CONFIG_TEST_RHASHTABLE=m +CONFIG_TEST_HASH=m CONFIG_TEST_LKM=m CONFIG_TEST_USER_COPY=m CONFIG_TEST_BPF=m diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig index 4339658c200f..96102a42c156 100644 --- a/arch/m68k/configs/hp300_defconfig +++ b/arch/m68k/configs/hp300_defconfig @@ -9,6 +9,7 @@ CONFIG_LOG_BUF_SHIFT=16 # CONFIG_PID_NS is not set # CONFIG_NET_NS is not set CONFIG_BLK_DEV_INITRD=y +CONFIG_CC_OPTIMIZE_FOR_SIZE=y CONFIG_USERFAULTFD=y CONFIG_SLAB=y CONFIG_MODULES=y @@ -341,6 +342,7 @@ CONFIG_MACVTAP=m CONFIG_IPVLAN=m CONFIG_VXLAN=m CONFIG_GENEVE=m +CONFIG_GTP=m CONFIG_MACSEC=m CONFIG_NETCONSOLE=m CONFIG_NETCONSOLE_DYNAMIC=y @@ -514,7 +516,9 @@ CONFIG_TEST_STRING_HELPERS=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m CONFIG_TEST_BITMAP=m +CONFIG_TEST_UUID=m CONFIG_TEST_RHASHTABLE=m +CONFIG_TEST_HASH=m CONFIG_TEST_LKM=m CONFIG_TEST_USER_COPY=m CONFIG_TEST_BPF=m diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig index 831cc8c3a2e2..97d88f7dc5a7 100644 --- a/arch/m68k/configs/mac_defconfig +++ b/arch/m68k/configs/mac_defconfig @@ -9,6 +9,7 @@ CONFIG_LOG_BUF_SHIFT=16 # CONFIG_PID_NS is not set # CONFIG_NET_NS is not set CONFIG_BLK_DEV_INITRD=y +CONFIG_CC_OPTIMIZE_FOR_SIZE=y CONFIG_USERFAULTFD=y CONFIG_SLAB=y CONFIG_MODULES=y @@ -357,6 +358,7 @@ CONFIG_MACVTAP=m CONFIG_IPVLAN=m CONFIG_VXLAN=m CONFIG_GENEVE=m +CONFIG_GTP=m CONFIG_MACSEC=m CONFIG_NETCONSOLE=m CONFIG_NETCONSOLE_DYNAMIC=y @@ -536,7 +538,9 @@ CONFIG_TEST_STRING_HELPERS=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m CONFIG_TEST_BITMAP=m +CONFIG_TEST_UUID=m CONFIG_TEST_RHASHTABLE=m +CONFIG_TEST_HASH=m CONFIG_TEST_LKM=m CONFIG_TEST_USER_COPY=m CONFIG_TEST_BPF=m diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig index 6377afeb522b..be25ef208f0f 100644 --- a/arch/m68k/configs/multi_defconfig +++ b/arch/m68k/configs/multi_defconfig @@ -9,6 +9,7 @@ CONFIG_LOG_BUF_SHIFT=16 # CONFIG_PID_NS is not set # CONFIG_NET_NS is not set CONFIG_BLK_DEV_INITRD=y +CONFIG_CC_OPTIMIZE_FOR_SIZE=y CONFIG_USERFAULTFD=y CONFIG_SLAB=y CONFIG_MODULES=y @@ -390,6 +391,7 @@ CONFIG_MACVTAP=m CONFIG_IPVLAN=m CONFIG_VXLAN=m CONFIG_GENEVE=m +CONFIG_GTP=m CONFIG_MACSEC=m CONFIG_NETCONSOLE=m CONFIG_NETCONSOLE_DYNAMIC=y @@ -616,7 +618,9 @@ CONFIG_TEST_STRING_HELPERS=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m CONFIG_TEST_BITMAP=m +CONFIG_TEST_UUID=m CONFIG_TEST_RHASHTABLE=m +CONFIG_TEST_HASH=m CONFIG_TEST_LKM=m CONFIG_TEST_USER_COPY=m CONFIG_TEST_BPF=m diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig index 4304b3d56262..a008344360c9 100644 --- a/arch/m68k/configs/mvme147_defconfig +++ b/arch/m68k/configs/mvme147_defconfig @@ -9,6 +9,7 @@ CONFIG_LOG_BUF_SHIFT=16 # CONFIG_PID_NS is not set # CONFIG_NET_NS is not set CONFIG_BLK_DEV_INITRD=y +CONFIG_CC_OPTIMIZE_FOR_SIZE=y CONFIG_USERFAULTFD=y CONFIG_SLAB=y CONFIG_MODULES=y @@ -339,6 +340,7 @@ CONFIG_MACVTAP=m CONFIG_IPVLAN=m CONFIG_VXLAN=m CONFIG_GENEVE=m +CONFIG_GTP=m CONFIG_MACSEC=m CONFIG_NETCONSOLE=m CONFIG_NETCONSOLE_DYNAMIC=y @@ -504,7 +506,9 @@ CONFIG_TEST_STRING_HELPERS=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m CONFIG_TEST_BITMAP=m +CONFIG_TEST_UUID=m CONFIG_TEST_RHASHTABLE=m +CONFIG_TEST_HASH=m CONFIG_TEST_LKM=m CONFIG_TEST_USER_COPY=m CONFIG_TEST_BPF=m diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig index 074bda4094ff..6735a25f36d4 100644 --- a/arch/m68k/configs/mvme16x_defconfig +++ b/arch/m68k/configs/mvme16x_defconfig @@ -9,6 +9,7 @@ CONFIG_LOG_BUF_SHIFT=16 # CONFIG_PID_NS is not set # CONFIG_NET_NS is not set CONFIG_BLK_DEV_INITRD=y +CONFIG_CC_OPTIMIZE_FOR_SIZE=y CONFIG_USERFAULTFD=y CONFIG_SLAB=y CONFIG_MODULES=y @@ -340,6 +341,7 @@ CONFIG_MACVTAP=m CONFIG_IPVLAN=m CONFIG_VXLAN=m CONFIG_GENEVE=m +CONFIG_GTP=m CONFIG_MACSEC=m CONFIG_NETCONSOLE=m CONFIG_NETCONSOLE_DYNAMIC=y @@ -504,7 +506,9 @@ CONFIG_TEST_STRING_HELPERS=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m CONFIG_TEST_BITMAP=m +CONFIG_TEST_UUID=m CONFIG_TEST_RHASHTABLE=m +CONFIG_TEST_HASH=m CONFIG_TEST_LKM=m CONFIG_TEST_USER_COPY=m CONFIG_TEST_BPF=m diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig index 07b9fa8d7f2e..780c6e9f6cf9 100644 --- a/arch/m68k/configs/q40_defconfig +++ b/arch/m68k/configs/q40_defconfig @@ -9,6 +9,7 @@ CONFIG_LOG_BUF_SHIFT=16 # CONFIG_PID_NS is not set # CONFIG_NET_NS is not set CONFIG_BLK_DEV_INITRD=y +CONFIG_CC_OPTIMIZE_FOR_SIZE=y CONFIG_USERFAULTFD=y CONFIG_SLAB=y CONFIG_MODULES=y @@ -346,6 +347,7 @@ CONFIG_MACVTAP=m CONFIG_IPVLAN=m CONFIG_VXLAN=m CONFIG_GENEVE=m +CONFIG_GTP=m CONFIG_MACSEC=m CONFIG_NETCONSOLE=m CONFIG_NETCONSOLE_DYNAMIC=y @@ -527,7 +529,9 @@ CONFIG_TEST_STRING_HELPERS=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m CONFIG_TEST_BITMAP=m +CONFIG_TEST_UUID=m CONFIG_TEST_RHASHTABLE=m +CONFIG_TEST_HASH=m CONFIG_TEST_LKM=m CONFIG_TEST_USER_COPY=m CONFIG_TEST_BPF=m diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig index 36e6fae02d45..44693cf361e5 100644 --- a/arch/m68k/configs/sun3_defconfig +++ b/arch/m68k/configs/sun3_defconfig @@ -9,6 +9,7 @@ CONFIG_LOG_BUF_SHIFT=16 # CONFIG_PID_NS is not set # CONFIG_NET_NS is not set CONFIG_BLK_DEV_INITRD=y +CONFIG_CC_OPTIMIZE_FOR_SIZE=y CONFIG_USERFAULTFD=y CONFIG_SLAB=y CONFIG_MODULES=y @@ -337,6 +338,7 @@ CONFIG_MACVTAP=m CONFIG_IPVLAN=m CONFIG_VXLAN=m CONFIG_GENEVE=m +CONFIG_GTP=m CONFIG_MACSEC=m CONFIG_NETCONSOLE=m CONFIG_NETCONSOLE_DYNAMIC=y @@ -506,7 +508,9 @@ CONFIG_TEST_STRING_HELPERS=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m CONFIG_TEST_BITMAP=m +CONFIG_TEST_UUID=m CONFIG_TEST_RHASHTABLE=m +CONFIG_TEST_HASH=m CONFIG_TEST_LKM=m CONFIG_TEST_USER_COPY=m CONFIG_TEST_BPF=m diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig index 903acf929511..ef0071d61158 100644 --- a/arch/m68k/configs/sun3x_defconfig +++ b/arch/m68k/configs/sun3x_defconfig @@ -9,6 +9,7 @@ CONFIG_LOG_BUF_SHIFT=16 # CONFIG_PID_NS is not set # CONFIG_NET_NS is not set CONFIG_BLK_DEV_INITRD=y +CONFIG_CC_OPTIMIZE_FOR_SIZE=y CONFIG_USERFAULTFD=y CONFIG_SLAB=y CONFIG_MODULES=y @@ -337,6 +338,7 @@ CONFIG_MACVTAP=m CONFIG_IPVLAN=m CONFIG_VXLAN=m CONFIG_GENEVE=m +CONFIG_GTP=m CONFIG_MACSEC=m CONFIG_NETCONSOLE=m CONFIG_NETCONSOLE_DYNAMIC=y @@ -506,7 +508,9 @@ CONFIG_TEST_STRING_HELPERS=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m CONFIG_TEST_BITMAP=m +CONFIG_TEST_UUID=m CONFIG_TEST_RHASHTABLE=m +CONFIG_TEST_HASH=m CONFIG_TEST_LKM=m CONFIG_TEST_USER_COPY=m CONFIG_TEST_BPF=m diff --git a/arch/m68k/ifpsp060/src/fpsp.S b/arch/m68k/ifpsp060/src/fpsp.S index 78cb60f5bb4d..9bbffebe3eb5 100644 --- a/arch/m68k/ifpsp060/src/fpsp.S +++ b/arch/m68k/ifpsp060/src/fpsp.S @@ -10191,7 +10191,7 @@ xdnrm_con: xdnrm_sd: mov.l %a1,-(%sp) tst.b LOCAL_EX(%a0) # is denorm pos or neg? - smi.b %d1 # set d0 accodingly + smi.b %d1 # set d0 accordingly bsr.l unf_sub mov.l (%sp)+,%a1 xdnrm_exit: @@ -10990,7 +10990,7 @@ src_qnan_m: # routines where an instruction is selected by an index into # a large jump table corresponding to a given instruction which # has been decoded. Flow continues here where we now decode -# further accoding to the source operand type. +# further according to the source operand type. # global fsinh @@ -23196,14 +23196,14 @@ m_sign: # # 1. Branch on the sign of the adjusted exponent. # 2p.(positive exp) -# 2. Check M16 and the digits in lwords 2 and 3 in decending order. +# 2. Check M16 and the digits in lwords 2 and 3 in descending order. # 3. Add one for each zero encountered until a non-zero digit. # 4. Subtract the count from the exp. # 5. Check if the exp has crossed zero in #3 above; make the exp abs # and set SE. # 6. Multiply the mantissa by 10**count. # 2n.(negative exp) -# 2. Check the digits in lwords 3 and 2 in decending order. +# 2. Check the digits in lwords 3 and 2 in descending order. # 3. Add one for each zero encountered until a non-zero digit. # 4. Add the count to the exp. # 5. Check if the exp has crossed zero in #3 above; clear SE. diff --git a/arch/m68k/ifpsp060/src/pfpsp.S b/arch/m68k/ifpsp060/src/pfpsp.S index 4aedef973cf6..3535e6c87eec 100644 --- a/arch/m68k/ifpsp060/src/pfpsp.S +++ b/arch/m68k/ifpsp060/src/pfpsp.S @@ -13156,14 +13156,14 @@ m_sign: # # 1. Branch on the sign of the adjusted exponent. # 2p.(positive exp) -# 2. Check M16 and the digits in lwords 2 and 3 in decending order. +# 2. Check M16 and the digits in lwords 2 and 3 in descending order. # 3. Add one for each zero encountered until a non-zero digit. # 4. Subtract the count from the exp. # 5. Check if the exp has crossed zero in #3 above; make the exp abs # and set SE. # 6. Multiply the mantissa by 10**count. # 2n.(negative exp) -# 2. Check the digits in lwords 3 and 2 in decending order. +# 2. Check the digits in lwords 3 and 2 in descending order. # 3. Add one for each zero encountered until a non-zero digit. # 4. Add the count to the exp. # 5. Check if the exp has crossed zero in #3 above; clear SE. diff --git a/arch/m68k/include/asm/atomic.h b/arch/m68k/include/asm/atomic.h index 4858178260f9..cf4c3a7b1a45 100644 --- a/arch/m68k/include/asm/atomic.h +++ b/arch/m68k/include/asm/atomic.h @@ -53,6 +53,21 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ return t; \ } +#define ATOMIC_FETCH_OP(op, c_op, asm_op) \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + int t, tmp; \ + \ + __asm__ __volatile__( \ + "1: movel %2,%1\n" \ + " " #asm_op "l %3,%1\n" \ + " casl %2,%1,%0\n" \ + " jne 1b" \ + : "+m" (*v), "=&d" (t), "=&d" (tmp) \ + : "g" (i), "2" (atomic_read(v))); \ + return tmp; \ +} + #else #define ATOMIC_OP_RETURN(op, c_op, asm_op) \ @@ -68,20 +83,41 @@ static inline int atomic_##op##_return(int i, atomic_t * v) \ return t; \ } +#define ATOMIC_FETCH_OP(op, c_op, asm_op) \ +static inline int atomic_fetch_##op(int i, atomic_t * v) \ +{ \ + unsigned long flags; \ + int t; \ + \ + local_irq_save(flags); \ + t = v->counter; \ + v->counter c_op i; \ + local_irq_restore(flags); \ + \ + return t; \ +} + #endif /* CONFIG_RMW_INSNS */ #define ATOMIC_OPS(op, c_op, asm_op) \ ATOMIC_OP(op, c_op, asm_op) \ - ATOMIC_OP_RETURN(op, c_op, asm_op) + ATOMIC_OP_RETURN(op, c_op, asm_op) \ + ATOMIC_FETCH_OP(op, c_op, asm_op) ATOMIC_OPS(add, +=, add) ATOMIC_OPS(sub, -=, sub) -ATOMIC_OP(and, &=, and) -ATOMIC_OP(or, |=, or) -ATOMIC_OP(xor, ^=, eor) +#undef ATOMIC_OPS +#define ATOMIC_OPS(op, c_op, asm_op) \ + ATOMIC_OP(op, c_op, asm_op) \ + ATOMIC_FETCH_OP(op, c_op, asm_op) + +ATOMIC_OPS(and, &=, and) +ATOMIC_OPS(or, |=, or) +ATOMIC_OPS(xor, ^=, eor) #undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP diff --git a/arch/m68k/include/asm/dma.h b/arch/m68k/include/asm/dma.h index 429fe26e320c..208b4daa14b3 100644 --- a/arch/m68k/include/asm/dma.h +++ b/arch/m68k/include/asm/dma.h @@ -18,7 +18,7 @@ * AUG/22/2000 : added support for 32-bit Dual-Address-Mode (K) 2000 * Oliver Kamphenkel (O.Kamphenkel@tu-bs.de) * - * AUG/25/2000 : addad support for 8, 16 and 32-bit Single-Address-Mode (K)2000 + * AUG/25/2000 : added support for 8, 16 and 32-bit Single-Address-Mode (K)2000 * Oliver Kamphenkel (O.Kamphenkel@tu-bs.de) * * APR/18/2002 : added proper support for MCF5272 DMA controller. diff --git a/arch/m68k/include/asm/m525xsim.h b/arch/m68k/include/asm/m525xsim.h index f186459072e9..699f20c8a0fe 100644 --- a/arch/m68k/include/asm/m525xsim.h +++ b/arch/m68k/include/asm/m525xsim.h @@ -123,10 +123,10 @@ /* * I2C module. */ -#define MCFI2C_BASE0 (MCF_MBAR + 0x280) /* Base addreess I2C0 */ +#define MCFI2C_BASE0 (MCF_MBAR + 0x280) /* Base address I2C0 */ #define MCFI2C_SIZE0 0x20 /* Register set size */ -#define MCFI2C_BASE1 (MCF_MBAR2 + 0x440) /* Base addreess I2C1 */ +#define MCFI2C_BASE1 (MCF_MBAR2 + 0x440) /* Base address I2C1 */ #define MCFI2C_SIZE1 0x20 /* Register set size */ /* diff --git a/arch/m68k/include/asm/mcfmmu.h b/arch/m68k/include/asm/mcfmmu.h index 26cc3d5a63f8..8824236e303f 100644 --- a/arch/m68k/include/asm/mcfmmu.h +++ b/arch/m68k/include/asm/mcfmmu.h @@ -38,7 +38,7 @@ /* * MMU Operation register. */ -#define MMUOR_UAA 0x00000001 /* Update allocatiom address */ +#define MMUOR_UAA 0x00000001 /* Update allocation address */ #define MMUOR_ACC 0x00000002 /* TLB access */ #define MMUOR_RD 0x00000004 /* TLB access read */ #define MMUOR_WR 0x00000000 /* TLB access write */ diff --git a/arch/m68k/include/asm/q40_master.h b/arch/m68k/include/asm/q40_master.h index fc5b36278d04..c48d21b68f04 100644 --- a/arch/m68k/include/asm/q40_master.h +++ b/arch/m68k/include/asm/q40_master.h @@ -1,6 +1,6 @@ /* * Q40 master Chip Control - * RTC stuff merged for compactnes.. + * RTC stuff merged for compactness. */ #ifndef _Q40_MASTER_H diff --git a/arch/m68k/mac/iop.c b/arch/m68k/mac/iop.c index 4d2adfb32a2a..7990b6f50105 100644 --- a/arch/m68k/mac/iop.c +++ b/arch/m68k/mac/iop.c @@ -60,7 +60,7 @@ * * The host talks to the IOPs using a rather simple message-passing scheme via * a shared memory area in the IOP RAM. Each IOP has seven "channels"; each - * channel is conneced to a specific software driver on the IOP. For example + * channel is connected to a specific software driver on the IOP. For example * on the SCC IOP there is one channel for each serial port. Each channel has * an incoming and and outgoing message queue with a depth of one. * diff --git a/arch/m68k/math-emu/fp_decode.h b/arch/m68k/math-emu/fp_decode.h index 759679d9ab96..6d1e760e2a0e 100644 --- a/arch/m68k/math-emu/fp_decode.h +++ b/arch/m68k/math-emu/fp_decode.h @@ -130,7 +130,7 @@ do_fscc=0 bfextu %d2{#13,#3},%d0 .endm -| decode the 8bit diplacement from the brief extension word +| decode the 8bit displacement from the brief extension word .macro fp_decode_disp8 move.b %d2,%d0 ext.w %d0 diff --git a/arch/metag/include/asm/atomic_lnkget.h b/arch/metag/include/asm/atomic_lnkget.h index 88fa25fae8bd..def2c642f053 100644 --- a/arch/metag/include/asm/atomic_lnkget.h +++ b/arch/metag/include/asm/atomic_lnkget.h @@ -69,16 +69,44 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ return result; \ } -#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) +#define ATOMIC_FETCH_OP(op) \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + int result, temp; \ + \ + smp_mb(); \ + \ + asm volatile ( \ + "1: LNKGETD %1, [%2]\n" \ + " " #op " %0, %1, %3\n" \ + " LNKSETD [%2], %0\n" \ + " DEFR %0, TXSTAT\n" \ + " ANDT %0, %0, #HI(0x3f000000)\n" \ + " CMPT %0, #HI(0x02000000)\n" \ + " BNZ 1b\n" \ + : "=&d" (temp), "=&d" (result) \ + : "da" (&v->counter), "bd" (i) \ + : "cc"); \ + \ + smp_mb(); \ + \ + return result; \ +} + +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) ATOMIC_FETCH_OP(op) ATOMIC_OPS(add) ATOMIC_OPS(sub) -ATOMIC_OP(and) -ATOMIC_OP(or) -ATOMIC_OP(xor) +#undef ATOMIC_OPS +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op) + +ATOMIC_OPS(and) +ATOMIC_OPS(or) +ATOMIC_OPS(xor) #undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP diff --git a/arch/metag/include/asm/atomic_lock1.h b/arch/metag/include/asm/atomic_lock1.h index 0295d9b8d5bf..6c1380a8a0d4 100644 --- a/arch/metag/include/asm/atomic_lock1.h +++ b/arch/metag/include/asm/atomic_lock1.h @@ -64,15 +64,40 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ return result; \ } -#define ATOMIC_OPS(op, c_op) ATOMIC_OP(op, c_op) ATOMIC_OP_RETURN(op, c_op) +#define ATOMIC_FETCH_OP(op, c_op) \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + unsigned long result; \ + unsigned long flags; \ + \ + __global_lock1(flags); \ + result = v->counter; \ + fence(); \ + v->counter c_op i; \ + __global_unlock1(flags); \ + \ + return result; \ +} + +#define ATOMIC_OPS(op, c_op) \ + ATOMIC_OP(op, c_op) \ + ATOMIC_OP_RETURN(op, c_op) \ + ATOMIC_FETCH_OP(op, c_op) ATOMIC_OPS(add, +=) ATOMIC_OPS(sub, -=) -ATOMIC_OP(and, &=) -ATOMIC_OP(or, |=) -ATOMIC_OP(xor, ^=) #undef ATOMIC_OPS +#define ATOMIC_OPS(op, c_op) \ + ATOMIC_OP(op, c_op) \ + ATOMIC_FETCH_OP(op, c_op) + +ATOMIC_OPS(and, &=) +ATOMIC_OPS(or, |=) +ATOMIC_OPS(xor, ^=) + +#undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP diff --git a/arch/metag/include/asm/spinlock.h b/arch/metag/include/asm/spinlock.h index 86a7cf3d1386..c0c7a22be1ae 100644 --- a/arch/metag/include/asm/spinlock.h +++ b/arch/metag/include/asm/spinlock.h @@ -1,14 +1,24 @@ #ifndef __ASM_SPINLOCK_H #define __ASM_SPINLOCK_H +#include <asm/barrier.h> +#include <asm/processor.h> + #ifdef CONFIG_METAG_ATOMICITY_LOCK1 #include <asm/spinlock_lock1.h> #else #include <asm/spinlock_lnkget.h> #endif -#define arch_spin_unlock_wait(lock) \ - do { while (arch_spin_is_locked(lock)) cpu_relax(); } while (0) +/* + * both lock1 and lnkget are test-and-set spinlocks with 0 unlocked and 1 + * locked. + */ + +static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) +{ + smp_cond_load_acquire(&lock->lock, !VAL); +} #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) diff --git a/arch/mips/include/asm/atomic.h b/arch/mips/include/asm/atomic.h index 835b402e4574..0ab176bdb8e8 100644 --- a/arch/mips/include/asm/atomic.h +++ b/arch/mips/include/asm/atomic.h @@ -66,7 +66,7 @@ static __inline__ void atomic_##op(int i, atomic_t * v) \ " " #asm_op " %0, %2 \n" \ " sc %0, %1 \n" \ " .set mips0 \n" \ - : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (v->counter) \ + : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (v->counter) \ : "Ir" (i)); \ } while (unlikely(!temp)); \ } else { \ @@ -79,12 +79,10 @@ static __inline__ void atomic_##op(int i, atomic_t * v) \ } #define ATOMIC_OP_RETURN(op, c_op, asm_op) \ -static __inline__ int atomic_##op##_return(int i, atomic_t * v) \ +static __inline__ int atomic_##op##_return_relaxed(int i, atomic_t * v) \ { \ int result; \ \ - smp_mb__before_llsc(); \ - \ if (kernel_uses_llsc && R10000_LLSC_WAR) { \ int temp; \ \ @@ -125,23 +123,84 @@ static __inline__ int atomic_##op##_return(int i, atomic_t * v) \ raw_local_irq_restore(flags); \ } \ \ - smp_llsc_mb(); \ + return result; \ +} + +#define ATOMIC_FETCH_OP(op, c_op, asm_op) \ +static __inline__ int atomic_fetch_##op##_relaxed(int i, atomic_t * v) \ +{ \ + int result; \ + \ + if (kernel_uses_llsc && R10000_LLSC_WAR) { \ + int temp; \ + \ + __asm__ __volatile__( \ + " .set arch=r4000 \n" \ + "1: ll %1, %2 # atomic_fetch_" #op " \n" \ + " " #asm_op " %0, %1, %3 \n" \ + " sc %0, %2 \n" \ + " beqzl %0, 1b \n" \ + " move %0, %1 \n" \ + " .set mips0 \n" \ + : "=&r" (result), "=&r" (temp), \ + "+" GCC_OFF_SMALL_ASM() (v->counter) \ + : "Ir" (i)); \ + } else if (kernel_uses_llsc) { \ + int temp; \ + \ + do { \ + __asm__ __volatile__( \ + " .set "MIPS_ISA_LEVEL" \n" \ + " ll %1, %2 # atomic_fetch_" #op " \n" \ + " " #asm_op " %0, %1, %3 \n" \ + " sc %0, %2 \n" \ + " .set mips0 \n" \ + : "=&r" (result), "=&r" (temp), \ + "+" GCC_OFF_SMALL_ASM() (v->counter) \ + : "Ir" (i)); \ + } while (unlikely(!result)); \ + \ + result = temp; \ + } else { \ + unsigned long flags; \ + \ + raw_local_irq_save(flags); \ + result = v->counter; \ + v->counter c_op i; \ + raw_local_irq_restore(flags); \ + } \ \ return result; \ } #define ATOMIC_OPS(op, c_op, asm_op) \ ATOMIC_OP(op, c_op, asm_op) \ - ATOMIC_OP_RETURN(op, c_op, asm_op) + ATOMIC_OP_RETURN(op, c_op, asm_op) \ + ATOMIC_FETCH_OP(op, c_op, asm_op) ATOMIC_OPS(add, +=, addu) ATOMIC_OPS(sub, -=, subu) -ATOMIC_OP(and, &=, and) -ATOMIC_OP(or, |=, or) -ATOMIC_OP(xor, ^=, xor) +#define atomic_add_return_relaxed atomic_add_return_relaxed +#define atomic_sub_return_relaxed atomic_sub_return_relaxed +#define atomic_fetch_add_relaxed atomic_fetch_add_relaxed +#define atomic_fetch_sub_relaxed atomic_fetch_sub_relaxed + +#undef ATOMIC_OPS +#define ATOMIC_OPS(op, c_op, asm_op) \ + ATOMIC_OP(op, c_op, asm_op) \ + ATOMIC_FETCH_OP(op, c_op, asm_op) + +ATOMIC_OPS(and, &=, and) +ATOMIC_OPS(or, |=, or) +ATOMIC_OPS(xor, ^=, xor) + +#define atomic_fetch_and_relaxed atomic_fetch_and_relaxed +#define atomic_fetch_or_relaxed atomic_fetch_or_relaxed +#define atomic_fetch_xor_relaxed atomic_fetch_xor_relaxed #undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP @@ -362,12 +421,10 @@ static __inline__ void atomic64_##op(long i, atomic64_t * v) \ } #define ATOMIC64_OP_RETURN(op, c_op, asm_op) \ -static __inline__ long atomic64_##op##_return(long i, atomic64_t * v) \ +static __inline__ long atomic64_##op##_return_relaxed(long i, atomic64_t * v) \ { \ long result; \ \ - smp_mb__before_llsc(); \ - \ if (kernel_uses_llsc && R10000_LLSC_WAR) { \ long temp; \ \ @@ -409,22 +466,85 @@ static __inline__ long atomic64_##op##_return(long i, atomic64_t * v) \ raw_local_irq_restore(flags); \ } \ \ - smp_llsc_mb(); \ + return result; \ +} + +#define ATOMIC64_FETCH_OP(op, c_op, asm_op) \ +static __inline__ long atomic64_fetch_##op##_relaxed(long i, atomic64_t * v) \ +{ \ + long result; \ + \ + if (kernel_uses_llsc && R10000_LLSC_WAR) { \ + long temp; \ + \ + __asm__ __volatile__( \ + " .set arch=r4000 \n" \ + "1: lld %1, %2 # atomic64_fetch_" #op "\n" \ + " " #asm_op " %0, %1, %3 \n" \ + " scd %0, %2 \n" \ + " beqzl %0, 1b \n" \ + " move %0, %1 \n" \ + " .set mips0 \n" \ + : "=&r" (result), "=&r" (temp), \ + "+" GCC_OFF_SMALL_ASM() (v->counter) \ + : "Ir" (i)); \ + } else if (kernel_uses_llsc) { \ + long temp; \ + \ + do { \ + __asm__ __volatile__( \ + " .set "MIPS_ISA_LEVEL" \n" \ + " lld %1, %2 # atomic64_fetch_" #op "\n" \ + " " #asm_op " %0, %1, %3 \n" \ + " scd %0, %2 \n" \ + " .set mips0 \n" \ + : "=&r" (result), "=&r" (temp), \ + "=" GCC_OFF_SMALL_ASM() (v->counter) \ + : "Ir" (i), GCC_OFF_SMALL_ASM() (v->counter) \ + : "memory"); \ + } while (unlikely(!result)); \ + \ + result = temp; \ + } else { \ + unsigned long flags; \ + \ + raw_local_irq_save(flags); \ + result = v->counter; \ + v->counter c_op i; \ + raw_local_irq_restore(flags); \ + } \ \ return result; \ } #define ATOMIC64_OPS(op, c_op, asm_op) \ ATOMIC64_OP(op, c_op, asm_op) \ - ATOMIC64_OP_RETURN(op, c_op, asm_op) + ATOMIC64_OP_RETURN(op, c_op, asm_op) \ + ATOMIC64_FETCH_OP(op, c_op, asm_op) ATOMIC64_OPS(add, +=, daddu) ATOMIC64_OPS(sub, -=, dsubu) -ATOMIC64_OP(and, &=, and) -ATOMIC64_OP(or, |=, or) -ATOMIC64_OP(xor, ^=, xor) + +#define atomic64_add_return_relaxed atomic64_add_return_relaxed +#define atomic64_sub_return_relaxed atomic64_sub_return_relaxed +#define atomic64_fetch_add_relaxed atomic64_fetch_add_relaxed +#define atomic64_fetch_sub_relaxed atomic64_fetch_sub_relaxed + +#undef ATOMIC64_OPS +#define ATOMIC64_OPS(op, c_op, asm_op) \ + ATOMIC64_OP(op, c_op, asm_op) \ + ATOMIC64_FETCH_OP(op, c_op, asm_op) + +ATOMIC64_OPS(and, &=, and) +ATOMIC64_OPS(or, |=, or) +ATOMIC64_OPS(xor, ^=, xor) + +#define atomic64_fetch_and_relaxed atomic64_fetch_and_relaxed +#define atomic64_fetch_or_relaxed atomic64_fetch_or_relaxed +#define atomic64_fetch_xor_relaxed atomic64_fetch_xor_relaxed #undef ATOMIC64_OPS +#undef ATOMIC64_FETCH_OP #undef ATOMIC64_OP_RETURN #undef ATOMIC64_OP diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h index a6b611f1da43..7d44e888134f 100644 --- a/arch/mips/include/asm/pgtable.h +++ b/arch/mips/include/asm/pgtable.h @@ -24,7 +24,7 @@ struct mm_struct; struct vm_area_struct; #define PAGE_NONE __pgprot(_PAGE_PRESENT | _PAGE_NO_READ | \ - _CACHE_CACHABLE_NONCOHERENT) + _page_cachable_default) #define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_WRITE | \ _page_cachable_default) #define PAGE_COPY __pgprot(_PAGE_PRESENT | _PAGE_NO_EXEC | \ @@ -476,7 +476,7 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) pte.pte_low &= (_PAGE_MODIFIED | _PAGE_ACCESSED | _PFNX_MASK); pte.pte_high &= (_PFN_MASK | _CACHE_MASK); pte.pte_low |= pgprot_val(newprot) & ~_PFNX_MASK; - pte.pte_high |= pgprot_val(newprot) & ~_PFN_MASK; + pte.pte_high |= pgprot_val(newprot) & ~(_PFN_MASK | _CACHE_MASK); return pte; } #elif defined(CONFIG_PHYS_ADDR_T_64BIT) && defined(CONFIG_CPU_MIPS32) @@ -491,7 +491,8 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) #else static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) { - return __pte((pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot)); + return __pte((pte_val(pte) & _PAGE_CHG_MASK) | + (pgprot_val(newprot) & ~_PAGE_CHG_MASK)); } #endif @@ -632,7 +633,8 @@ static inline struct page *pmd_page(pmd_t pmd) static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) { - pmd_val(pmd) = (pmd_val(pmd) & _PAGE_CHG_MASK) | pgprot_val(newprot); + pmd_val(pmd) = (pmd_val(pmd) & (_PAGE_CHG_MASK | _PAGE_HUGE)) | + (pgprot_val(newprot) & ~_PAGE_CHG_MASK); return pmd; } diff --git a/arch/mips/include/asm/spinlock.h b/arch/mips/include/asm/spinlock.h index 40196bebe849..f485afe51514 100644 --- a/arch/mips/include/asm/spinlock.h +++ b/arch/mips/include/asm/spinlock.h @@ -12,6 +12,7 @@ #include <linux/compiler.h> #include <asm/barrier.h> +#include <asm/processor.h> #include <asm/compiler.h> #include <asm/war.h> @@ -48,8 +49,22 @@ static inline int arch_spin_value_unlocked(arch_spinlock_t lock) } #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) -#define arch_spin_unlock_wait(x) \ - while (arch_spin_is_locked(x)) { cpu_relax(); } + +static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) +{ + u16 owner = READ_ONCE(lock->h.serving_now); + smp_rmb(); + for (;;) { + arch_spinlock_t tmp = READ_ONCE(*lock); + + if (tmp.h.serving_now == tmp.h.ticket || + tmp.h.serving_now != owner) + break; + + cpu_relax(); + } + smp_acquire__after_ctrl_dep(); +} static inline int arch_spin_is_contended(arch_spinlock_t *lock) { diff --git a/arch/mn10300/include/asm/atomic.h b/arch/mn10300/include/asm/atomic.h index ce318d5ab23b..36389efd45e8 100644 --- a/arch/mn10300/include/asm/atomic.h +++ b/arch/mn10300/include/asm/atomic.h @@ -84,16 +84,41 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ return retval; \ } -#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) +#define ATOMIC_FETCH_OP(op) \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + int retval, status; \ + \ + asm volatile( \ + "1: mov %4,(_AAR,%3) \n" \ + " mov (_ADR,%3),%1 \n" \ + " mov %1,%0 \n" \ + " " #op " %5,%0 \n" \ + " mov %0,(_ADR,%3) \n" \ + " mov (_ADR,%3),%0 \n" /* flush */ \ + " mov (_ASR,%3),%0 \n" \ + " or %0,%0 \n" \ + " bne 1b \n" \ + : "=&r"(status), "=&r"(retval), "=m"(v->counter) \ + : "a"(ATOMIC_OPS_BASE_ADDR), "r"(&v->counter), "r"(i) \ + : "memory", "cc"); \ + return retval; \ +} + +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) ATOMIC_FETCH_OP(op) ATOMIC_OPS(add) ATOMIC_OPS(sub) -ATOMIC_OP(and) -ATOMIC_OP(or) -ATOMIC_OP(xor) +#undef ATOMIC_OPS +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op) + +ATOMIC_OPS(and) +ATOMIC_OPS(or) +ATOMIC_OPS(xor) #undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP diff --git a/arch/mn10300/include/asm/spinlock.h b/arch/mn10300/include/asm/spinlock.h index 1ae580f38933..9c7b8f7942d8 100644 --- a/arch/mn10300/include/asm/spinlock.h +++ b/arch/mn10300/include/asm/spinlock.h @@ -12,6 +12,8 @@ #define _ASM_SPINLOCK_H #include <linux/atomic.h> +#include <asm/barrier.h> +#include <asm/processor.h> #include <asm/rwlock.h> #include <asm/page.h> @@ -23,7 +25,11 @@ */ #define arch_spin_is_locked(x) (*(volatile signed char *)(&(x)->slock) != 0) -#define arch_spin_unlock_wait(x) do { barrier(); } while (arch_spin_is_locked(x)) + +static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) +{ + smp_cond_load_acquire(&lock->slock, !VAL); +} static inline void arch_spin_unlock(arch_spinlock_t *lock) { diff --git a/arch/parisc/include/asm/atomic.h b/arch/parisc/include/asm/atomic.h index 1d109990a022..5394b9c5f914 100644 --- a/arch/parisc/include/asm/atomic.h +++ b/arch/parisc/include/asm/atomic.h @@ -121,16 +121,39 @@ static __inline__ int atomic_##op##_return(int i, atomic_t *v) \ return ret; \ } -#define ATOMIC_OPS(op, c_op) ATOMIC_OP(op, c_op) ATOMIC_OP_RETURN(op, c_op) +#define ATOMIC_FETCH_OP(op, c_op) \ +static __inline__ int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + unsigned long flags; \ + int ret; \ + \ + _atomic_spin_lock_irqsave(v, flags); \ + ret = v->counter; \ + v->counter c_op i; \ + _atomic_spin_unlock_irqrestore(v, flags); \ + \ + return ret; \ +} + +#define ATOMIC_OPS(op, c_op) \ + ATOMIC_OP(op, c_op) \ + ATOMIC_OP_RETURN(op, c_op) \ + ATOMIC_FETCH_OP(op, c_op) ATOMIC_OPS(add, +=) ATOMIC_OPS(sub, -=) -ATOMIC_OP(and, &=) -ATOMIC_OP(or, |=) -ATOMIC_OP(xor, ^=) +#undef ATOMIC_OPS +#define ATOMIC_OPS(op, c_op) \ + ATOMIC_OP(op, c_op) \ + ATOMIC_FETCH_OP(op, c_op) + +ATOMIC_OPS(and, &=) +ATOMIC_OPS(or, |=) +ATOMIC_OPS(xor, ^=) #undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP @@ -185,15 +208,39 @@ static __inline__ s64 atomic64_##op##_return(s64 i, atomic64_t *v) \ return ret; \ } -#define ATOMIC64_OPS(op, c_op) ATOMIC64_OP(op, c_op) ATOMIC64_OP_RETURN(op, c_op) +#define ATOMIC64_FETCH_OP(op, c_op) \ +static __inline__ s64 atomic64_fetch_##op(s64 i, atomic64_t *v) \ +{ \ + unsigned long flags; \ + s64 ret; \ + \ + _atomic_spin_lock_irqsave(v, flags); \ + ret = v->counter; \ + v->counter c_op i; \ + _atomic_spin_unlock_irqrestore(v, flags); \ + \ + return ret; \ +} + +#define ATOMIC64_OPS(op, c_op) \ + ATOMIC64_OP(op, c_op) \ + ATOMIC64_OP_RETURN(op, c_op) \ + ATOMIC64_FETCH_OP(op, c_op) ATOMIC64_OPS(add, +=) ATOMIC64_OPS(sub, -=) -ATOMIC64_OP(and, &=) -ATOMIC64_OP(or, |=) -ATOMIC64_OP(xor, ^=) #undef ATOMIC64_OPS +#define ATOMIC64_OPS(op, c_op) \ + ATOMIC64_OP(op, c_op) \ + ATOMIC64_FETCH_OP(op, c_op) + +ATOMIC64_OPS(and, &=) +ATOMIC64_OPS(or, |=) +ATOMIC64_OPS(xor, ^=) + +#undef ATOMIC64_OPS +#undef ATOMIC64_FETCH_OP #undef ATOMIC64_OP_RETURN #undef ATOMIC64_OP diff --git a/arch/parisc/include/asm/spinlock.h b/arch/parisc/include/asm/spinlock.h index 64f2992e439f..e32936cd7f10 100644 --- a/arch/parisc/include/asm/spinlock.h +++ b/arch/parisc/include/asm/spinlock.h @@ -13,8 +13,13 @@ static inline int arch_spin_is_locked(arch_spinlock_t *x) } #define arch_spin_lock(lock) arch_spin_lock_flags(lock, 0) -#define arch_spin_unlock_wait(x) \ - do { cpu_relax(); } while (arch_spin_is_locked(x)) + +static inline void arch_spin_unlock_wait(arch_spinlock_t *x) +{ + volatile unsigned int *a = __ldcw_align(x); + + smp_cond_load_acquire(a, VAL); +} static inline void arch_spin_lock_flags(arch_spinlock_t *x, unsigned long flags) diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h index ae0751ef8788..f08d567e0ca4 100644 --- a/arch/powerpc/include/asm/atomic.h +++ b/arch/powerpc/include/asm/atomic.h @@ -78,21 +78,53 @@ static inline int atomic_##op##_return_relaxed(int a, atomic_t *v) \ return t; \ } +#define ATOMIC_FETCH_OP_RELAXED(op, asm_op) \ +static inline int atomic_fetch_##op##_relaxed(int a, atomic_t *v) \ +{ \ + int res, t; \ + \ + __asm__ __volatile__( \ +"1: lwarx %0,0,%4 # atomic_fetch_" #op "_relaxed\n" \ + #asm_op " %1,%3,%0\n" \ + PPC405_ERR77(0, %4) \ +" stwcx. %1,0,%4\n" \ +" bne- 1b\n" \ + : "=&r" (res), "=&r" (t), "+m" (v->counter) \ + : "r" (a), "r" (&v->counter) \ + : "cc"); \ + \ + return res; \ +} + #define ATOMIC_OPS(op, asm_op) \ ATOMIC_OP(op, asm_op) \ - ATOMIC_OP_RETURN_RELAXED(op, asm_op) + ATOMIC_OP_RETURN_RELAXED(op, asm_op) \ + ATOMIC_FETCH_OP_RELAXED(op, asm_op) ATOMIC_OPS(add, add) ATOMIC_OPS(sub, subf) -ATOMIC_OP(and, and) -ATOMIC_OP(or, or) -ATOMIC_OP(xor, xor) - #define atomic_add_return_relaxed atomic_add_return_relaxed #define atomic_sub_return_relaxed atomic_sub_return_relaxed +#define atomic_fetch_add_relaxed atomic_fetch_add_relaxed +#define atomic_fetch_sub_relaxed atomic_fetch_sub_relaxed + +#undef ATOMIC_OPS +#define ATOMIC_OPS(op, asm_op) \ + ATOMIC_OP(op, asm_op) \ + ATOMIC_FETCH_OP_RELAXED(op, asm_op) + +ATOMIC_OPS(and, and) +ATOMIC_OPS(or, or) +ATOMIC_OPS(xor, xor) + +#define atomic_fetch_and_relaxed atomic_fetch_and_relaxed +#define atomic_fetch_or_relaxed atomic_fetch_or_relaxed +#define atomic_fetch_xor_relaxed atomic_fetch_xor_relaxed + #undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP_RELAXED #undef ATOMIC_OP_RETURN_RELAXED #undef ATOMIC_OP @@ -329,20 +361,53 @@ atomic64_##op##_return_relaxed(long a, atomic64_t *v) \ return t; \ } +#define ATOMIC64_FETCH_OP_RELAXED(op, asm_op) \ +static inline long \ +atomic64_fetch_##op##_relaxed(long a, atomic64_t *v) \ +{ \ + long res, t; \ + \ + __asm__ __volatile__( \ +"1: ldarx %0,0,%4 # atomic64_fetch_" #op "_relaxed\n" \ + #asm_op " %1,%3,%0\n" \ +" stdcx. %1,0,%4\n" \ +" bne- 1b\n" \ + : "=&r" (res), "=&r" (t), "+m" (v->counter) \ + : "r" (a), "r" (&v->counter) \ + : "cc"); \ + \ + return res; \ +} + #define ATOMIC64_OPS(op, asm_op) \ ATOMIC64_OP(op, asm_op) \ - ATOMIC64_OP_RETURN_RELAXED(op, asm_op) + ATOMIC64_OP_RETURN_RELAXED(op, asm_op) \ + ATOMIC64_FETCH_OP_RELAXED(op, asm_op) ATOMIC64_OPS(add, add) ATOMIC64_OPS(sub, subf) -ATOMIC64_OP(and, and) -ATOMIC64_OP(or, or) -ATOMIC64_OP(xor, xor) #define atomic64_add_return_relaxed atomic64_add_return_relaxed #define atomic64_sub_return_relaxed atomic64_sub_return_relaxed +#define atomic64_fetch_add_relaxed atomic64_fetch_add_relaxed +#define atomic64_fetch_sub_relaxed atomic64_fetch_sub_relaxed + +#undef ATOMIC64_OPS +#define ATOMIC64_OPS(op, asm_op) \ + ATOMIC64_OP(op, asm_op) \ + ATOMIC64_FETCH_OP_RELAXED(op, asm_op) + +ATOMIC64_OPS(and, and) +ATOMIC64_OPS(or, or) +ATOMIC64_OPS(xor, xor) + +#define atomic64_fetch_and_relaxed atomic64_fetch_and_relaxed +#define atomic64_fetch_or_relaxed atomic64_fetch_or_relaxed +#define atomic64_fetch_xor_relaxed atomic64_fetch_xor_relaxed + #undef ATOPIC64_OPS +#undef ATOMIC64_FETCH_OP_RELAXED #undef ATOMIC64_OP_RETURN_RELAXED #undef ATOMIC64_OP diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 88a5ecaa157b..ab84c89c9e98 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -230,6 +230,7 @@ extern unsigned long __kernel_virt_size; #define KERN_VIRT_SIZE __kernel_virt_size extern struct page *vmemmap; extern unsigned long ioremap_bot; +extern unsigned long pci_io_base; #endif /* __ASSEMBLY__ */ #include <asm/book3s/64/hash.h> diff --git a/arch/powerpc/include/asm/mutex.h b/arch/powerpc/include/asm/mutex.h index 127ab23e1f6c..078155fa1189 100644 --- a/arch/powerpc/include/asm/mutex.h +++ b/arch/powerpc/include/asm/mutex.h @@ -124,7 +124,7 @@ __mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *)) static inline int __mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *)) { - if (likely(__mutex_cmpxchg_lock(count, 1, 0) == 1)) + if (likely(atomic_read(count) == 1 && __mutex_cmpxchg_lock(count, 1, 0) == 1)) return 1; return 0; } diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index b5f73cb5eeb6..d70101e1e25c 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -647,7 +647,7 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus, pci_unlock_rescan_remove(); } } else if (frozen_bus) { - eeh_pe_dev_traverse(pe, eeh_rmv_device, &rmv_data); + eeh_pe_dev_traverse(pe, eeh_rmv_device, rmv_data); } /* diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c index 3759df52bd67..a5ae49a2dcc4 100644 --- a/arch/powerpc/kernel/pci_64.c +++ b/arch/powerpc/kernel/pci_64.c @@ -47,7 +47,6 @@ static int __init pcibios_init(void) printk(KERN_INFO "PCI: Probing PCI hardware\n"); - pci_io_base = ISA_IO_BASE; /* For now, override phys_mem_access_prot. If we need it,g * later, we may move that initialization to each ppc_md */ diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index e2f12cbcade9..0b93893424f5 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1505,6 +1505,16 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp) current->thread.regs = regs - 1; } +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + /* + * Clear any transactional state, we're exec()ing. The cause is + * not important as there will never be a recheckpoint so it's not + * user visible. + */ + if (MSR_TM_SUSPENDED(mfmsr())) + tm_reclaim_current(0); +#endif + memset(regs->gpr, 0, sizeof(regs->gpr)); regs->ctr = 0; regs->link = 0; diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S index bf8f34a58670..b7019b559ddb 100644 --- a/arch/powerpc/kernel/tm.S +++ b/arch/powerpc/kernel/tm.S @@ -110,17 +110,11 @@ _GLOBAL(tm_reclaim) std r3, STK_PARAM(R3)(r1) SAVE_NVGPRS(r1) - /* We need to setup MSR for VSX register save instructions. Here we - * also clear the MSR RI since when we do the treclaim, we won't have a - * valid kernel pointer for a while. We clear RI here as it avoids - * adding another mtmsr closer to the treclaim. This makes the region - * maked as non-recoverable wider than it needs to be but it saves on - * inserting another mtmsrd later. - */ + /* We need to setup MSR for VSX register save instructions. */ mfmsr r14 mr r15, r14 ori r15, r15, MSR_FP - li r16, MSR_RI + li r16, 0 ori r16, r16, MSR_EE /* IRQs hard off */ andc r15, r15, r16 oris r15, r15, MSR_VEC@h @@ -176,7 +170,17 @@ dont_backup_fp: 1: tdeqi r6, 0 EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,0 - /* The moment we treclaim, ALL of our GPRs will switch + /* Clear MSR RI since we are about to change r1, EE is already off. */ + li r4, 0 + mtmsrd r4, 1 + + /* + * BE CAREFUL HERE: + * At this point we can't take an SLB miss since we have MSR_RI + * off. Load only to/from the stack/paca which are in SLB bolted regions + * until we turn MSR RI back on. + * + * The moment we treclaim, ALL of our GPRs will switch * to user register state. (FPRs, CCR etc. also!) * Use an sprg and a tm_scratch in the PACA to shuffle. */ @@ -197,6 +201,11 @@ dont_backup_fp: /* Store the PPR in r11 and reset to decent value */ std r11, GPR11(r1) /* Temporary stash */ + + /* Reset MSR RI so we can take SLB faults again */ + li r11, MSR_RI + mtmsrd r11, 1 + mfspr r11, SPRN_PPR HMT_MEDIUM @@ -397,11 +406,6 @@ restore_gprs: ld r5, THREAD_TM_DSCR(r3) ld r6, THREAD_TM_PPR(r3) - /* Clear the MSR RI since we are about to change R1. EE is already off - */ - li r4, 0 - mtmsrd r4, 1 - REST_GPR(0, r7) /* GPR0 */ REST_2GPRS(2, r7) /* GPR2-3 */ REST_GPR(4, r7) /* GPR4 */ @@ -439,10 +443,33 @@ restore_gprs: ld r6, _CCR(r7) mtcr r6 - REST_GPR(1, r7) /* GPR1 */ - REST_GPR(5, r7) /* GPR5-7 */ REST_GPR(6, r7) - ld r7, GPR7(r7) + + /* + * Store r1 and r5 on the stack so that we can access them + * after we clear MSR RI. + */ + + REST_GPR(5, r7) + std r5, -8(r1) + ld r5, GPR1(r7) + std r5, -16(r1) + + REST_GPR(7, r7) + + /* Clear MSR RI since we are about to change r1. EE is already off */ + li r5, 0 + mtmsrd r5, 1 + + /* + * BE CAREFUL HERE: + * At this point we can't take an SLB miss since we have MSR_RI + * off. Load only to/from the stack/paca which are in SLB bolted regions + * until we turn MSR RI back on. + */ + + ld r5, -8(r1) + ld r1, -16(r1) /* Commit register state as checkpointed state: */ TRECHKPT diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 5b22ba0b58bc..2971ea18c768 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -922,6 +922,10 @@ void __init hash__early_init_mmu(void) vmemmap = (struct page *)H_VMEMMAP_BASE; ioremap_bot = IOREMAP_BASE; +#ifdef CONFIG_PCI + pci_io_base = ISA_IO_BASE; +#endif + /* Initialize the MMU Hash table and create the linear mapping * of memory. Has to be done before SLB initialization as this is * currently where the page size encoding is obtained. diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c index e58707deef5c..7931e1496f0d 100644 --- a/arch/powerpc/mm/pgtable-radix.c +++ b/arch/powerpc/mm/pgtable-radix.c @@ -328,6 +328,11 @@ void __init radix__early_init_mmu(void) __vmalloc_end = RADIX_VMALLOC_END; vmemmap = (struct page *)RADIX_VMEMMAP_BASE; ioremap_bot = IOREMAP_BASE; + +#ifdef CONFIG_PCI + pci_io_base = ISA_IO_BASE; +#endif + /* * For now radix also use the same frag size */ diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h index 911064aa59b2..d28cc2f5b7b2 100644 --- a/arch/s390/include/asm/atomic.h +++ b/arch/s390/include/asm/atomic.h @@ -93,6 +93,11 @@ static inline int atomic_add_return(int i, atomic_t *v) return __ATOMIC_LOOP(v, i, __ATOMIC_ADD, __ATOMIC_BARRIER) + i; } +static inline int atomic_fetch_add(int i, atomic_t *v) +{ + return __ATOMIC_LOOP(v, i, __ATOMIC_ADD, __ATOMIC_BARRIER); +} + static inline void atomic_add(int i, atomic_t *v) { #ifdef CONFIG_HAVE_MARCH_Z196_FEATURES @@ -114,22 +119,27 @@ static inline void atomic_add(int i, atomic_t *v) #define atomic_inc_and_test(_v) (atomic_add_return(1, _v) == 0) #define atomic_sub(_i, _v) atomic_add(-(int)(_i), _v) #define atomic_sub_return(_i, _v) atomic_add_return(-(int)(_i), _v) +#define atomic_fetch_sub(_i, _v) atomic_fetch_add(-(int)(_i), _v) #define atomic_sub_and_test(_i, _v) (atomic_sub_return(_i, _v) == 0) #define atomic_dec(_v) atomic_sub(1, _v) #define atomic_dec_return(_v) atomic_sub_return(1, _v) #define atomic_dec_and_test(_v) (atomic_sub_return(1, _v) == 0) -#define ATOMIC_OP(op, OP) \ +#define ATOMIC_OPS(op, OP) \ static inline void atomic_##op(int i, atomic_t *v) \ { \ __ATOMIC_LOOP(v, i, __ATOMIC_##OP, __ATOMIC_NO_BARRIER); \ +} \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + return __ATOMIC_LOOP(v, i, __ATOMIC_##OP, __ATOMIC_BARRIER); \ } -ATOMIC_OP(and, AND) -ATOMIC_OP(or, OR) -ATOMIC_OP(xor, XOR) +ATOMIC_OPS(and, AND) +ATOMIC_OPS(or, OR) +ATOMIC_OPS(xor, XOR) -#undef ATOMIC_OP +#undef ATOMIC_OPS #define atomic_xchg(v, new) (xchg(&((v)->counter), new)) @@ -236,6 +246,11 @@ static inline long long atomic64_add_return(long long i, atomic64_t *v) return __ATOMIC64_LOOP(v, i, __ATOMIC64_ADD, __ATOMIC64_BARRIER) + i; } +static inline long long atomic64_fetch_add(long long i, atomic64_t *v) +{ + return __ATOMIC64_LOOP(v, i, __ATOMIC64_ADD, __ATOMIC64_BARRIER); +} + static inline void atomic64_add(long long i, atomic64_t *v) { #ifdef CONFIG_HAVE_MARCH_Z196_FEATURES @@ -264,17 +279,21 @@ static inline long long atomic64_cmpxchg(atomic64_t *v, return old; } -#define ATOMIC64_OP(op, OP) \ +#define ATOMIC64_OPS(op, OP) \ static inline void atomic64_##op(long i, atomic64_t *v) \ { \ __ATOMIC64_LOOP(v, i, __ATOMIC64_##OP, __ATOMIC64_NO_BARRIER); \ +} \ +static inline long atomic64_fetch_##op(long i, atomic64_t *v) \ +{ \ + return __ATOMIC64_LOOP(v, i, __ATOMIC64_##OP, __ATOMIC64_BARRIER); \ } -ATOMIC64_OP(and, AND) -ATOMIC64_OP(or, OR) -ATOMIC64_OP(xor, XOR) +ATOMIC64_OPS(and, AND) +ATOMIC64_OPS(or, OR) +ATOMIC64_OPS(xor, XOR) -#undef ATOMIC64_OP +#undef ATOMIC64_OPS #undef __ATOMIC64_LOOP static inline int atomic64_add_unless(atomic64_t *v, long long i, long long u) @@ -315,6 +334,7 @@ static inline long long atomic64_dec_if_positive(atomic64_t *v) #define atomic64_inc_return(_v) atomic64_add_return(1, _v) #define atomic64_inc_and_test(_v) (atomic64_add_return(1, _v) == 0) #define atomic64_sub_return(_i, _v) atomic64_add_return(-(long long)(_i), _v) +#define atomic64_fetch_sub(_i, _v) atomic64_fetch_add(-(long long)(_i), _v) #define atomic64_sub(_i, _v) atomic64_add(-(long long)(_i), _v) #define atomic64_sub_and_test(_i, _v) (atomic64_sub_return(_i, _v) == 0) #define atomic64_dec(_v) atomic64_sub(1, _v) diff --git a/arch/s390/include/asm/fpu/api.h b/arch/s390/include/asm/fpu/api.h index 5e04f3cbd320..8ae236b0f80b 100644 --- a/arch/s390/include/asm/fpu/api.h +++ b/arch/s390/include/asm/fpu/api.h @@ -22,7 +22,7 @@ static inline int test_fp_ctl(u32 fpc) " la %0,0\n" "1:\n" EX_TABLE(0b,1b) - : "=d" (rc), "=d" (orig_fpc) + : "=d" (rc), "=&d" (orig_fpc) : "d" (fpc), "0" (-EINVAL)); return rc; } diff --git a/arch/s390/include/asm/rwsem.h b/arch/s390/include/asm/rwsem.h index c75e4471e618..597e7e96b59e 100644 --- a/arch/s390/include/asm/rwsem.h +++ b/arch/s390/include/asm/rwsem.h @@ -207,41 +207,4 @@ static inline void __downgrade_write(struct rw_semaphore *sem) rwsem_downgrade_wake(sem); } -/* - * implement atomic add functionality - */ -static inline void rwsem_atomic_add(long delta, struct rw_semaphore *sem) -{ - signed long old, new; - - asm volatile( - " lg %0,%2\n" - "0: lgr %1,%0\n" - " agr %1,%4\n" - " csg %0,%1,%2\n" - " jl 0b" - : "=&d" (old), "=&d" (new), "=Q" (sem->count) - : "Q" (sem->count), "d" (delta) - : "cc", "memory"); -} - -/* - * implement exchange and add functionality - */ -static inline long rwsem_atomic_update(long delta, struct rw_semaphore *sem) -{ - signed long old, new; - - asm volatile( - " lg %0,%2\n" - "0: lgr %1,%0\n" - " agr %1,%4\n" - " csg %0,%1,%2\n" - " jl 0b" - : "=&d" (old), "=&d" (new), "=Q" (sem->count) - : "Q" (sem->count), "d" (delta) - : "cc", "memory"); - return new; -} - #endif /* _S390_RWSEM_H */ diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h index 63ebf37d3143..7e9e09f600fa 100644 --- a/arch/s390/include/asm/spinlock.h +++ b/arch/s390/include/asm/spinlock.h @@ -10,6 +10,8 @@ #define __ASM_SPINLOCK_H #include <linux/smp.h> +#include <asm/barrier.h> +#include <asm/processor.h> #define SPINLOCK_LOCKVAL (S390_lowcore.spinlock_lockval) @@ -97,6 +99,7 @@ static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) { while (arch_spin_is_locked(lock)) arch_spin_relax(lock); + smp_acquire__after_ctrl_dep(); } /* diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index f20abdb5630a..d14069d4b88d 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -2064,12 +2064,5 @@ void s390_reset_system(void) S390_lowcore.program_new_psw.addr = (unsigned long) s390_base_pgm_handler; - /* - * Clear subchannel ID and number to signal new kernel that no CCW or - * SCSI IPL has been done (for kexec and kdump) - */ - S390_lowcore.subchannel_id = 0; - S390_lowcore.subchannel_nr = 0; - do_reset_calls(); } diff --git a/arch/sh/include/asm/atomic-grb.h b/arch/sh/include/asm/atomic-grb.h index b94df40e5f2d..d755e96c3064 100644 --- a/arch/sh/include/asm/atomic-grb.h +++ b/arch/sh/include/asm/atomic-grb.h @@ -43,16 +43,42 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ return tmp; \ } -#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) +#define ATOMIC_FETCH_OP(op) \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + int res, tmp; \ + \ + __asm__ __volatile__ ( \ + " .align 2 \n\t" \ + " mova 1f, r0 \n\t" /* r0 = end point */ \ + " mov r15, r1 \n\t" /* r1 = saved sp */ \ + " mov #-6, r15 \n\t" /* LOGIN: r15 = size */ \ + " mov.l @%2, %0 \n\t" /* load old value */ \ + " mov %0, %1 \n\t" /* save old value */ \ + " " #op " %3, %0 \n\t" /* $op */ \ + " mov.l %0, @%2 \n\t" /* store new value */ \ + "1: mov r1, r15 \n\t" /* LOGOUT */ \ + : "=&r" (tmp), "=&r" (res), "+r" (v) \ + : "r" (i) \ + : "memory" , "r0", "r1"); \ + \ + return res; \ +} + +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) ATOMIC_FETCH_OP(op) ATOMIC_OPS(add) ATOMIC_OPS(sub) -ATOMIC_OP(and) -ATOMIC_OP(or) -ATOMIC_OP(xor) +#undef ATOMIC_OPS +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op) + +ATOMIC_OPS(and) +ATOMIC_OPS(or) +ATOMIC_OPS(xor) #undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP diff --git a/arch/sh/include/asm/atomic-irq.h b/arch/sh/include/asm/atomic-irq.h index 23fcdad5773e..8e2da5fa0178 100644 --- a/arch/sh/include/asm/atomic-irq.h +++ b/arch/sh/include/asm/atomic-irq.h @@ -33,15 +33,38 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ return temp; \ } -#define ATOMIC_OPS(op, c_op) ATOMIC_OP(op, c_op) ATOMIC_OP_RETURN(op, c_op) +#define ATOMIC_FETCH_OP(op, c_op) \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + unsigned long temp, flags; \ + \ + raw_local_irq_save(flags); \ + temp = v->counter; \ + v->counter c_op i; \ + raw_local_irq_restore(flags); \ + \ + return temp; \ +} + +#define ATOMIC_OPS(op, c_op) \ + ATOMIC_OP(op, c_op) \ + ATOMIC_OP_RETURN(op, c_op) \ + ATOMIC_FETCH_OP(op, c_op) ATOMIC_OPS(add, +=) ATOMIC_OPS(sub, -=) -ATOMIC_OP(and, &=) -ATOMIC_OP(or, |=) -ATOMIC_OP(xor, ^=) #undef ATOMIC_OPS +#define ATOMIC_OPS(op, c_op) \ + ATOMIC_OP(op, c_op) \ + ATOMIC_FETCH_OP(op, c_op) + +ATOMIC_OPS(and, &=) +ATOMIC_OPS(or, |=) +ATOMIC_OPS(xor, ^=) + +#undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP diff --git a/arch/sh/include/asm/atomic-llsc.h b/arch/sh/include/asm/atomic-llsc.h index 33d34b16d4d6..caea2c45f6c2 100644 --- a/arch/sh/include/asm/atomic-llsc.h +++ b/arch/sh/include/asm/atomic-llsc.h @@ -48,15 +48,39 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ return temp; \ } -#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) +#define ATOMIC_FETCH_OP(op) \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + unsigned long res, temp; \ + \ + __asm__ __volatile__ ( \ +"1: movli.l @%3, %0 ! atomic_fetch_" #op " \n" \ +" mov %0, %1 \n" \ +" " #op " %2, %0 \n" \ +" movco.l %0, @%3 \n" \ +" bf 1b \n" \ +" synco \n" \ + : "=&z" (temp), "=&z" (res) \ + : "r" (i), "r" (&v->counter) \ + : "t"); \ + \ + return res; \ +} + +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) ATOMIC_FETCH_OP(op) ATOMIC_OPS(add) ATOMIC_OPS(sub) -ATOMIC_OP(and) -ATOMIC_OP(or) -ATOMIC_OP(xor) #undef ATOMIC_OPS +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op) + +ATOMIC_OPS(and) +ATOMIC_OPS(or) +ATOMIC_OPS(xor) + +#undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP diff --git a/arch/sh/include/asm/spinlock.h b/arch/sh/include/asm/spinlock.h index bdc0f3b6c56a..416834b60ad0 100644 --- a/arch/sh/include/asm/spinlock.h +++ b/arch/sh/include/asm/spinlock.h @@ -19,14 +19,20 @@ #error "Need movli.l/movco.l for spinlocks" #endif +#include <asm/barrier.h> +#include <asm/processor.h> + /* * Your basic SMP spinlocks, allowing only a single CPU anywhere */ #define arch_spin_is_locked(x) ((x)->lock <= 0) #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) -#define arch_spin_unlock_wait(x) \ - do { while (arch_spin_is_locked(x)) cpu_relax(); } while (0) + +static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) +{ + smp_cond_load_acquire(&lock->lock, VAL > 0); +} /* * Simple spin lock operations. There are two variants, one clears IRQ's diff --git a/arch/sparc/include/asm/atomic_32.h b/arch/sparc/include/asm/atomic_32.h index 7dcbebbcaec6..ee3f11c43cda 100644 --- a/arch/sparc/include/asm/atomic_32.h +++ b/arch/sparc/include/asm/atomic_32.h @@ -20,9 +20,10 @@ #define ATOMIC_INIT(i) { (i) } int atomic_add_return(int, atomic_t *); -void atomic_and(int, atomic_t *); -void atomic_or(int, atomic_t *); -void atomic_xor(int, atomic_t *); +int atomic_fetch_add(int, atomic_t *); +int atomic_fetch_and(int, atomic_t *); +int atomic_fetch_or(int, atomic_t *); +int atomic_fetch_xor(int, atomic_t *); int atomic_cmpxchg(atomic_t *, int, int); int atomic_xchg(atomic_t *, int); int __atomic_add_unless(atomic_t *, int, int); @@ -35,7 +36,13 @@ void atomic_set(atomic_t *, int); #define atomic_inc(v) ((void)atomic_add_return( 1, (v))) #define atomic_dec(v) ((void)atomic_add_return( -1, (v))) +#define atomic_and(i, v) ((void)atomic_fetch_and((i), (v))) +#define atomic_or(i, v) ((void)atomic_fetch_or((i), (v))) +#define atomic_xor(i, v) ((void)atomic_fetch_xor((i), (v))) + #define atomic_sub_return(i, v) (atomic_add_return(-(int)(i), (v))) +#define atomic_fetch_sub(i, v) (atomic_fetch_add (-(int)(i), (v))) + #define atomic_inc_return(v) (atomic_add_return( 1, (v))) #define atomic_dec_return(v) (atomic_add_return( -1, (v))) diff --git a/arch/sparc/include/asm/atomic_64.h b/arch/sparc/include/asm/atomic_64.h index f2fbf9e16faf..24827a3f733a 100644 --- a/arch/sparc/include/asm/atomic_64.h +++ b/arch/sparc/include/asm/atomic_64.h @@ -28,16 +28,24 @@ void atomic64_##op(long, atomic64_t *); int atomic_##op##_return(int, atomic_t *); \ long atomic64_##op##_return(long, atomic64_t *); -#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) +#define ATOMIC_FETCH_OP(op) \ +int atomic_fetch_##op(int, atomic_t *); \ +long atomic64_fetch_##op(long, atomic64_t *); + +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) ATOMIC_FETCH_OP(op) ATOMIC_OPS(add) ATOMIC_OPS(sub) -ATOMIC_OP(and) -ATOMIC_OP(or) -ATOMIC_OP(xor) +#undef ATOMIC_OPS +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op) + +ATOMIC_OPS(and) +ATOMIC_OPS(or) +ATOMIC_OPS(xor) #undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP diff --git a/arch/sparc/include/asm/spinlock_32.h b/arch/sparc/include/asm/spinlock_32.h index bcc98fc35281..d9c5876c6121 100644 --- a/arch/sparc/include/asm/spinlock_32.h +++ b/arch/sparc/include/asm/spinlock_32.h @@ -9,12 +9,15 @@ #ifndef __ASSEMBLY__ #include <asm/psr.h> +#include <asm/barrier.h> #include <asm/processor.h> /* for cpu_relax */ #define arch_spin_is_locked(lock) (*((volatile unsigned char *)(lock)) != 0) -#define arch_spin_unlock_wait(lock) \ - do { while (arch_spin_is_locked(lock)) cpu_relax(); } while (0) +static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) +{ + smp_cond_load_acquire(&lock->lock, !VAL); +} static inline void arch_spin_lock(arch_spinlock_t *lock) { diff --git a/arch/sparc/include/asm/spinlock_64.h b/arch/sparc/include/asm/spinlock_64.h index 968917694978..87990b7c6b0d 100644 --- a/arch/sparc/include/asm/spinlock_64.h +++ b/arch/sparc/include/asm/spinlock_64.h @@ -8,6 +8,9 @@ #ifndef __ASSEMBLY__ +#include <asm/processor.h> +#include <asm/barrier.h> + /* To get debugging spinlocks which detect and catch * deadlock situations, set CONFIG_DEBUG_SPINLOCK * and rebuild your kernel. @@ -23,9 +26,10 @@ #define arch_spin_is_locked(lp) ((lp)->lock != 0) -#define arch_spin_unlock_wait(lp) \ - do { rmb(); \ - } while((lp)->lock) +static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) +{ + smp_cond_load_acquire(&lock->lock, !VAL); +} static inline void arch_spin_lock(arch_spinlock_t *lock) { diff --git a/arch/sparc/lib/atomic32.c b/arch/sparc/lib/atomic32.c index b9d63c0a7aab..2c373329d5cb 100644 --- a/arch/sparc/lib/atomic32.c +++ b/arch/sparc/lib/atomic32.c @@ -27,39 +27,44 @@ static DEFINE_SPINLOCK(dummy); #endif /* SMP */ -#define ATOMIC_OP_RETURN(op, c_op) \ -int atomic_##op##_return(int i, atomic_t *v) \ +#define ATOMIC_FETCH_OP(op, c_op) \ +int atomic_fetch_##op(int i, atomic_t *v) \ { \ int ret; \ unsigned long flags; \ spin_lock_irqsave(ATOMIC_HASH(v), flags); \ \ - ret = (v->counter c_op i); \ + ret = v->counter; \ + v->counter c_op i; \ \ spin_unlock_irqrestore(ATOMIC_HASH(v), flags); \ return ret; \ } \ -EXPORT_SYMBOL(atomic_##op##_return); +EXPORT_SYMBOL(atomic_fetch_##op); -#define ATOMIC_OP(op, c_op) \ -void atomic_##op(int i, atomic_t *v) \ +#define ATOMIC_OP_RETURN(op, c_op) \ +int atomic_##op##_return(int i, atomic_t *v) \ { \ + int ret; \ unsigned long flags; \ spin_lock_irqsave(ATOMIC_HASH(v), flags); \ \ - v->counter c_op i; \ + ret = (v->counter c_op i); \ \ spin_unlock_irqrestore(ATOMIC_HASH(v), flags); \ + return ret; \ } \ -EXPORT_SYMBOL(atomic_##op); +EXPORT_SYMBOL(atomic_##op##_return); ATOMIC_OP_RETURN(add, +=) -ATOMIC_OP(and, &=) -ATOMIC_OP(or, |=) -ATOMIC_OP(xor, ^=) +ATOMIC_FETCH_OP(add, +=) +ATOMIC_FETCH_OP(and, &=) +ATOMIC_FETCH_OP(or, |=) +ATOMIC_FETCH_OP(xor, ^=) + +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN -#undef ATOMIC_OP int atomic_xchg(atomic_t *v, int new) { diff --git a/arch/sparc/lib/atomic_64.S b/arch/sparc/lib/atomic_64.S index d6b0363f345b..a5c5a0279ccc 100644 --- a/arch/sparc/lib/atomic_64.S +++ b/arch/sparc/lib/atomic_64.S @@ -9,10 +9,11 @@ .text - /* Two versions of the atomic routines, one that + /* Three versions of the atomic routines, one that * does not return a value and does not perform - * memory barriers, and a second which returns - * a value and does the barriers. + * memory barriers, and a two which return + * a value, the new and old value resp. and does the + * barriers. */ #define ATOMIC_OP(op) \ @@ -43,15 +44,34 @@ ENTRY(atomic_##op##_return) /* %o0 = increment, %o1 = atomic_ptr */ \ 2: BACKOFF_SPIN(%o2, %o3, 1b); \ ENDPROC(atomic_##op##_return); -#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) +#define ATOMIC_FETCH_OP(op) \ +ENTRY(atomic_fetch_##op) /* %o0 = increment, %o1 = atomic_ptr */ \ + BACKOFF_SETUP(%o2); \ +1: lduw [%o1], %g1; \ + op %g1, %o0, %g7; \ + cas [%o1], %g1, %g7; \ + cmp %g1, %g7; \ + bne,pn %icc, BACKOFF_LABEL(2f, 1b); \ + nop; \ + retl; \ + sra %g1, 0, %o0; \ +2: BACKOFF_SPIN(%o2, %o3, 1b); \ +ENDPROC(atomic_fetch_##op); + +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) ATOMIC_FETCH_OP(op) ATOMIC_OPS(add) ATOMIC_OPS(sub) -ATOMIC_OP(and) -ATOMIC_OP(or) -ATOMIC_OP(xor) #undef ATOMIC_OPS +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op) + +ATOMIC_OPS(and) +ATOMIC_OPS(or) +ATOMIC_OPS(xor) + +#undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP @@ -83,15 +103,34 @@ ENTRY(atomic64_##op##_return) /* %o0 = increment, %o1 = atomic_ptr */ \ 2: BACKOFF_SPIN(%o2, %o3, 1b); \ ENDPROC(atomic64_##op##_return); -#define ATOMIC64_OPS(op) ATOMIC64_OP(op) ATOMIC64_OP_RETURN(op) +#define ATOMIC64_FETCH_OP(op) \ +ENTRY(atomic64_fetch_##op) /* %o0 = increment, %o1 = atomic_ptr */ \ + BACKOFF_SETUP(%o2); \ +1: ldx [%o1], %g1; \ + op %g1, %o0, %g7; \ + casx [%o1], %g1, %g7; \ + cmp %g1, %g7; \ + bne,pn %xcc, BACKOFF_LABEL(2f, 1b); \ + nop; \ + retl; \ + mov %g1, %o0; \ +2: BACKOFF_SPIN(%o2, %o3, 1b); \ +ENDPROC(atomic64_fetch_##op); + +#define ATOMIC64_OPS(op) ATOMIC64_OP(op) ATOMIC64_OP_RETURN(op) ATOMIC64_FETCH_OP(op) ATOMIC64_OPS(add) ATOMIC64_OPS(sub) -ATOMIC64_OP(and) -ATOMIC64_OP(or) -ATOMIC64_OP(xor) #undef ATOMIC64_OPS +#define ATOMIC64_OPS(op) ATOMIC64_OP(op) ATOMIC64_FETCH_OP(op) + +ATOMIC64_OPS(and) +ATOMIC64_OPS(or) +ATOMIC64_OPS(xor) + +#undef ATOMIC64_OPS +#undef ATOMIC64_FETCH_OP #undef ATOMIC64_OP_RETURN #undef ATOMIC64_OP diff --git a/arch/sparc/lib/ksyms.c b/arch/sparc/lib/ksyms.c index 8eb454cfe05c..de5e97817bdb 100644 --- a/arch/sparc/lib/ksyms.c +++ b/arch/sparc/lib/ksyms.c @@ -107,15 +107,24 @@ EXPORT_SYMBOL(atomic64_##op); EXPORT_SYMBOL(atomic_##op##_return); \ EXPORT_SYMBOL(atomic64_##op##_return); -#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) +#define ATOMIC_FETCH_OP(op) \ +EXPORT_SYMBOL(atomic_fetch_##op); \ +EXPORT_SYMBOL(atomic64_fetch_##op); + +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) ATOMIC_FETCH_OP(op) ATOMIC_OPS(add) ATOMIC_OPS(sub) -ATOMIC_OP(and) -ATOMIC_OP(or) -ATOMIC_OP(xor) #undef ATOMIC_OPS +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op) + +ATOMIC_OPS(and) +ATOMIC_OPS(or) +ATOMIC_OPS(xor) + +#undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP diff --git a/arch/tile/include/asm/atomic.h b/arch/tile/include/asm/atomic.h index 9fc0107a9c5e..8dda3c8ff5ab 100644 --- a/arch/tile/include/asm/atomic.h +++ b/arch/tile/include/asm/atomic.h @@ -46,6 +46,8 @@ static inline int atomic_read(const atomic_t *v) */ #define atomic_sub_return(i, v) atomic_add_return((int)(-(i)), (v)) +#define atomic_fetch_sub(i, v) atomic_fetch_add(-(int)(i), (v)) + /** * atomic_sub - subtract integer from atomic variable * @i: integer value to subtract diff --git a/arch/tile/include/asm/atomic_32.h b/arch/tile/include/asm/atomic_32.h index d320ce253d86..a93774255136 100644 --- a/arch/tile/include/asm/atomic_32.h +++ b/arch/tile/include/asm/atomic_32.h @@ -34,18 +34,29 @@ static inline void atomic_add(int i, atomic_t *v) _atomic_xchg_add(&v->counter, i); } -#define ATOMIC_OP(op) \ -unsigned long _atomic_##op(volatile unsigned long *p, unsigned long mask); \ +#define ATOMIC_OPS(op) \ +unsigned long _atomic_fetch_##op(volatile unsigned long *p, unsigned long mask); \ static inline void atomic_##op(int i, atomic_t *v) \ { \ - _atomic_##op((unsigned long *)&v->counter, i); \ + _atomic_fetch_##op((unsigned long *)&v->counter, i); \ +} \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + smp_mb(); \ + return _atomic_fetch_##op((unsigned long *)&v->counter, i); \ } -ATOMIC_OP(and) -ATOMIC_OP(or) -ATOMIC_OP(xor) +ATOMIC_OPS(and) +ATOMIC_OPS(or) +ATOMIC_OPS(xor) -#undef ATOMIC_OP +#undef ATOMIC_OPS + +static inline int atomic_fetch_add(int i, atomic_t *v) +{ + smp_mb(); + return _atomic_xchg_add(&v->counter, i); +} /** * atomic_add_return - add integer and return @@ -126,16 +137,29 @@ static inline void atomic64_add(long long i, atomic64_t *v) _atomic64_xchg_add(&v->counter, i); } -#define ATOMIC64_OP(op) \ -long long _atomic64_##op(long long *v, long long n); \ +#define ATOMIC64_OPS(op) \ +long long _atomic64_fetch_##op(long long *v, long long n); \ static inline void atomic64_##op(long long i, atomic64_t *v) \ { \ - _atomic64_##op(&v->counter, i); \ + _atomic64_fetch_##op(&v->counter, i); \ +} \ +static inline long long atomic64_fetch_##op(long long i, atomic64_t *v) \ +{ \ + smp_mb(); \ + return _atomic64_fetch_##op(&v->counter, i); \ } -ATOMIC64_OP(and) -ATOMIC64_OP(or) -ATOMIC64_OP(xor) +ATOMIC64_OPS(and) +ATOMIC64_OPS(or) +ATOMIC64_OPS(xor) + +#undef ATOMIC64_OPS + +static inline long long atomic64_fetch_add(long long i, atomic64_t *v) +{ + smp_mb(); + return _atomic64_xchg_add(&v->counter, i); +} /** * atomic64_add_return - add integer and return @@ -186,6 +210,7 @@ static inline void atomic64_set(atomic64_t *v, long long n) #define atomic64_inc_return(v) atomic64_add_return(1LL, (v)) #define atomic64_inc_and_test(v) (atomic64_inc_return(v) == 0) #define atomic64_sub_return(i, v) atomic64_add_return(-(i), (v)) +#define atomic64_fetch_sub(i, v) atomic64_fetch_add(-(i), (v)) #define atomic64_sub_and_test(a, v) (atomic64_sub_return((a), (v)) == 0) #define atomic64_sub(i, v) atomic64_add(-(i), (v)) #define atomic64_dec(v) atomic64_sub(1LL, (v)) @@ -193,7 +218,6 @@ static inline void atomic64_set(atomic64_t *v, long long n) #define atomic64_dec_and_test(v) (atomic64_dec_return((v)) == 0) #define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1LL, 0LL) - #endif /* !__ASSEMBLY__ */ /* @@ -242,16 +266,16 @@ struct __get_user { unsigned long val; int err; }; -extern struct __get_user __atomic_cmpxchg(volatile int *p, +extern struct __get_user __atomic32_cmpxchg(volatile int *p, int *lock, int o, int n); -extern struct __get_user __atomic_xchg(volatile int *p, int *lock, int n); -extern struct __get_user __atomic_xchg_add(volatile int *p, int *lock, int n); -extern struct __get_user __atomic_xchg_add_unless(volatile int *p, +extern struct __get_user __atomic32_xchg(volatile int *p, int *lock, int n); +extern struct __get_user __atomic32_xchg_add(volatile int *p, int *lock, int n); +extern struct __get_user __atomic32_xchg_add_unless(volatile int *p, int *lock, int o, int n); -extern struct __get_user __atomic_or(volatile int *p, int *lock, int n); -extern struct __get_user __atomic_and(volatile int *p, int *lock, int n); -extern struct __get_user __atomic_andn(volatile int *p, int *lock, int n); -extern struct __get_user __atomic_xor(volatile int *p, int *lock, int n); +extern struct __get_user __atomic32_fetch_or(volatile int *p, int *lock, int n); +extern struct __get_user __atomic32_fetch_and(volatile int *p, int *lock, int n); +extern struct __get_user __atomic32_fetch_andn(volatile int *p, int *lock, int n); +extern struct __get_user __atomic32_fetch_xor(volatile int *p, int *lock, int n); extern long long __atomic64_cmpxchg(volatile long long *p, int *lock, long long o, long long n); extern long long __atomic64_xchg(volatile long long *p, int *lock, long long n); @@ -259,9 +283,9 @@ extern long long __atomic64_xchg_add(volatile long long *p, int *lock, long long n); extern long long __atomic64_xchg_add_unless(volatile long long *p, int *lock, long long o, long long n); -extern long long __atomic64_and(volatile long long *p, int *lock, long long n); -extern long long __atomic64_or(volatile long long *p, int *lock, long long n); -extern long long __atomic64_xor(volatile long long *p, int *lock, long long n); +extern long long __atomic64_fetch_and(volatile long long *p, int *lock, long long n); +extern long long __atomic64_fetch_or(volatile long long *p, int *lock, long long n); +extern long long __atomic64_fetch_xor(volatile long long *p, int *lock, long long n); /* Return failure from the atomic wrappers. */ struct __get_user __atomic_bad_address(int __user *addr); diff --git a/arch/tile/include/asm/atomic_64.h b/arch/tile/include/asm/atomic_64.h index b0531a623653..4cefa0c9fd81 100644 --- a/arch/tile/include/asm/atomic_64.h +++ b/arch/tile/include/asm/atomic_64.h @@ -32,11 +32,6 @@ * on any routine which updates memory and returns a value. */ -static inline void atomic_add(int i, atomic_t *v) -{ - __insn_fetchadd4((void *)&v->counter, i); -} - /* * Note a subtlety of the locking here. We are required to provide a * full memory barrier before and after the operation. However, we @@ -59,28 +54,39 @@ static inline int atomic_add_return(int i, atomic_t *v) return val; } -static inline int __atomic_add_unless(atomic_t *v, int a, int u) +#define ATOMIC_OPS(op) \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + int val; \ + smp_mb(); \ + val = __insn_fetch##op##4((void *)&v->counter, i); \ + smp_mb(); \ + return val; \ +} \ +static inline void atomic_##op(int i, atomic_t *v) \ +{ \ + __insn_fetch##op##4((void *)&v->counter, i); \ +} + +ATOMIC_OPS(add) +ATOMIC_OPS(and) +ATOMIC_OPS(or) + +#undef ATOMIC_OPS + +static inline int atomic_fetch_xor(int i, atomic_t *v) { int guess, oldval = v->counter; + smp_mb(); do { - if (oldval == u) - break; guess = oldval; - oldval = cmpxchg(&v->counter, guess, guess + a); + __insn_mtspr(SPR_CMPEXCH_VALUE, guess); + oldval = __insn_cmpexch4(&v->counter, guess ^ i); } while (guess != oldval); + smp_mb(); return oldval; } -static inline void atomic_and(int i, atomic_t *v) -{ - __insn_fetchand4((void *)&v->counter, i); -} - -static inline void atomic_or(int i, atomic_t *v) -{ - __insn_fetchor4((void *)&v->counter, i); -} - static inline void atomic_xor(int i, atomic_t *v) { int guess, oldval = v->counter; @@ -91,6 +97,18 @@ static inline void atomic_xor(int i, atomic_t *v) } while (guess != oldval); } +static inline int __atomic_add_unless(atomic_t *v, int a, int u) +{ + int guess, oldval = v->counter; + do { + if (oldval == u) + break; + guess = oldval; + oldval = cmpxchg(&v->counter, guess, guess + a); + } while (guess != oldval); + return oldval; +} + /* Now the true 64-bit operations. */ #define ATOMIC64_INIT(i) { (i) } @@ -98,11 +116,6 @@ static inline void atomic_xor(int i, atomic_t *v) #define atomic64_read(v) READ_ONCE((v)->counter) #define atomic64_set(v, i) WRITE_ONCE((v)->counter, (i)) -static inline void atomic64_add(long i, atomic64_t *v) -{ - __insn_fetchadd((void *)&v->counter, i); -} - static inline long atomic64_add_return(long i, atomic64_t *v) { int val; @@ -112,26 +125,37 @@ static inline long atomic64_add_return(long i, atomic64_t *v) return val; } -static inline long atomic64_add_unless(atomic64_t *v, long a, long u) +#define ATOMIC64_OPS(op) \ +static inline long atomic64_fetch_##op(long i, atomic64_t *v) \ +{ \ + long val; \ + smp_mb(); \ + val = __insn_fetch##op((void *)&v->counter, i); \ + smp_mb(); \ + return val; \ +} \ +static inline void atomic64_##op(long i, atomic64_t *v) \ +{ \ + __insn_fetch##op((void *)&v->counter, i); \ +} + +ATOMIC64_OPS(add) +ATOMIC64_OPS(and) +ATOMIC64_OPS(or) + +#undef ATOMIC64_OPS + +static inline long atomic64_fetch_xor(long i, atomic64_t *v) { long guess, oldval = v->counter; + smp_mb(); do { - if (oldval == u) - break; guess = oldval; - oldval = cmpxchg(&v->counter, guess, guess + a); + __insn_mtspr(SPR_CMPEXCH_VALUE, guess); + oldval = __insn_cmpexch(&v->counter, guess ^ i); } while (guess != oldval); - return oldval != u; -} - -static inline void atomic64_and(long i, atomic64_t *v) -{ - __insn_fetchand((void *)&v->counter, i); -} - -static inline void atomic64_or(long i, atomic64_t *v) -{ - __insn_fetchor((void *)&v->counter, i); + smp_mb(); + return oldval; } static inline void atomic64_xor(long i, atomic64_t *v) @@ -144,7 +168,20 @@ static inline void atomic64_xor(long i, atomic64_t *v) } while (guess != oldval); } +static inline long atomic64_add_unless(atomic64_t *v, long a, long u) +{ + long guess, oldval = v->counter; + do { + if (oldval == u) + break; + guess = oldval; + oldval = cmpxchg(&v->counter, guess, guess + a); + } while (guess != oldval); + return oldval != u; +} + #define atomic64_sub_return(i, v) atomic64_add_return(-(i), (v)) +#define atomic64_fetch_sub(i, v) atomic64_fetch_add(-(i), (v)) #define atomic64_sub(i, v) atomic64_add(-(i), (v)) #define atomic64_inc_return(v) atomic64_add_return(1, (v)) #define atomic64_dec_return(v) atomic64_sub_return(1, (v)) diff --git a/arch/tile/include/asm/barrier.h b/arch/tile/include/asm/barrier.h index d55222806c2f..4c419ab95ab7 100644 --- a/arch/tile/include/asm/barrier.h +++ b/arch/tile/include/asm/barrier.h @@ -87,6 +87,13 @@ mb_incoherent(void) #define __smp_mb__after_atomic() __smp_mb() #endif +/* + * The TILE architecture does not do speculative reads; this ensures + * that a control dependency also orders against loads and already provides + * a LOAD->{LOAD,STORE} order and can forgo the additional RMB. + */ +#define smp_acquire__after_ctrl_dep() barrier() + #include <asm-generic/barrier.h> #endif /* !__ASSEMBLY__ */ diff --git a/arch/tile/include/asm/bitops_32.h b/arch/tile/include/asm/bitops_32.h index bbf7b666f21d..d1406a95f6b7 100644 --- a/arch/tile/include/asm/bitops_32.h +++ b/arch/tile/include/asm/bitops_32.h @@ -19,9 +19,9 @@ #include <asm/barrier.h> /* Tile-specific routines to support <asm/bitops.h>. */ -unsigned long _atomic_or(volatile unsigned long *p, unsigned long mask); -unsigned long _atomic_andn(volatile unsigned long *p, unsigned long mask); -unsigned long _atomic_xor(volatile unsigned long *p, unsigned long mask); +unsigned long _atomic_fetch_or(volatile unsigned long *p, unsigned long mask); +unsigned long _atomic_fetch_andn(volatile unsigned long *p, unsigned long mask); +unsigned long _atomic_fetch_xor(volatile unsigned long *p, unsigned long mask); /** * set_bit - Atomically set a bit in memory @@ -35,7 +35,7 @@ unsigned long _atomic_xor(volatile unsigned long *p, unsigned long mask); */ static inline void set_bit(unsigned nr, volatile unsigned long *addr) { - _atomic_or(addr + BIT_WORD(nr), BIT_MASK(nr)); + _atomic_fetch_or(addr + BIT_WORD(nr), BIT_MASK(nr)); } /** @@ -54,7 +54,7 @@ static inline void set_bit(unsigned nr, volatile unsigned long *addr) */ static inline void clear_bit(unsigned nr, volatile unsigned long *addr) { - _atomic_andn(addr + BIT_WORD(nr), BIT_MASK(nr)); + _atomic_fetch_andn(addr + BIT_WORD(nr), BIT_MASK(nr)); } /** @@ -69,7 +69,7 @@ static inline void clear_bit(unsigned nr, volatile unsigned long *addr) */ static inline void change_bit(unsigned nr, volatile unsigned long *addr) { - _atomic_xor(addr + BIT_WORD(nr), BIT_MASK(nr)); + _atomic_fetch_xor(addr + BIT_WORD(nr), BIT_MASK(nr)); } /** @@ -85,7 +85,7 @@ static inline int test_and_set_bit(unsigned nr, volatile unsigned long *addr) unsigned long mask = BIT_MASK(nr); addr += BIT_WORD(nr); smp_mb(); /* barrier for proper semantics */ - return (_atomic_or(addr, mask) & mask) != 0; + return (_atomic_fetch_or(addr, mask) & mask) != 0; } /** @@ -101,7 +101,7 @@ static inline int test_and_clear_bit(unsigned nr, volatile unsigned long *addr) unsigned long mask = BIT_MASK(nr); addr += BIT_WORD(nr); smp_mb(); /* barrier for proper semantics */ - return (_atomic_andn(addr, mask) & mask) != 0; + return (_atomic_fetch_andn(addr, mask) & mask) != 0; } /** @@ -118,7 +118,7 @@ static inline int test_and_change_bit(unsigned nr, unsigned long mask = BIT_MASK(nr); addr += BIT_WORD(nr); smp_mb(); /* barrier for proper semantics */ - return (_atomic_xor(addr, mask) & mask) != 0; + return (_atomic_fetch_xor(addr, mask) & mask) != 0; } #include <asm-generic/bitops/ext2-atomic.h> diff --git a/arch/tile/include/asm/futex.h b/arch/tile/include/asm/futex.h index 1a6ef1b69cb1..e64a1b75fc38 100644 --- a/arch/tile/include/asm/futex.h +++ b/arch/tile/include/asm/futex.h @@ -80,16 +80,16 @@ ret = gu.err; \ } -#define __futex_set() __futex_call(__atomic_xchg) -#define __futex_add() __futex_call(__atomic_xchg_add) -#define __futex_or() __futex_call(__atomic_or) -#define __futex_andn() __futex_call(__atomic_andn) -#define __futex_xor() __futex_call(__atomic_xor) +#define __futex_set() __futex_call(__atomic32_xchg) +#define __futex_add() __futex_call(__atomic32_xchg_add) +#define __futex_or() __futex_call(__atomic32_fetch_or) +#define __futex_andn() __futex_call(__atomic32_fetch_andn) +#define __futex_xor() __futex_call(__atomic32_fetch_xor) #define __futex_cmpxchg() \ { \ - struct __get_user gu = __atomic_cmpxchg((u32 __force *)uaddr, \ - lock, oldval, oparg); \ + struct __get_user gu = __atomic32_cmpxchg((u32 __force *)uaddr, \ + lock, oldval, oparg); \ val = gu.val; \ ret = gu.err; \ } diff --git a/arch/tile/lib/atomic_32.c b/arch/tile/lib/atomic_32.c index 298df1e9912a..f8128800dbf5 100644 --- a/arch/tile/lib/atomic_32.c +++ b/arch/tile/lib/atomic_32.c @@ -61,13 +61,13 @@ static inline int *__atomic_setup(volatile void *v) int _atomic_xchg(int *v, int n) { - return __atomic_xchg(v, __atomic_setup(v), n).val; + return __atomic32_xchg(v, __atomic_setup(v), n).val; } EXPORT_SYMBOL(_atomic_xchg); int _atomic_xchg_add(int *v, int i) { - return __atomic_xchg_add(v, __atomic_setup(v), i).val; + return __atomic32_xchg_add(v, __atomic_setup(v), i).val; } EXPORT_SYMBOL(_atomic_xchg_add); @@ -78,39 +78,39 @@ int _atomic_xchg_add_unless(int *v, int a, int u) * to use the first argument consistently as the "old value" * in the assembly, as is done for _atomic_cmpxchg(). */ - return __atomic_xchg_add_unless(v, __atomic_setup(v), u, a).val; + return __atomic32_xchg_add_unless(v, __atomic_setup(v), u, a).val; } EXPORT_SYMBOL(_atomic_xchg_add_unless); int _atomic_cmpxchg(int *v, int o, int n) { - return __atomic_cmpxchg(v, __atomic_setup(v), o, n).val; + return __atomic32_cmpxchg(v, __atomic_setup(v), o, n).val; } EXPORT_SYMBOL(_atomic_cmpxchg); -unsigned long _atomic_or(volatile unsigned long *p, unsigned long mask) +unsigned long _atomic_fetch_or(volatile unsigned long *p, unsigned long mask) { - return __atomic_or((int *)p, __atomic_setup(p), mask).val; + return __atomic32_fetch_or((int *)p, __atomic_setup(p), mask).val; } -EXPORT_SYMBOL(_atomic_or); +EXPORT_SYMBOL(_atomic_fetch_or); -unsigned long _atomic_and(volatile unsigned long *p, unsigned long mask) +unsigned long _atomic_fetch_and(volatile unsigned long *p, unsigned long mask) { - return __atomic_and((int *)p, __atomic_setup(p), mask).val; + return __atomic32_fetch_and((int *)p, __atomic_setup(p), mask).val; } -EXPORT_SYMBOL(_atomic_and); +EXPORT_SYMBOL(_atomic_fetch_and); -unsigned long _atomic_andn(volatile unsigned long *p, unsigned long mask) +unsigned long _atomic_fetch_andn(volatile unsigned long *p, unsigned long mask) { - return __atomic_andn((int *)p, __atomic_setup(p), mask).val; + return __atomic32_fetch_andn((int *)p, __atomic_setup(p), mask).val; } -EXPORT_SYMBOL(_atomic_andn); +EXPORT_SYMBOL(_atomic_fetch_andn); -unsigned long _atomic_xor(volatile unsigned long *p, unsigned long mask) +unsigned long _atomic_fetch_xor(volatile unsigned long *p, unsigned long mask) { - return __atomic_xor((int *)p, __atomic_setup(p), mask).val; + return __atomic32_fetch_xor((int *)p, __atomic_setup(p), mask).val; } -EXPORT_SYMBOL(_atomic_xor); +EXPORT_SYMBOL(_atomic_fetch_xor); long long _atomic64_xchg(long long *v, long long n) @@ -142,23 +142,23 @@ long long _atomic64_cmpxchg(long long *v, long long o, long long n) } EXPORT_SYMBOL(_atomic64_cmpxchg); -long long _atomic64_and(long long *v, long long n) +long long _atomic64_fetch_and(long long *v, long long n) { - return __atomic64_and(v, __atomic_setup(v), n); + return __atomic64_fetch_and(v, __atomic_setup(v), n); } -EXPORT_SYMBOL(_atomic64_and); +EXPORT_SYMBOL(_atomic64_fetch_and); -long long _atomic64_or(long long *v, long long n) +long long _atomic64_fetch_or(long long *v, long long n) { - return __atomic64_or(v, __atomic_setup(v), n); + return __atomic64_fetch_or(v, __atomic_setup(v), n); } -EXPORT_SYMBOL(_atomic64_or); +EXPORT_SYMBOL(_atomic64_fetch_or); -long long _atomic64_xor(long long *v, long long n) +long long _atomic64_fetch_xor(long long *v, long long n) { - return __atomic64_xor(v, __atomic_setup(v), n); + return __atomic64_fetch_xor(v, __atomic_setup(v), n); } -EXPORT_SYMBOL(_atomic64_xor); +EXPORT_SYMBOL(_atomic64_fetch_xor); /* * If any of the atomic or futex routines hit a bad address (not in diff --git a/arch/tile/lib/atomic_asm_32.S b/arch/tile/lib/atomic_asm_32.S index f611265633d6..1a70e6c0f259 100644 --- a/arch/tile/lib/atomic_asm_32.S +++ b/arch/tile/lib/atomic_asm_32.S @@ -172,15 +172,20 @@ STD_ENTRY_SECTION(__atomic\name, .text.atomic) .endif .endm -atomic_op _cmpxchg, 32, "seq r26, r22, r2; { bbns r26, 3f; move r24, r3 }" -atomic_op _xchg, 32, "move r24, r2" -atomic_op _xchg_add, 32, "add r24, r22, r2" -atomic_op _xchg_add_unless, 32, \ + +/* + * Use __atomic32 prefix to avoid collisions with GCC builtin __atomic functions. + */ + +atomic_op 32_cmpxchg, 32, "seq r26, r22, r2; { bbns r26, 3f; move r24, r3 }" +atomic_op 32_xchg, 32, "move r24, r2" +atomic_op 32_xchg_add, 32, "add r24, r22, r2" +atomic_op 32_xchg_add_unless, 32, \ "sne r26, r22, r2; { bbns r26, 3f; add r24, r22, r3 }" -atomic_op _or, 32, "or r24, r22, r2" -atomic_op _and, 32, "and r24, r22, r2" -atomic_op _andn, 32, "nor r2, r2, zero; and r24, r22, r2" -atomic_op _xor, 32, "xor r24, r22, r2" +atomic_op 32_fetch_or, 32, "or r24, r22, r2" +atomic_op 32_fetch_and, 32, "and r24, r22, r2" +atomic_op 32_fetch_andn, 32, "nor r2, r2, zero; and r24, r22, r2" +atomic_op 32_fetch_xor, 32, "xor r24, r22, r2" atomic_op 64_cmpxchg, 64, "{ seq r26, r22, r2; seq r27, r23, r3 }; \ { bbns r26, 3f; move r24, r4 }; { bbns r27, 3f; move r25, r5 }" @@ -192,9 +197,9 @@ atomic_op 64_xchg_add_unless, 64, \ { bbns r26, 3f; add r24, r22, r4 }; \ { bbns r27, 3f; add r25, r23, r5 }; \ slt_u r26, r24, r22; add r25, r25, r26" -atomic_op 64_or, 64, "{ or r24, r22, r2; or r25, r23, r3 }" -atomic_op 64_and, 64, "{ and r24, r22, r2; and r25, r23, r3 }" -atomic_op 64_xor, 64, "{ xor r24, r22, r2; xor r25, r23, r3 }" +atomic_op 64_fetch_or, 64, "{ or r24, r22, r2; or r25, r23, r3 }" +atomic_op 64_fetch_and, 64, "{ and r24, r22, r2; and r25, r23, r3 }" +atomic_op 64_fetch_xor, 64, "{ xor r24, r22, r2; xor r25, r23, r3 }" jrp lr /* happy backtracer */ diff --git a/arch/tile/lib/spinlock_32.c b/arch/tile/lib/spinlock_32.c index 88c2a53362e7..076c6cc43113 100644 --- a/arch/tile/lib/spinlock_32.c +++ b/arch/tile/lib/spinlock_32.c @@ -76,6 +76,12 @@ void arch_spin_unlock_wait(arch_spinlock_t *lock) do { delay_backoff(iterations++); } while (READ_ONCE(lock->current_ticket) == curr); + + /* + * The TILE architecture doesn't do read speculation; therefore + * a control dependency guarantees a LOAD->{LOAD,STORE} order. + */ + barrier(); } EXPORT_SYMBOL(arch_spin_unlock_wait); diff --git a/arch/tile/lib/spinlock_64.c b/arch/tile/lib/spinlock_64.c index c8d1f94ff1fe..a4b5b2cbce93 100644 --- a/arch/tile/lib/spinlock_64.c +++ b/arch/tile/lib/spinlock_64.c @@ -76,6 +76,12 @@ void arch_spin_unlock_wait(arch_spinlock_t *lock) do { delay_backoff(iterations++); } while (arch_spin_current(READ_ONCE(lock->lock)) == curr); + + /* + * The TILE architecture doesn't do read speculation; therefore + * a control dependency guarantees a LOAD->{LOAD,STORE} order. + */ + barrier(); } EXPORT_SYMBOL(arch_spin_unlock_wait); diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index 52fef606bc54..ff574dad95cc 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -757,7 +757,6 @@ struct boot_params *make_boot_params(struct efi_config *c) struct boot_params *boot_params; struct apm_bios_info *bi; struct setup_header *hdr; - struct efi_info *efi; efi_loaded_image_t *image; void *options, *handle; efi_guid_t proto = LOADED_IMAGE_PROTOCOL_GUID; @@ -800,7 +799,6 @@ struct boot_params *make_boot_params(struct efi_config *c) memset(boot_params, 0x0, 0x4000); hdr = &boot_params->hdr; - efi = &boot_params->efi_info; bi = &boot_params->apm_bios_info; /* Copy the second sector to boot_params */ diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 33787ee817f0..dfebbde2a4cc 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -263,7 +263,7 @@ static bool check_hw_exists(void) msr_fail: pr_cont("Broken PMU hardware detected, using software events only.\n"); - pr_info("%sFailed to access perfctr msr (MSR %x is %Lx)\n", + printk("%sFailed to access perfctr msr (MSR %x is %Lx)\n", boot_cpu_has(X86_FEATURE_HYPERVISOR) ? KERN_INFO : KERN_ERR, reg, val_new); @@ -1622,6 +1622,29 @@ ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr, cha } EXPORT_SYMBOL_GPL(events_sysfs_show); +ssize_t events_ht_sysfs_show(struct device *dev, struct device_attribute *attr, + char *page) +{ + struct perf_pmu_events_ht_attr *pmu_attr = + container_of(attr, struct perf_pmu_events_ht_attr, attr); + + /* + * Report conditional events depending on Hyper-Threading. + * + * This is overly conservative as usually the HT special + * handling is not needed if the other CPU thread is idle. + * + * Note this does not (and cannot) handle the case when thread + * siblings are invisible, for example with virtualization + * if they are owned by some other guest. The user tool + * has to re-read when a thread sibling gets onlined later. + */ + return sprintf(page, "%s", + topology_max_smt_threads() > 1 ? + pmu_attr->event_str_ht : + pmu_attr->event_str_noht); +} + EVENT_ATTR(cpu-cycles, CPU_CYCLES ); EVENT_ATTR(instructions, INSTRUCTIONS ); EVENT_ATTR(cache-references, CACHE_REFERENCES ); @@ -2319,7 +2342,7 @@ void perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs) { struct stack_frame frame; - const void __user *fp; + const unsigned long __user *fp; if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { /* TODO: We don't support guest os callchain now */ @@ -2332,7 +2355,7 @@ perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs if (regs->flags & (X86_VM_MASK | PERF_EFLAGS_VM)) return; - fp = (void __user *)regs->bp; + fp = (unsigned long __user *)regs->bp; perf_callchain_store(entry, regs->ip); @@ -2345,16 +2368,17 @@ perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs pagefault_disable(); while (entry->nr < entry->max_stack) { unsigned long bytes; + frame.next_frame = NULL; frame.return_address = 0; - if (!access_ok(VERIFY_READ, fp, 16)) + if (!access_ok(VERIFY_READ, fp, sizeof(*fp) * 2)) break; - bytes = __copy_from_user_nmi(&frame.next_frame, fp, 8); + bytes = __copy_from_user_nmi(&frame.next_frame, fp, sizeof(*fp)); if (bytes != 0) break; - bytes = __copy_from_user_nmi(&frame.return_address, fp+8, 8); + bytes = __copy_from_user_nmi(&frame.return_address, fp + 1, sizeof(*fp)); if (bytes != 0) break; diff --git a/arch/x86/events/intel/Makefile b/arch/x86/events/intel/Makefile index 3660b2cf245a..06c2baa51814 100644 --- a/arch/x86/events/intel/Makefile +++ b/arch/x86/events/intel/Makefile @@ -1,8 +1,8 @@ obj-$(CONFIG_CPU_SUP_INTEL) += core.o bts.o cqm.o obj-$(CONFIG_CPU_SUP_INTEL) += ds.o knc.o obj-$(CONFIG_CPU_SUP_INTEL) += lbr.o p4.o p6.o pt.o -obj-$(CONFIG_PERF_EVENTS_INTEL_RAPL) += intel-rapl.o -intel-rapl-objs := rapl.o +obj-$(CONFIG_PERF_EVENTS_INTEL_RAPL) += intel-rapl-perf.o +intel-rapl-perf-objs := rapl.o obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE) += intel-uncore.o intel-uncore-objs := uncore.o uncore_nhmex.o uncore_snb.o uncore_snbep.o obj-$(CONFIG_PERF_EVENTS_INTEL_CSTATE) += intel-cstate.o diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 7c666958a625..0974ba11e954 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -16,6 +16,7 @@ #include <asm/cpufeature.h> #include <asm/hardirq.h> +#include <asm/intel-family.h> #include <asm/apic.h> #include "../perf_event.h" @@ -115,6 +116,10 @@ static struct event_constraint intel_snb_event_constraints[] __read_mostly = INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_NO_DISPATCH */ INTEL_UEVENT_CONSTRAINT(0x02a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */ + /* + * When HT is off these events can only run on the bottom 4 counters + * When HT is on, they are impacted by the HT bug and require EXCL access + */ INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOPS_RETIRED.* */ INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */ INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */ @@ -139,6 +144,10 @@ static struct event_constraint intel_ivb_event_constraints[] __read_mostly = INTEL_UEVENT_CONSTRAINT(0x0ca3, 0x4), /* CYCLE_ACTIVITY.STALLS_L1D_PENDING */ INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */ + /* + * When HT is off these events can only run on the bottom 4 counters + * When HT is on, they are impacted by the HT bug and require EXCL access + */ INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOPS_RETIRED.* */ INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */ INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */ @@ -177,19 +186,27 @@ static struct event_constraint intel_slm_event_constraints[] __read_mostly = EVENT_CONSTRAINT_END }; -struct event_constraint intel_skl_event_constraints[] = { +static struct event_constraint intel_skl_event_constraints[] = { FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ INTEL_UEVENT_CONSTRAINT(0x1c0, 0x2), /* INST_RETIRED.PREC_DIST */ + + /* + * when HT is off, these can only run on the bottom 4 counters + */ + INTEL_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_INST_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_L3_HIT_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xcd, 0xf), /* MEM_TRANS_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xc6, 0xf), /* FRONTEND_RETIRED.* */ + EVENT_CONSTRAINT_END }; static struct extra_reg intel_knl_extra_regs[] __read_mostly = { - INTEL_UEVENT_EXTRA_REG(0x01b7, - MSR_OFFCORE_RSP_0, 0x7f9ffbffffull, RSP_0), - INTEL_UEVENT_EXTRA_REG(0x02b7, - MSR_OFFCORE_RSP_1, 0x3f9ffbffffull, RSP_1), + INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x799ffbb6e7ull, RSP_0), + INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0x399ffbffe7ull, RSP_1), EVENT_EXTRA_END }; @@ -225,14 +242,51 @@ EVENT_ATTR_STR(mem-loads, mem_ld_nhm, "event=0x0b,umask=0x10,ldlat=3"); EVENT_ATTR_STR(mem-loads, mem_ld_snb, "event=0xcd,umask=0x1,ldlat=3"); EVENT_ATTR_STR(mem-stores, mem_st_snb, "event=0xcd,umask=0x2"); -struct attribute *nhm_events_attrs[] = { +static struct attribute *nhm_events_attrs[] = { EVENT_PTR(mem_ld_nhm), NULL, }; -struct attribute *snb_events_attrs[] = { +/* + * topdown events for Intel Core CPUs. + * + * The events are all in slots, which is a free slot in a 4 wide + * pipeline. Some events are already reported in slots, for cycle + * events we multiply by the pipeline width (4). + * + * With Hyper Threading on, topdown metrics are either summed or averaged + * between the threads of a core: (count_t0 + count_t1). + * + * For the average case the metric is always scaled to pipeline width, + * so we use factor 2 ((count_t0 + count_t1) / 2 * 4) + */ + +EVENT_ATTR_STR_HT(topdown-total-slots, td_total_slots, + "event=0x3c,umask=0x0", /* cpu_clk_unhalted.thread */ + "event=0x3c,umask=0x0,any=1"); /* cpu_clk_unhalted.thread_any */ +EVENT_ATTR_STR_HT(topdown-total-slots.scale, td_total_slots_scale, "4", "2"); +EVENT_ATTR_STR(topdown-slots-issued, td_slots_issued, + "event=0xe,umask=0x1"); /* uops_issued.any */ +EVENT_ATTR_STR(topdown-slots-retired, td_slots_retired, + "event=0xc2,umask=0x2"); /* uops_retired.retire_slots */ +EVENT_ATTR_STR(topdown-fetch-bubbles, td_fetch_bubbles, + "event=0x9c,umask=0x1"); /* idq_uops_not_delivered_core */ +EVENT_ATTR_STR_HT(topdown-recovery-bubbles, td_recovery_bubbles, + "event=0xd,umask=0x3,cmask=1", /* int_misc.recovery_cycles */ + "event=0xd,umask=0x3,cmask=1,any=1"); /* int_misc.recovery_cycles_any */ +EVENT_ATTR_STR_HT(topdown-recovery-bubbles.scale, td_recovery_bubbles_scale, + "4", "2"); + +static struct attribute *snb_events_attrs[] = { EVENT_PTR(mem_ld_snb), EVENT_PTR(mem_st_snb), + EVENT_PTR(td_slots_issued), + EVENT_PTR(td_slots_retired), + EVENT_PTR(td_fetch_bubbles), + EVENT_PTR(td_total_slots), + EVENT_PTR(td_total_slots_scale), + EVENT_PTR(td_recovery_bubbles), + EVENT_PTR(td_recovery_bubbles_scale), NULL, }; @@ -250,6 +304,10 @@ static struct event_constraint intel_hsw_event_constraints[] = { /* CYCLE_ACTIVITY.CYCLES_NO_EXECUTE */ INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf), + /* + * When HT is off these events can only run on the bottom 4 counters + * When HT is on, they are impacted by the HT bug and require EXCL access + */ INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOPS_RETIRED.* */ INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */ INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */ @@ -258,12 +316,19 @@ static struct event_constraint intel_hsw_event_constraints[] = { EVENT_CONSTRAINT_END }; -struct event_constraint intel_bdw_event_constraints[] = { +static struct event_constraint intel_bdw_event_constraints[] = { FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ INTEL_UEVENT_CONSTRAINT(0x148, 0x4), /* L1D_PEND_MISS.PENDING */ INTEL_UBIT_EVENT_CONSTRAINT(0x8a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_MISS */ + /* + * when HT is off, these can only run on the bottom 4 counters + */ + INTEL_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_INST_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_L3_HIT_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xcd, 0xf), /* MEM_TRANS_RETIRED.* */ EVENT_CONSTRAINT_END }; @@ -1332,6 +1397,29 @@ static __initconst const u64 atom_hw_cache_event_ids }, }; +EVENT_ATTR_STR(topdown-total-slots, td_total_slots_slm, "event=0x3c"); +EVENT_ATTR_STR(topdown-total-slots.scale, td_total_slots_scale_slm, "2"); +/* no_alloc_cycles.not_delivered */ +EVENT_ATTR_STR(topdown-fetch-bubbles, td_fetch_bubbles_slm, + "event=0xca,umask=0x50"); +EVENT_ATTR_STR(topdown-fetch-bubbles.scale, td_fetch_bubbles_scale_slm, "2"); +/* uops_retired.all */ +EVENT_ATTR_STR(topdown-slots-issued, td_slots_issued_slm, + "event=0xc2,umask=0x10"); +/* uops_retired.all */ +EVENT_ATTR_STR(topdown-slots-retired, td_slots_retired_slm, + "event=0xc2,umask=0x10"); + +static struct attribute *slm_events_attrs[] = { + EVENT_PTR(td_total_slots_slm), + EVENT_PTR(td_total_slots_scale_slm), + EVENT_PTR(td_fetch_bubbles_slm), + EVENT_PTR(td_fetch_bubbles_scale_slm), + EVENT_PTR(td_slots_issued_slm), + EVENT_PTR(td_slots_retired_slm), + NULL +}; + static struct extra_reg intel_slm_extra_regs[] __read_mostly = { /* must define OFFCORE_RSP_X first, see intel_fixup_er() */ @@ -3261,11 +3349,11 @@ static int intel_snb_pebs_broken(int cpu) u32 rev = UINT_MAX; /* default to broken for unknown models */ switch (cpu_data(cpu).x86_model) { - case 42: /* SNB */ + case INTEL_FAM6_SANDYBRIDGE: rev = 0x28; break; - case 45: /* SNB-EP */ + case INTEL_FAM6_SANDYBRIDGE_X: switch (cpu_data(cpu).x86_mask) { case 6: rev = 0x618; break; case 7: rev = 0x70c; break; @@ -3302,6 +3390,13 @@ static void intel_snb_check_microcode(void) } } +static bool is_lbr_from(unsigned long msr) +{ + unsigned long lbr_from_nr = x86_pmu.lbr_from + x86_pmu.lbr_nr; + + return x86_pmu.lbr_from <= msr && msr < lbr_from_nr; +} + /* * Under certain circumstances, access certain MSR may cause #GP. * The function tests if the input MSR can be safely accessed. @@ -3322,13 +3417,24 @@ static bool check_msr(unsigned long msr, u64 mask) * Only change the bits which can be updated by wrmsrl. */ val_tmp = val_old ^ mask; + + if (is_lbr_from(msr)) + val_tmp = lbr_from_signext_quirk_wr(val_tmp); + if (wrmsrl_safe(msr, val_tmp) || rdmsrl_safe(msr, &val_new)) return false; + /* + * Quirk only affects validation in wrmsr(), so wrmsrl()'s value + * should equal rdmsrl()'s even with the quirk. + */ if (val_new != val_tmp) return false; + if (is_lbr_from(msr)) + val_old = lbr_from_signext_quirk_wr(val_old); + /* Here it's sure that the MSR can be safely accessed. * Restore the old value and return. */ @@ -3437,6 +3543,13 @@ static struct attribute *hsw_events_attrs[] = { EVENT_PTR(cycles_ct), EVENT_PTR(mem_ld_hsw), EVENT_PTR(mem_st_hsw), + EVENT_PTR(td_slots_issued), + EVENT_PTR(td_slots_retired), + EVENT_PTR(td_fetch_bubbles), + EVENT_PTR(td_total_slots), + EVENT_PTR(td_total_slots_scale), + EVENT_PTR(td_recovery_bubbles), + EVENT_PTR(td_recovery_bubbles_scale), NULL }; @@ -3508,15 +3621,15 @@ __init int intel_pmu_init(void) * Install the hw-cache-events table: */ switch (boot_cpu_data.x86_model) { - case 14: /* 65nm Core "Yonah" */ + case INTEL_FAM6_CORE_YONAH: pr_cont("Core events, "); break; - case 15: /* 65nm Core2 "Merom" */ + case INTEL_FAM6_CORE2_MEROM: x86_add_quirk(intel_clovertown_quirk); - case 22: /* 65nm Core2 "Merom-L" */ - case 23: /* 45nm Core2 "Penryn" */ - case 29: /* 45nm Core2 "Dunnington (MP) */ + case INTEL_FAM6_CORE2_MEROM_L: + case INTEL_FAM6_CORE2_PENRYN: + case INTEL_FAM6_CORE2_DUNNINGTON: memcpy(hw_cache_event_ids, core2_hw_cache_event_ids, sizeof(hw_cache_event_ids)); @@ -3527,9 +3640,9 @@ __init int intel_pmu_init(void) pr_cont("Core2 events, "); break; - case 30: /* 45nm Nehalem */ - case 26: /* 45nm Nehalem-EP */ - case 46: /* 45nm Nehalem-EX */ + case INTEL_FAM6_NEHALEM: + case INTEL_FAM6_NEHALEM_EP: + case INTEL_FAM6_NEHALEM_EX: memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs, @@ -3557,11 +3670,11 @@ __init int intel_pmu_init(void) pr_cont("Nehalem events, "); break; - case 28: /* 45nm Atom "Pineview" */ - case 38: /* 45nm Atom "Lincroft" */ - case 39: /* 32nm Atom "Penwell" */ - case 53: /* 32nm Atom "Cloverview" */ - case 54: /* 32nm Atom "Cedarview" */ + case INTEL_FAM6_ATOM_PINEVIEW: + case INTEL_FAM6_ATOM_LINCROFT: + case INTEL_FAM6_ATOM_PENWELL: + case INTEL_FAM6_ATOM_CLOVERVIEW: + case INTEL_FAM6_ATOM_CEDARVIEW: memcpy(hw_cache_event_ids, atom_hw_cache_event_ids, sizeof(hw_cache_event_ids)); @@ -3573,9 +3686,9 @@ __init int intel_pmu_init(void) pr_cont("Atom events, "); break; - case 55: /* 22nm Atom "Silvermont" */ - case 76: /* 14nm Atom "Airmont" */ - case 77: /* 22nm Atom "Silvermont Avoton/Rangely" */ + case INTEL_FAM6_ATOM_SILVERMONT1: + case INTEL_FAM6_ATOM_SILVERMONT2: + case INTEL_FAM6_ATOM_AIRMONT: memcpy(hw_cache_event_ids, slm_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, slm_hw_cache_extra_regs, @@ -3587,11 +3700,12 @@ __init int intel_pmu_init(void) x86_pmu.pebs_constraints = intel_slm_pebs_event_constraints; x86_pmu.extra_regs = intel_slm_extra_regs; x86_pmu.flags |= PMU_FL_HAS_RSP_1; + x86_pmu.cpu_events = slm_events_attrs; pr_cont("Silvermont events, "); break; - case 92: /* 14nm Atom "Goldmont" */ - case 95: /* 14nm Atom "Goldmont Denverton" */ + case INTEL_FAM6_ATOM_GOLDMONT: + case INTEL_FAM6_ATOM_DENVERTON: memcpy(hw_cache_event_ids, glm_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, glm_hw_cache_extra_regs, @@ -3614,9 +3728,9 @@ __init int intel_pmu_init(void) pr_cont("Goldmont events, "); break; - case 37: /* 32nm Westmere */ - case 44: /* 32nm Westmere-EP */ - case 47: /* 32nm Westmere-EX */ + case INTEL_FAM6_WESTMERE: + case INTEL_FAM6_WESTMERE_EP: + case INTEL_FAM6_WESTMERE_EX: memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs, @@ -3643,8 +3757,8 @@ __init int intel_pmu_init(void) pr_cont("Westmere events, "); break; - case 42: /* 32nm SandyBridge */ - case 45: /* 32nm SandyBridge-E/EN/EP */ + case INTEL_FAM6_SANDYBRIDGE: + case INTEL_FAM6_SANDYBRIDGE_X: x86_add_quirk(intel_sandybridge_quirk); x86_add_quirk(intel_ht_bug); memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, @@ -3657,7 +3771,7 @@ __init int intel_pmu_init(void) x86_pmu.event_constraints = intel_snb_event_constraints; x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints; x86_pmu.pebs_aliases = intel_pebs_aliases_snb; - if (boot_cpu_data.x86_model == 45) + if (boot_cpu_data.x86_model == INTEL_FAM6_SANDYBRIDGE_X) x86_pmu.extra_regs = intel_snbep_extra_regs; else x86_pmu.extra_regs = intel_snb_extra_regs; @@ -3679,8 +3793,8 @@ __init int intel_pmu_init(void) pr_cont("SandyBridge events, "); break; - case 58: /* 22nm IvyBridge */ - case 62: /* 22nm IvyBridge-EP/EX */ + case INTEL_FAM6_IVYBRIDGE: + case INTEL_FAM6_IVYBRIDGE_X: x86_add_quirk(intel_ht_bug); memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, sizeof(hw_cache_event_ids)); @@ -3696,7 +3810,7 @@ __init int intel_pmu_init(void) x86_pmu.pebs_constraints = intel_ivb_pebs_event_constraints; x86_pmu.pebs_aliases = intel_pebs_aliases_ivb; x86_pmu.pebs_prec_dist = true; - if (boot_cpu_data.x86_model == 62) + if (boot_cpu_data.x86_model == INTEL_FAM6_IVYBRIDGE_X) x86_pmu.extra_regs = intel_snbep_extra_regs; else x86_pmu.extra_regs = intel_snb_extra_regs; @@ -3714,10 +3828,10 @@ __init int intel_pmu_init(void) break; - case 60: /* 22nm Haswell Core */ - case 63: /* 22nm Haswell Server */ - case 69: /* 22nm Haswell ULT */ - case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */ + case INTEL_FAM6_HASWELL_CORE: + case INTEL_FAM6_HASWELL_X: + case INTEL_FAM6_HASWELL_ULT: + case INTEL_FAM6_HASWELL_GT3E: x86_add_quirk(intel_ht_bug); x86_pmu.late_ack = true; memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids)); @@ -3741,10 +3855,10 @@ __init int intel_pmu_init(void) pr_cont("Haswell events, "); break; - case 61: /* 14nm Broadwell Core-M */ - case 86: /* 14nm Broadwell Xeon D */ - case 71: /* 14nm Broadwell + GT3e (Intel Iris Pro graphics) */ - case 79: /* 14nm Broadwell Server */ + case INTEL_FAM6_BROADWELL_CORE: + case INTEL_FAM6_BROADWELL_XEON_D: + case INTEL_FAM6_BROADWELL_GT3E: + case INTEL_FAM6_BROADWELL_X: x86_pmu.late_ack = true; memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); @@ -3777,7 +3891,7 @@ __init int intel_pmu_init(void) pr_cont("Broadwell events, "); break; - case 87: /* Knights Landing Xeon Phi */ + case INTEL_FAM6_XEON_PHI_KNL: memcpy(hw_cache_event_ids, slm_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, @@ -3795,16 +3909,22 @@ __init int intel_pmu_init(void) pr_cont("Knights Landing events, "); break; - case 142: /* 14nm Kabylake Mobile */ - case 158: /* 14nm Kabylake Desktop */ - case 78: /* 14nm Skylake Mobile */ - case 94: /* 14nm Skylake Desktop */ - case 85: /* 14nm Skylake Server */ + case INTEL_FAM6_SKYLAKE_MOBILE: + case INTEL_FAM6_SKYLAKE_DESKTOP: + case INTEL_FAM6_SKYLAKE_X: + case INTEL_FAM6_KABYLAKE_MOBILE: + case INTEL_FAM6_KABYLAKE_DESKTOP: x86_pmu.late_ack = true; memcpy(hw_cache_event_ids, skl_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, skl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); intel_pmu_lbr_init_skl(); + /* INT_MISC.RECOVERY_CYCLES has umask 1 in Skylake */ + event_attr_td_recovery_bubbles.event_str_noht = + "event=0xd,umask=0x1,cmask=1"; + event_attr_td_recovery_bubbles.event_str_ht = + "event=0xd,umask=0x1,cmask=1,any=1"; + x86_pmu.event_constraints = intel_skl_event_constraints; x86_pmu.pebs_constraints = intel_skl_pebs_event_constraints; x86_pmu.extra_regs = intel_skl_extra_regs; @@ -3885,6 +4005,8 @@ __init int intel_pmu_init(void) x86_pmu.lbr_nr = 0; } + if (x86_pmu.lbr_nr) + pr_cont("%d-deep LBR, ", x86_pmu.lbr_nr); /* * Access extra MSR may cause #GP under certain circumstances. * E.g. KVM doesn't support offcore event @@ -3917,16 +4039,14 @@ __init int intel_pmu_init(void) */ static __init int fixup_ht_bug(void) { - int cpu = smp_processor_id(); - int w, c; + int c; /* * problem not present on this CPU model, nothing to do */ if (!(x86_pmu.flags & PMU_FL_EXCL_ENABLED)) return 0; - w = cpumask_weight(topology_sibling_cpumask(cpu)); - if (w > 1) { + if (topology_max_smt_threads() > 1) { pr_info("PMU erratum BJ122, BV98, HSD29 worked around, HT is on\n"); return 0; } diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c index 9ba4e4136a15..4c7638b91fa5 100644 --- a/arch/x86/events/intel/cstate.c +++ b/arch/x86/events/intel/cstate.c @@ -89,6 +89,7 @@ #include <linux/slab.h> #include <linux/perf_event.h> #include <asm/cpu_device_id.h> +#include <asm/intel-family.h> #include "../perf_event.h" MODULE_LICENSE("GPL"); @@ -511,37 +512,37 @@ static const struct cstate_model slm_cstates __initconst = { { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long) &(states) } static const struct x86_cpu_id intel_cstates_match[] __initconst = { - X86_CSTATES_MODEL(30, nhm_cstates), /* 45nm Nehalem */ - X86_CSTATES_MODEL(26, nhm_cstates), /* 45nm Nehalem-EP */ - X86_CSTATES_MODEL(46, nhm_cstates), /* 45nm Nehalem-EX */ + X86_CSTATES_MODEL(INTEL_FAM6_NEHALEM, nhm_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_NEHALEM_EP, nhm_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_NEHALEM_EX, nhm_cstates), - X86_CSTATES_MODEL(37, nhm_cstates), /* 32nm Westmere */ - X86_CSTATES_MODEL(44, nhm_cstates), /* 32nm Westmere-EP */ - X86_CSTATES_MODEL(47, nhm_cstates), /* 32nm Westmere-EX */ + X86_CSTATES_MODEL(INTEL_FAM6_WESTMERE, nhm_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_WESTMERE_EP, nhm_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_WESTMERE_EX, nhm_cstates), - X86_CSTATES_MODEL(42, snb_cstates), /* 32nm SandyBridge */ - X86_CSTATES_MODEL(45, snb_cstates), /* 32nm SandyBridge-E/EN/EP */ + X86_CSTATES_MODEL(INTEL_FAM6_SANDYBRIDGE, snb_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_SANDYBRIDGE_X, snb_cstates), - X86_CSTATES_MODEL(58, snb_cstates), /* 22nm IvyBridge */ - X86_CSTATES_MODEL(62, snb_cstates), /* 22nm IvyBridge-EP/EX */ + X86_CSTATES_MODEL(INTEL_FAM6_IVYBRIDGE, snb_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_IVYBRIDGE_X, snb_cstates), - X86_CSTATES_MODEL(60, snb_cstates), /* 22nm Haswell Core */ - X86_CSTATES_MODEL(63, snb_cstates), /* 22nm Haswell Server */ - X86_CSTATES_MODEL(70, snb_cstates), /* 22nm Haswell + GT3e */ + X86_CSTATES_MODEL(INTEL_FAM6_HASWELL_CORE, snb_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_HASWELL_X, snb_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_HASWELL_GT3E, snb_cstates), - X86_CSTATES_MODEL(69, hswult_cstates), /* 22nm Haswell ULT */ + X86_CSTATES_MODEL(INTEL_FAM6_HASWELL_ULT, hswult_cstates), - X86_CSTATES_MODEL(55, slm_cstates), /* 22nm Atom Silvermont */ - X86_CSTATES_MODEL(77, slm_cstates), /* 22nm Atom Avoton/Rangely */ - X86_CSTATES_MODEL(76, slm_cstates), /* 22nm Atom Airmont */ + X86_CSTATES_MODEL(INTEL_FAM6_ATOM_SILVERMONT1, slm_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_ATOM_SILVERMONT2, slm_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_ATOM_AIRMONT, slm_cstates), - X86_CSTATES_MODEL(61, snb_cstates), /* 14nm Broadwell Core-M */ - X86_CSTATES_MODEL(86, snb_cstates), /* 14nm Broadwell Xeon D */ - X86_CSTATES_MODEL(71, snb_cstates), /* 14nm Broadwell + GT3e */ - X86_CSTATES_MODEL(79, snb_cstates), /* 14nm Broadwell Server */ + X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL_CORE, snb_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL_XEON_D, snb_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL_GT3E, snb_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL_X, snb_cstates), - X86_CSTATES_MODEL(78, snb_cstates), /* 14nm Skylake Mobile */ - X86_CSTATES_MODEL(94, snb_cstates), /* 14nm Skylake Desktop */ + X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_MOBILE, snb_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_DESKTOP, snb_cstates), { }, }; MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match); diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c index 9e2b40cdb05f..707d358e0dff 100644 --- a/arch/x86/events/intel/lbr.c +++ b/arch/x86/events/intel/lbr.c @@ -77,9 +77,11 @@ static enum { LBR_IND_JMP |\ LBR_FAR) -#define LBR_FROM_FLAG_MISPRED (1ULL << 63) -#define LBR_FROM_FLAG_IN_TX (1ULL << 62) -#define LBR_FROM_FLAG_ABORT (1ULL << 61) +#define LBR_FROM_FLAG_MISPRED BIT_ULL(63) +#define LBR_FROM_FLAG_IN_TX BIT_ULL(62) +#define LBR_FROM_FLAG_ABORT BIT_ULL(61) + +#define LBR_FROM_SIGNEXT_2MSB (BIT_ULL(60) | BIT_ULL(59)) /* * x86control flow change classification @@ -235,6 +237,97 @@ enum { LBR_VALID, }; +/* + * For formats with LBR_TSX flags (e.g. LBR_FORMAT_EIP_FLAGS2), bits 61:62 in + * MSR_LAST_BRANCH_FROM_x are the TSX flags when TSX is supported, but when + * TSX is not supported they have no consistent behavior: + * + * - For wrmsr(), bits 61:62 are considered part of the sign extension. + * - For HW updates (branch captures) bits 61:62 are always OFF and are not + * part of the sign extension. + * + * Therefore, if: + * + * 1) LBR has TSX format + * 2) CPU has no TSX support enabled + * + * ... then any value passed to wrmsr() must be sign extended to 63 bits and any + * value from rdmsr() must be converted to have a 61 bits sign extension, + * ignoring the TSX flags. + */ +static inline bool lbr_from_signext_quirk_needed(void) +{ + int lbr_format = x86_pmu.intel_cap.lbr_format; + bool tsx_support = boot_cpu_has(X86_FEATURE_HLE) || + boot_cpu_has(X86_FEATURE_RTM); + + return !tsx_support && (lbr_desc[lbr_format] & LBR_TSX); +} + +DEFINE_STATIC_KEY_FALSE(lbr_from_quirk_key); + +/* If quirk is enabled, ensure sign extension is 63 bits: */ +inline u64 lbr_from_signext_quirk_wr(u64 val) +{ + if (static_branch_unlikely(&lbr_from_quirk_key)) { + /* + * Sign extend into bits 61:62 while preserving bit 63. + * + * Quirk is enabled when TSX is disabled. Therefore TSX bits + * in val are always OFF and must be changed to be sign + * extension bits. Since bits 59:60 are guaranteed to be + * part of the sign extension bits, we can just copy them + * to 61:62. + */ + val |= (LBR_FROM_SIGNEXT_2MSB & val) << 2; + } + return val; +} + +/* + * If quirk is needed, ensure sign extension is 61 bits: + */ +u64 lbr_from_signext_quirk_rd(u64 val) +{ + if (static_branch_unlikely(&lbr_from_quirk_key)) { + /* + * Quirk is on when TSX is not enabled. Therefore TSX + * flags must be read as OFF. + */ + val &= ~(LBR_FROM_FLAG_IN_TX | LBR_FROM_FLAG_ABORT); + } + return val; +} + +static inline void wrlbr_from(unsigned int idx, u64 val) +{ + val = lbr_from_signext_quirk_wr(val); + wrmsrl(x86_pmu.lbr_from + idx, val); +} + +static inline void wrlbr_to(unsigned int idx, u64 val) +{ + wrmsrl(x86_pmu.lbr_to + idx, val); +} + +static inline u64 rdlbr_from(unsigned int idx) +{ + u64 val; + + rdmsrl(x86_pmu.lbr_from + idx, val); + + return lbr_from_signext_quirk_rd(val); +} + +static inline u64 rdlbr_to(unsigned int idx) +{ + u64 val; + + rdmsrl(x86_pmu.lbr_to + idx, val); + + return val; +} + static void __intel_pmu_lbr_restore(struct x86_perf_task_context *task_ctx) { int i; @@ -251,8 +344,9 @@ static void __intel_pmu_lbr_restore(struct x86_perf_task_context *task_ctx) tos = task_ctx->tos; for (i = 0; i < tos; i++) { lbr_idx = (tos - i) & mask; - wrmsrl(x86_pmu.lbr_from + lbr_idx, task_ctx->lbr_from[i]); - wrmsrl(x86_pmu.lbr_to + lbr_idx, task_ctx->lbr_to[i]); + wrlbr_from(lbr_idx, task_ctx->lbr_from[i]); + wrlbr_to (lbr_idx, task_ctx->lbr_to[i]); + if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO) wrmsrl(MSR_LBR_INFO_0 + lbr_idx, task_ctx->lbr_info[i]); } @@ -262,9 +356,9 @@ static void __intel_pmu_lbr_restore(struct x86_perf_task_context *task_ctx) static void __intel_pmu_lbr_save(struct x86_perf_task_context *task_ctx) { - int i; unsigned lbr_idx, mask; u64 tos; + int i; if (task_ctx->lbr_callstack_users == 0) { task_ctx->lbr_stack_state = LBR_NONE; @@ -275,8 +369,8 @@ static void __intel_pmu_lbr_save(struct x86_perf_task_context *task_ctx) tos = intel_pmu_lbr_tos(); for (i = 0; i < tos; i++) { lbr_idx = (tos - i) & mask; - rdmsrl(x86_pmu.lbr_from + lbr_idx, task_ctx->lbr_from[i]); - rdmsrl(x86_pmu.lbr_to + lbr_idx, task_ctx->lbr_to[i]); + task_ctx->lbr_from[i] = rdlbr_from(lbr_idx); + task_ctx->lbr_to[i] = rdlbr_to(lbr_idx); if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO) rdmsrl(MSR_LBR_INFO_0 + lbr_idx, task_ctx->lbr_info[i]); } @@ -452,8 +546,8 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc) u16 cycles = 0; int lbr_flags = lbr_desc[lbr_format]; - rdmsrl(x86_pmu.lbr_from + lbr_idx, from); - rdmsrl(x86_pmu.lbr_to + lbr_idx, to); + from = rdlbr_from(lbr_idx); + to = rdlbr_to(lbr_idx); if (lbr_format == LBR_FORMAT_INFO && need_info) { u64 info; @@ -956,7 +1050,6 @@ void __init intel_pmu_lbr_init_core(void) * SW branch filter usage: * - compensate for lack of HW filter */ - pr_cont("4-deep LBR, "); } /* nehalem/westmere */ @@ -977,7 +1070,6 @@ void __init intel_pmu_lbr_init_nhm(void) * That requires LBR_FAR but that means far * jmp need to be filtered out */ - pr_cont("16-deep LBR, "); } /* sandy bridge */ @@ -997,7 +1089,6 @@ void __init intel_pmu_lbr_init_snb(void) * That requires LBR_FAR but that means far * jmp need to be filtered out */ - pr_cont("16-deep LBR, "); } /* haswell */ @@ -1011,7 +1102,8 @@ void intel_pmu_lbr_init_hsw(void) x86_pmu.lbr_sel_mask = LBR_SEL_MASK; x86_pmu.lbr_sel_map = hsw_lbr_sel_map; - pr_cont("16-deep LBR, "); + if (lbr_from_signext_quirk_needed()) + static_branch_enable(&lbr_from_quirk_key); } /* skylake */ @@ -1031,7 +1123,6 @@ __init void intel_pmu_lbr_init_skl(void) * That requires LBR_FAR but that means far * jmp need to be filtered out */ - pr_cont("32-deep LBR, "); } /* atom */ @@ -1057,7 +1148,6 @@ void __init intel_pmu_lbr_init_atom(void) * SW branch filter usage: * - compensate for lack of HW filter */ - pr_cont("8-deep LBR, "); } /* slm */ @@ -1088,6 +1178,4 @@ void intel_pmu_lbr_init_knl(void) x86_pmu.lbr_sel_mask = LBR_SEL_MASK; x86_pmu.lbr_sel_map = snb_lbr_sel_map; - - pr_cont("8-deep LBR, "); } diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c index e30eef4f29a6..d0c58b35155f 100644 --- a/arch/x86/events/intel/rapl.c +++ b/arch/x86/events/intel/rapl.c @@ -55,6 +55,7 @@ #include <linux/slab.h> #include <linux/perf_event.h> #include <asm/cpu_device_id.h> +#include <asm/intel-family.h> #include "../perf_event.h" MODULE_LICENSE("GPL"); @@ -786,26 +787,27 @@ static const struct intel_rapl_init_fun skl_rapl_init __initconst = { }; static const struct x86_cpu_id rapl_cpu_match[] __initconst = { - X86_RAPL_MODEL_MATCH(42, snb_rapl_init), /* Sandy Bridge */ - X86_RAPL_MODEL_MATCH(45, snbep_rapl_init), /* Sandy Bridge-EP */ + X86_RAPL_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE, snb_rapl_init), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE_X, snbep_rapl_init), - X86_RAPL_MODEL_MATCH(58, snb_rapl_init), /* Ivy Bridge */ - X86_RAPL_MODEL_MATCH(62, snbep_rapl_init), /* IvyTown */ + X86_RAPL_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE, snb_rapl_init), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE_X, snbep_rapl_init), - X86_RAPL_MODEL_MATCH(60, hsw_rapl_init), /* Haswell */ - X86_RAPL_MODEL_MATCH(63, hsx_rapl_init), /* Haswell-Server */ - X86_RAPL_MODEL_MATCH(69, hsw_rapl_init), /* Haswell-Celeron */ - X86_RAPL_MODEL_MATCH(70, hsw_rapl_init), /* Haswell GT3e */ + X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_CORE, hsw_rapl_init), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_X, hsw_rapl_init), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_ULT, hsw_rapl_init), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_GT3E, hsw_rapl_init), - X86_RAPL_MODEL_MATCH(61, hsw_rapl_init), /* Broadwell */ - X86_RAPL_MODEL_MATCH(71, hsw_rapl_init), /* Broadwell-H */ - X86_RAPL_MODEL_MATCH(79, hsx_rapl_init), /* Broadwell-Server */ - X86_RAPL_MODEL_MATCH(86, hsx_rapl_init), /* Broadwell Xeon D */ + X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_CORE, hsw_rapl_init), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_GT3E, hsw_rapl_init), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_X, hsw_rapl_init), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_XEON_D, hsw_rapl_init), - X86_RAPL_MODEL_MATCH(87, knl_rapl_init), /* Knights Landing */ + X86_RAPL_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNL, knl_rapl_init), - X86_RAPL_MODEL_MATCH(78, skl_rapl_init), /* Skylake */ - X86_RAPL_MODEL_MATCH(94, skl_rapl_init), /* Skylake H/S */ + X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_MOBILE, skl_rapl_init), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_DESKTOP, skl_rapl_init), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_X, hsx_rapl_init), {}, }; diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index fce74062d981..59b4974c697f 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -1,4 +1,5 @@ #include <asm/cpu_device_id.h> +#include <asm/intel-family.h> #include "uncore.h" static struct intel_uncore_type *empty_uncore[] = { NULL, }; @@ -882,7 +883,7 @@ uncore_types_init(struct intel_uncore_type **types, bool setid) static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) { struct intel_uncore_type *type; - struct intel_uncore_pmu *pmu; + struct intel_uncore_pmu *pmu = NULL; struct intel_uncore_box *box; int phys_id, pkg, ret; @@ -903,20 +904,37 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id } type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(id->driver_data)]; + /* - * for performance monitoring unit with multiple boxes, - * each box has a different function id. - */ - pmu = &type->pmus[UNCORE_PCI_DEV_IDX(id->driver_data)]; - /* Knights Landing uses a common PCI device ID for multiple instances of - * an uncore PMU device type. There is only one entry per device type in - * the knl_uncore_pci_ids table inspite of multiple devices present for - * some device types. Hence PCI device idx would be 0 for all devices. - * So increment pmu pointer to point to an unused array element. + * Some platforms, e.g. Knights Landing, use a common PCI device ID + * for multiple instances of an uncore PMU device type. We should check + * PCI slot and func to indicate the uncore box. */ - if (boot_cpu_data.x86_model == 87) { - while (pmu->func_id >= 0) - pmu++; + if (id->driver_data & ~0xffff) { + struct pci_driver *pci_drv = pdev->driver; + const struct pci_device_id *ids = pci_drv->id_table; + unsigned int devfn; + + while (ids && ids->vendor) { + if ((ids->vendor == pdev->vendor) && + (ids->device == pdev->device)) { + devfn = PCI_DEVFN(UNCORE_PCI_DEV_DEV(ids->driver_data), + UNCORE_PCI_DEV_FUNC(ids->driver_data)); + if (devfn == pdev->devfn) { + pmu = &type->pmus[UNCORE_PCI_DEV_IDX(ids->driver_data)]; + break; + } + } + ids++; + } + if (pmu == NULL) + return -ENODEV; + } else { + /* + * for performance monitoring unit with multiple boxes, + * each box has a different function id. + */ + pmu = &type->pmus[UNCORE_PCI_DEV_IDX(id->driver_data)]; } if (WARN_ON_ONCE(pmu->boxes[pkg] != NULL)) @@ -956,7 +974,7 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id static void uncore_pci_remove(struct pci_dev *pdev) { - struct intel_uncore_box *box = pci_get_drvdata(pdev); + struct intel_uncore_box *box; struct intel_uncore_pmu *pmu; int i, phys_id, pkg; @@ -1361,30 +1379,32 @@ static const struct intel_uncore_init_fun knl_uncore_init __initconst = { }; static const struct intel_uncore_init_fun skl_uncore_init __initconst = { + .cpu_init = skl_uncore_cpu_init, .pci_init = skl_uncore_pci_init, }; static const struct x86_cpu_id intel_uncore_match[] __initconst = { - X86_UNCORE_MODEL_MATCH(26, nhm_uncore_init), /* Nehalem */ - X86_UNCORE_MODEL_MATCH(30, nhm_uncore_init), - X86_UNCORE_MODEL_MATCH(37, nhm_uncore_init), /* Westmere */ - X86_UNCORE_MODEL_MATCH(44, nhm_uncore_init), - X86_UNCORE_MODEL_MATCH(42, snb_uncore_init), /* Sandy Bridge */ - X86_UNCORE_MODEL_MATCH(58, ivb_uncore_init), /* Ivy Bridge */ - X86_UNCORE_MODEL_MATCH(60, hsw_uncore_init), /* Haswell */ - X86_UNCORE_MODEL_MATCH(69, hsw_uncore_init), /* Haswell Celeron */ - X86_UNCORE_MODEL_MATCH(70, hsw_uncore_init), /* Haswell */ - X86_UNCORE_MODEL_MATCH(61, bdw_uncore_init), /* Broadwell */ - X86_UNCORE_MODEL_MATCH(71, bdw_uncore_init), /* Broadwell */ - X86_UNCORE_MODEL_MATCH(45, snbep_uncore_init), /* Sandy Bridge-EP */ - X86_UNCORE_MODEL_MATCH(46, nhmex_uncore_init), /* Nehalem-EX */ - X86_UNCORE_MODEL_MATCH(47, nhmex_uncore_init), /* Westmere-EX aka. Xeon E7 */ - X86_UNCORE_MODEL_MATCH(62, ivbep_uncore_init), /* Ivy Bridge-EP */ - X86_UNCORE_MODEL_MATCH(63, hswep_uncore_init), /* Haswell-EP */ - X86_UNCORE_MODEL_MATCH(79, bdx_uncore_init), /* BDX-EP */ - X86_UNCORE_MODEL_MATCH(86, bdx_uncore_init), /* BDX-DE */ - X86_UNCORE_MODEL_MATCH(87, knl_uncore_init), /* Knights Landing */ - X86_UNCORE_MODEL_MATCH(94, skl_uncore_init), /* SkyLake */ + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_NEHALEM_EP, nhm_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_NEHALEM, nhm_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_WESTMERE, nhm_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_WESTMERE_EP, nhm_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE, snb_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE, ivb_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_HASWELL_CORE, hsw_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_HASWELL_ULT, hsw_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_HASWELL_GT3E, hsw_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_BROADWELL_CORE, bdw_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_BROADWELL_GT3E, bdw_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE_X, snbep_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_NEHALEM_EX, nhmex_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_WESTMERE_EX, nhmex_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE_X, ivbep_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_HASWELL_X, hswep_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_BROADWELL_X, bdx_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_BROADWELL_XEON_D, bdx_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNL, knl_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SKYLAKE_DESKTOP,skl_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SKYLAKE_MOBILE, skl_uncore_init), {}, }; diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h index 79766b9a3580..d6063e438158 100644 --- a/arch/x86/events/intel/uncore.h +++ b/arch/x86/events/intel/uncore.h @@ -15,7 +15,11 @@ #define UNCORE_PMC_IDX_FIXED UNCORE_PMC_IDX_MAX_GENERIC #define UNCORE_PMC_IDX_MAX (UNCORE_PMC_IDX_FIXED + 1) +#define UNCORE_PCI_DEV_FULL_DATA(dev, func, type, idx) \ + ((dev << 24) | (func << 16) | (type << 8) | idx) #define UNCORE_PCI_DEV_DATA(type, idx) ((type << 8) | idx) +#define UNCORE_PCI_DEV_DEV(data) ((data >> 24) & 0xff) +#define UNCORE_PCI_DEV_FUNC(data) ((data >> 16) & 0xff) #define UNCORE_PCI_DEV_TYPE(data) ((data >> 8) & 0xff) #define UNCORE_PCI_DEV_IDX(data) (data & 0xff) #define UNCORE_EXTRA_PCI_DEV 0xff @@ -360,6 +364,7 @@ int bdw_uncore_pci_init(void); int skl_uncore_pci_init(void); void snb_uncore_cpu_init(void); void nhm_uncore_cpu_init(void); +void skl_uncore_cpu_init(void); int snb_pci2phy_map_init(int devid); /* perf_event_intel_uncore_snbep.c */ diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c index 96531d2b843f..97a69dbba649 100644 --- a/arch/x86/events/intel/uncore_snb.c +++ b/arch/x86/events/intel/uncore_snb.c @@ -1,4 +1,4 @@ -/* Nehalem/SandBridge/Haswell uncore support */ +/* Nehalem/SandBridge/Haswell/Broadwell/Skylake uncore support */ #include "uncore.h" /* Uncore IMC PCI IDs */ @@ -9,6 +9,7 @@ #define PCI_DEVICE_ID_INTEL_HSW_U_IMC 0x0a04 #define PCI_DEVICE_ID_INTEL_BDW_IMC 0x1604 #define PCI_DEVICE_ID_INTEL_SKL_IMC 0x191f +#define PCI_DEVICE_ID_INTEL_SKL_U_IMC 0x190c /* SNB event control */ #define SNB_UNC_CTL_EV_SEL_MASK 0x000000ff @@ -64,6 +65,10 @@ #define NHM_UNC_PERFEVTSEL0 0x3c0 #define NHM_UNC_UNCORE_PMC0 0x3b0 +/* SKL uncore global control */ +#define SKL_UNC_PERF_GLOBAL_CTL 0xe01 +#define SKL_UNC_GLOBAL_CTL_CORE_ALL ((1 << 5) - 1) + DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7"); DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15"); DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18"); @@ -179,6 +184,60 @@ void snb_uncore_cpu_init(void) snb_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores; } +static void skl_uncore_msr_init_box(struct intel_uncore_box *box) +{ + if (box->pmu->pmu_idx == 0) { + wrmsrl(SKL_UNC_PERF_GLOBAL_CTL, + SNB_UNC_GLOBAL_CTL_EN | SKL_UNC_GLOBAL_CTL_CORE_ALL); + } +} + +static void skl_uncore_msr_exit_box(struct intel_uncore_box *box) +{ + if (box->pmu->pmu_idx == 0) + wrmsrl(SKL_UNC_PERF_GLOBAL_CTL, 0); +} + +static struct intel_uncore_ops skl_uncore_msr_ops = { + .init_box = skl_uncore_msr_init_box, + .exit_box = skl_uncore_msr_exit_box, + .disable_event = snb_uncore_msr_disable_event, + .enable_event = snb_uncore_msr_enable_event, + .read_counter = uncore_msr_read_counter, +}; + +static struct intel_uncore_type skl_uncore_cbox = { + .name = "cbox", + .num_counters = 4, + .num_boxes = 5, + .perf_ctr_bits = 44, + .fixed_ctr_bits = 48, + .perf_ctr = SNB_UNC_CBO_0_PER_CTR0, + .event_ctl = SNB_UNC_CBO_0_PERFEVTSEL0, + .fixed_ctr = SNB_UNC_FIXED_CTR, + .fixed_ctl = SNB_UNC_FIXED_CTR_CTRL, + .single_fixed = 1, + .event_mask = SNB_UNC_RAW_EVENT_MASK, + .msr_offset = SNB_UNC_CBO_MSR_OFFSET, + .ops = &skl_uncore_msr_ops, + .format_group = &snb_uncore_format_group, + .event_descs = snb_uncore_events, +}; + +static struct intel_uncore_type *skl_msr_uncores[] = { + &skl_uncore_cbox, + &snb_uncore_arb, + NULL, +}; + +void skl_uncore_cpu_init(void) +{ + uncore_msr_uncores = skl_msr_uncores; + if (skl_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores) + skl_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores; + snb_uncore_arb.ops = &skl_uncore_msr_ops; +} + enum { SNB_PCI_UNCORE_IMC, }; @@ -544,6 +603,11 @@ static const struct pci_device_id skl_uncore_pci_ids[] = { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_IMC), .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_U_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* end: all zeroes */ }, }; @@ -587,6 +651,7 @@ static const struct imc_uncore_pci_dev desktop_imc_pci_ids[] = { IMC_DEV(HSW_U_IMC, &hsw_uncore_pci_driver), /* 4th Gen Core ULT Mobile Processor */ IMC_DEV(BDW_IMC, &bdw_uncore_pci_driver), /* 5th Gen Core U */ IMC_DEV(SKL_IMC, &skl_uncore_pci_driver), /* 6th Gen Core */ + IMC_DEV(SKL_U_IMC, &skl_uncore_pci_driver), /* 6th Gen Core U */ { /* end marker */ } }; diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index 874e8bd64d1d..824e54086e07 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -2164,21 +2164,101 @@ static struct intel_uncore_type *knl_pci_uncores[] = { */ static const struct pci_device_id knl_uncore_pci_ids[] = { - { /* MC UClk */ + { /* MC0 UClk */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7841), - .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_MC_UCLK, 0), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(10, 0, KNL_PCI_UNCORE_MC_UCLK, 0), }, - { /* MC DClk Channel */ + { /* MC1 UClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7841), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(11, 0, KNL_PCI_UNCORE_MC_UCLK, 1), + }, + { /* MC0 DClk CH 0 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7843), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(8, 2, KNL_PCI_UNCORE_MC_DCLK, 0), + }, + { /* MC0 DClk CH 1 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7843), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(8, 3, KNL_PCI_UNCORE_MC_DCLK, 1), + }, + { /* MC0 DClk CH 2 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7843), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(8, 4, KNL_PCI_UNCORE_MC_DCLK, 2), + }, + { /* MC1 DClk CH 0 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7843), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(9, 2, KNL_PCI_UNCORE_MC_DCLK, 3), + }, + { /* MC1 DClk CH 1 */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7843), - .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_MC_DCLK, 0), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(9, 3, KNL_PCI_UNCORE_MC_DCLK, 4), + }, + { /* MC1 DClk CH 2 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7843), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(9, 4, KNL_PCI_UNCORE_MC_DCLK, 5), + }, + { /* EDC0 UClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(15, 0, KNL_PCI_UNCORE_EDC_UCLK, 0), + }, + { /* EDC1 UClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(16, 0, KNL_PCI_UNCORE_EDC_UCLK, 1), + }, + { /* EDC2 UClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(17, 0, KNL_PCI_UNCORE_EDC_UCLK, 2), + }, + { /* EDC3 UClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 0, KNL_PCI_UNCORE_EDC_UCLK, 3), }, - { /* EDC UClk */ + { /* EDC4 UClk */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833), - .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_EDC_UCLK, 0), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(19, 0, KNL_PCI_UNCORE_EDC_UCLK, 4), + }, + { /* EDC5 UClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(20, 0, KNL_PCI_UNCORE_EDC_UCLK, 5), + }, + { /* EDC6 UClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(21, 0, KNL_PCI_UNCORE_EDC_UCLK, 6), + }, + { /* EDC7 UClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(22, 0, KNL_PCI_UNCORE_EDC_UCLK, 7), + }, + { /* EDC0 EClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(24, 2, KNL_PCI_UNCORE_EDC_ECLK, 0), + }, + { /* EDC1 EClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(25, 2, KNL_PCI_UNCORE_EDC_ECLK, 1), + }, + { /* EDC2 EClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(26, 2, KNL_PCI_UNCORE_EDC_ECLK, 2), + }, + { /* EDC3 EClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(27, 2, KNL_PCI_UNCORE_EDC_ECLK, 3), + }, + { /* EDC4 EClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(28, 2, KNL_PCI_UNCORE_EDC_ECLK, 4), + }, + { /* EDC5 EClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(29, 2, KNL_PCI_UNCORE_EDC_ECLK, 5), + }, + { /* EDC6 EClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(30, 2, KNL_PCI_UNCORE_EDC_ECLK, 6), }, - { /* EDC EClk */ + { /* EDC7 EClk */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835), - .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_EDC_ECLK, 0), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(31, 2, KNL_PCI_UNCORE_EDC_ECLK, 7), }, { /* M2PCIe */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7817), diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c index 85ef3c2e80e0..50b3a056f96b 100644 --- a/arch/x86/events/msr.c +++ b/arch/x86/events/msr.c @@ -1,4 +1,5 @@ #include <linux/perf_event.h> +#include <asm/intel-family.h> enum perf_msr_id { PERF_MSR_TSC = 0, @@ -34,39 +35,43 @@ static bool test_intel(int idx) return false; switch (boot_cpu_data.x86_model) { - case 30: /* 45nm Nehalem */ - case 26: /* 45nm Nehalem-EP */ - case 46: /* 45nm Nehalem-EX */ - - case 37: /* 32nm Westmere */ - case 44: /* 32nm Westmere-EP */ - case 47: /* 32nm Westmere-EX */ - - case 42: /* 32nm SandyBridge */ - case 45: /* 32nm SandyBridge-E/EN/EP */ - - case 58: /* 22nm IvyBridge */ - case 62: /* 22nm IvyBridge-EP/EX */ - - case 60: /* 22nm Haswell Core */ - case 63: /* 22nm Haswell Server */ - case 69: /* 22nm Haswell ULT */ - case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */ - - case 61: /* 14nm Broadwell Core-M */ - case 86: /* 14nm Broadwell Xeon D */ - case 71: /* 14nm Broadwell + GT3e (Intel Iris Pro graphics) */ - case 79: /* 14nm Broadwell Server */ - - case 55: /* 22nm Atom "Silvermont" */ - case 77: /* 22nm Atom "Silvermont Avoton/Rangely" */ - case 76: /* 14nm Atom "Airmont" */ + case INTEL_FAM6_NEHALEM: + case INTEL_FAM6_NEHALEM_EP: + case INTEL_FAM6_NEHALEM_EX: + + case INTEL_FAM6_WESTMERE: + case INTEL_FAM6_WESTMERE2: + case INTEL_FAM6_WESTMERE_EP: + case INTEL_FAM6_WESTMERE_EX: + + case INTEL_FAM6_SANDYBRIDGE: + case INTEL_FAM6_SANDYBRIDGE_X: + + case INTEL_FAM6_IVYBRIDGE: + case INTEL_FAM6_IVYBRIDGE_X: + + case INTEL_FAM6_HASWELL_CORE: + case INTEL_FAM6_HASWELL_X: + case INTEL_FAM6_HASWELL_ULT: + case INTEL_FAM6_HASWELL_GT3E: + + case INTEL_FAM6_BROADWELL_CORE: + case INTEL_FAM6_BROADWELL_XEON_D: + case INTEL_FAM6_BROADWELL_GT3E: + case INTEL_FAM6_BROADWELL_X: + + case INTEL_FAM6_ATOM_SILVERMONT1: + case INTEL_FAM6_ATOM_SILVERMONT2: + case INTEL_FAM6_ATOM_AIRMONT: if (idx == PERF_MSR_SMI) return true; break; - case 78: /* 14nm Skylake Mobile */ - case 94: /* 14nm Skylake Desktop */ + case INTEL_FAM6_SKYLAKE_MOBILE: + case INTEL_FAM6_SKYLAKE_DESKTOP: + case INTEL_FAM6_SKYLAKE_X: + case INTEL_FAM6_KABYLAKE_MOBILE: + case INTEL_FAM6_KABYLAKE_DESKTOP: if (idx == PERF_MSR_SMI || idx == PERF_MSR_PPERF) return true; break; diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 8bd764df815d..8c4a47706296 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -668,6 +668,14 @@ static struct perf_pmu_events_attr event_attr_##v = { \ .event_str = str, \ }; +#define EVENT_ATTR_STR_HT(_name, v, noht, ht) \ +static struct perf_pmu_events_ht_attr event_attr_##v = { \ + .attr = __ATTR(_name, 0444, events_ht_sysfs_show, NULL),\ + .id = 0, \ + .event_str_noht = noht, \ + .event_str_ht = ht, \ +} + extern struct x86_pmu x86_pmu __read_mostly; static inline bool x86_pmu_has_lbr_callstack(void) @@ -803,6 +811,8 @@ struct attribute **merge_attr(struct attribute **a, struct attribute **b); ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr, char *page); +ssize_t events_ht_sysfs_show(struct device *dev, struct device_attribute *attr, + char *page); #ifdef CONFIG_CPU_SUP_AMD @@ -892,6 +902,8 @@ void intel_ds_init(void); void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in); +u64 lbr_from_signext_quirk_wr(u64 val); + void intel_pmu_lbr_reset(void); void intel_pmu_lbr_enable(struct perf_event *event); diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h index 3e8674288198..a58b99811105 100644 --- a/arch/x86/include/asm/atomic.h +++ b/arch/x86/include/asm/atomic.h @@ -171,6 +171,16 @@ static __always_inline int atomic_sub_return(int i, atomic_t *v) #define atomic_inc_return(v) (atomic_add_return(1, v)) #define atomic_dec_return(v) (atomic_sub_return(1, v)) +static __always_inline int atomic_fetch_add(int i, atomic_t *v) +{ + return xadd(&v->counter, i); +} + +static __always_inline int atomic_fetch_sub(int i, atomic_t *v) +{ + return xadd(&v->counter, -i); +} + static __always_inline int atomic_cmpxchg(atomic_t *v, int old, int new) { return cmpxchg(&v->counter, old, new); @@ -190,10 +200,29 @@ static inline void atomic_##op(int i, atomic_t *v) \ : "memory"); \ } -ATOMIC_OP(and) -ATOMIC_OP(or) -ATOMIC_OP(xor) +#define ATOMIC_FETCH_OP(op, c_op) \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + int old, val = atomic_read(v); \ + for (;;) { \ + old = atomic_cmpxchg(v, val, val c_op i); \ + if (old == val) \ + break; \ + val = old; \ + } \ + return old; \ +} + +#define ATOMIC_OPS(op, c_op) \ + ATOMIC_OP(op) \ + ATOMIC_FETCH_OP(op, c_op) + +ATOMIC_OPS(and, &) +ATOMIC_OPS(or , |) +ATOMIC_OPS(xor, ^) +#undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP /** diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h index a984111135b1..71d7705fb303 100644 --- a/arch/x86/include/asm/atomic64_32.h +++ b/arch/x86/include/asm/atomic64_32.h @@ -320,10 +320,29 @@ static inline void atomic64_##op(long long i, atomic64_t *v) \ c = old; \ } -ATOMIC64_OP(and, &) -ATOMIC64_OP(or, |) -ATOMIC64_OP(xor, ^) +#define ATOMIC64_FETCH_OP(op, c_op) \ +static inline long long atomic64_fetch_##op(long long i, atomic64_t *v) \ +{ \ + long long old, c = 0; \ + while ((old = atomic64_cmpxchg(v, c, c c_op i)) != c) \ + c = old; \ + return old; \ +} + +ATOMIC64_FETCH_OP(add, +) + +#define atomic64_fetch_sub(i, v) atomic64_fetch_add(-(i), (v)) + +#define ATOMIC64_OPS(op, c_op) \ + ATOMIC64_OP(op, c_op) \ + ATOMIC64_FETCH_OP(op, c_op) + +ATOMIC64_OPS(and, &) +ATOMIC64_OPS(or, |) +ATOMIC64_OPS(xor, ^) +#undef ATOMIC64_OPS +#undef ATOMIC64_FETCH_OP #undef ATOMIC64_OP #endif /* _ASM_X86_ATOMIC64_32_H */ diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h index 037351022f54..70eed0e14553 100644 --- a/arch/x86/include/asm/atomic64_64.h +++ b/arch/x86/include/asm/atomic64_64.h @@ -158,6 +158,16 @@ static inline long atomic64_sub_return(long i, atomic64_t *v) return atomic64_add_return(-i, v); } +static inline long atomic64_fetch_add(long i, atomic64_t *v) +{ + return xadd(&v->counter, i); +} + +static inline long atomic64_fetch_sub(long i, atomic64_t *v) +{ + return xadd(&v->counter, -i); +} + #define atomic64_inc_return(v) (atomic64_add_return(1, (v))) #define atomic64_dec_return(v) (atomic64_sub_return(1, (v))) @@ -229,10 +239,29 @@ static inline void atomic64_##op(long i, atomic64_t *v) \ : "memory"); \ } -ATOMIC64_OP(and) -ATOMIC64_OP(or) -ATOMIC64_OP(xor) +#define ATOMIC64_FETCH_OP(op, c_op) \ +static inline long atomic64_fetch_##op(long i, atomic64_t *v) \ +{ \ + long old, val = atomic64_read(v); \ + for (;;) { \ + old = atomic64_cmpxchg(v, val, val c_op i); \ + if (old == val) \ + break; \ + val = old; \ + } \ + return old; \ +} + +#define ATOMIC64_OPS(op, c_op) \ + ATOMIC64_OP(op) \ + ATOMIC64_FETCH_OP(op, c_op) + +ATOMIC64_OPS(and, &) +ATOMIC64_OPS(or, |) +ATOMIC64_OPS(xor, ^) +#undef ATOMIC64_OPS +#undef ATOMIC64_FETCH_OP #undef ATOMIC64_OP #endif /* _ASM_X86_ATOMIC64_64_H */ diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 4a413485f9eb..c64b1e9c5d1a 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -301,10 +301,6 @@ #define X86_BUG_FXSAVE_LEAK X86_BUG(6) /* FXSAVE leaks FOP/FIP/FOP */ #define X86_BUG_CLFLUSH_MONITOR X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */ #define X86_BUG_SYSRET_SS_ATTRS X86_BUG(8) /* SYSRET doesn't fix up SS attrs */ -#define X86_BUG_NULL_SEG X86_BUG(9) /* Nulling a selector preserves the base */ -#define X86_BUG_SWAPGS_FENCE X86_BUG(10) /* SWAPGS without input dep on GS */ - - #ifdef CONFIG_X86_32 /* * 64-bit kernels don't use X86_BUG_ESPFIX. Make the define conditional @@ -312,5 +308,7 @@ */ #define X86_BUG_ESPFIX X86_BUG(9) /* "" IRET to 16-bit SS corrupts ESP/RSP high bits */ #endif +#define X86_BUG_NULL_SEG X86_BUG(10) /* Nulling a selector preserves the base */ +#define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* SWAPGS without input dep on GS */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 78d1e7467eae..55b4596ef688 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -41,10 +41,9 @@ extern unsigned long asmlinkage efi_call_phys(void *, ...); /* * Wrap all the virtual calls in a way that forces the parameters on the stack. */ -#define arch_efi_call_virt(f, args...) \ +#define arch_efi_call_virt(p, f, args...) \ ({ \ - ((efi_##f##_t __attribute__((regparm(0)))*) \ - efi.systab->runtime->f)(args); \ + ((efi_##f##_t __attribute__((regparm(0)))*) p->f)(args); \ }) #define efi_ioremap(addr, size, type, attr) ioremap_cache(addr, size) @@ -81,8 +80,8 @@ struct efi_scratch { } \ }) -#define arch_efi_call_virt(f, args...) \ - efi_call((void *)efi.systab->runtime->f, args) \ +#define arch_efi_call_virt(p, f, args...) \ + efi_call((void *)p->f, args) \ #define arch_efi_call_virt_teardown() \ ({ \ diff --git a/arch/x86/include/asm/mutex_32.h b/arch/x86/include/asm/mutex_32.h index 85e6cda45a02..e9355a84fc67 100644 --- a/arch/x86/include/asm/mutex_32.h +++ b/arch/x86/include/asm/mutex_32.h @@ -101,7 +101,7 @@ static inline int __mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *)) { /* cmpxchg because it never induces a false contention state. */ - if (likely(atomic_cmpxchg(count, 1, 0) == 1)) + if (likely(atomic_read(count) == 1 && atomic_cmpxchg(count, 1, 0) == 1)) return 1; return 0; diff --git a/arch/x86/include/asm/mutex_64.h b/arch/x86/include/asm/mutex_64.h index 07537a44216e..d9850758464e 100644 --- a/arch/x86/include/asm/mutex_64.h +++ b/arch/x86/include/asm/mutex_64.h @@ -118,10 +118,10 @@ do { \ static inline int __mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *)) { - if (likely(atomic_cmpxchg(count, 1, 0) == 1)) + if (likely(atomic_read(count) == 1 && atomic_cmpxchg(count, 1, 0) == 1)) return 1; - else - return 0; + + return 0; } #endif /* _ASM_X86_MUTEX_64_H */ diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h index fdcc04020636..7c1c89598688 100644 --- a/arch/x86/include/asm/pvclock.h +++ b/arch/x86/include/asm/pvclock.h @@ -69,29 +69,22 @@ static inline u64 pvclock_scale_delta(u64 delta, u32 mul_frac, int shift) } static __always_inline -u64 pvclock_get_nsec_offset(const struct pvclock_vcpu_time_info *src) -{ - u64 delta = rdtsc_ordered() - src->tsc_timestamp; - return pvclock_scale_delta(delta, src->tsc_to_system_mul, - src->tsc_shift); -} - -static __always_inline unsigned __pvclock_read_cycles(const struct pvclock_vcpu_time_info *src, cycle_t *cycles, u8 *flags) { unsigned version; - cycle_t ret, offset; - u8 ret_flags; + cycle_t offset; + u64 delta; version = src->version; + /* Make the latest version visible */ + smp_rmb(); - offset = pvclock_get_nsec_offset(src); - ret = src->system_time + offset; - ret_flags = src->flags; - - *cycles = ret; - *flags = ret_flags; + delta = rdtsc_ordered() - src->tsc_timestamp; + offset = pvclock_scale_delta(delta, src->tsc_to_system_mul, + src->tsc_shift); + *cycles = src->system_time + offset; + *flags = src->flags; return version; } diff --git a/arch/x86/include/asm/rwsem.h b/arch/x86/include/asm/rwsem.h index 453744c1d347..089ced4edbbc 100644 --- a/arch/x86/include/asm/rwsem.h +++ b/arch/x86/include/asm/rwsem.h @@ -213,23 +213,5 @@ static inline void __downgrade_write(struct rw_semaphore *sem) : "memory", "cc"); } -/* - * implement atomic add functionality - */ -static inline void rwsem_atomic_add(long delta, struct rw_semaphore *sem) -{ - asm volatile(LOCK_PREFIX _ASM_ADD "%1,%0" - : "+m" (sem->count) - : "er" (delta)); -} - -/* - * implement exchange and add functionality - */ -static inline long rwsem_atomic_update(long delta, struct rw_semaphore *sem) -{ - return delta + xadd(&sem->count, delta); -} - #endif /* __KERNEL__ */ #endif /* _ASM_X86_RWSEM_H */ diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 7f991bd5031b..e346572841a0 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -129,6 +129,14 @@ extern const struct cpumask *cpu_coregroup_mask(int cpu); extern unsigned int __max_logical_packages; #define topology_max_packages() (__max_logical_packages) + +extern int __max_smt_threads; + +static inline int topology_max_smt_threads(void) +{ + return __max_smt_threads; +} + int topology_update_package_map(unsigned int apicid, unsigned int cpu); extern int topology_phys_to_logical_pkg(unsigned int pkg); #else @@ -136,6 +144,7 @@ extern int topology_phys_to_logical_pkg(unsigned int pkg); static inline int topology_update_package_map(unsigned int apicid, unsigned int cpu) { return 0; } static inline int topology_phys_to_logical_pkg(unsigned int pkg) { return 0; } +static inline int topology_max_smt_threads(void) { return 1; } #endif static inline void arch_fix_phys_package_id(int num, u32 slot) diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index a147e676fc7b..e991d5c8bb3a 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c @@ -71,8 +71,8 @@ int amd_cache_northbridges(void) while ((misc = next_northbridge(misc, amd_nb_misc_ids)) != NULL) i++; - if (i == 0) - return 0; + if (!i) + return -ENODEV; nb = kzalloc(i * sizeof(struct amd_northbridge), GFP_KERNEL); if (!nb) diff --git a/arch/x86/kernel/cpu/mcheck/mce-apei.c b/arch/x86/kernel/cpu/mcheck/mce-apei.c index 34c89a3e8260..83f1a98d37db 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-apei.c +++ b/arch/x86/kernel/cpu/mcheck/mce-apei.c @@ -46,7 +46,7 @@ void apei_mce_report_mem_error(int severity, struct cper_sec_mem_err *mem_err) return; mce_setup(&m); - m.bank = 1; + m.bank = -1; /* Fake a memory read error with unknown channel */ m.status = MCI_STATUS_VAL | MCI_STATUS_EN | MCI_STATUS_ADDRV | 0x9f; diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 92e5e37d97bf..58af6300992d 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -425,7 +425,7 @@ static u64 mce_rdmsrl(u32 msr) } if (rdmsrl_safe(msr, &v)) { - WARN_ONCE(1, "mce: Unable to read msr %d!\n", msr); + WARN_ONCE(1, "mce: Unable to read MSR 0x%x!\n", msr); /* * Return zero in case the access faulted. This should * not happen normally but can happen if the CPU does diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 10b0661651e0..7b7f3be783d4 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -93,7 +93,7 @@ const char * const amd_df_mcablock_names[] = { EXPORT_SYMBOL_GPL(amd_df_mcablock_names); static DEFINE_PER_CPU(struct threshold_bank **, threshold_banks); -static DEFINE_PER_CPU(unsigned char, bank_map); /* see which banks are on */ +static DEFINE_PER_CPU(unsigned int, bank_map); /* see which banks are on */ static void amd_threshold_interrupt(void); static void amd_deferred_error_interrupt(void); diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index bca14c899137..57b71373bae3 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c @@ -11,7 +11,11 @@ #include <linux/pci.h> #include <linux/acpi.h> +#include <linux/delay.h> +#include <linux/dmi.h> #include <linux/pci_ids.h> +#include <linux/bcma/bcma.h> +#include <linux/bcma/bcma_regs.h> #include <drm/i915_drm.h> #include <asm/pci-direct.h> #include <asm/dma.h> @@ -21,6 +25,9 @@ #include <asm/iommu.h> #include <asm/gart.h> #include <asm/irq_remapping.h> +#include <asm/early_ioremap.h> + +#define dev_err(msg) pr_err("pci 0000:%02x:%02x.%d: %s", bus, slot, func, msg) static void __init fix_hypertransport_config(int num, int slot, int func) { @@ -76,6 +83,13 @@ static void __init nvidia_bugs(int num, int slot, int func) #ifdef CONFIG_ACPI #ifdef CONFIG_X86_IO_APIC /* + * Only applies to Nvidia root ports (bus 0) and not to + * Nvidia graphics cards with PCI ports on secondary buses. + */ + if (num) + return; + + /* * All timer overrides on Nvidia are * wrong unless HPET is enabled. * Unfortunately that's not true on many Asus boards. @@ -590,6 +604,61 @@ static void __init force_disable_hpet(int num, int slot, int func) #endif } +#define BCM4331_MMIO_SIZE 16384 +#define BCM4331_PM_CAP 0x40 +#define bcma_aread32(reg) ioread32(mmio + 1 * BCMA_CORE_SIZE + reg) +#define bcma_awrite32(reg, val) iowrite32(val, mmio + 1 * BCMA_CORE_SIZE + reg) + +static void __init apple_airport_reset(int bus, int slot, int func) +{ + void __iomem *mmio; + u16 pmcsr; + u64 addr; + int i; + + if (!dmi_match(DMI_SYS_VENDOR, "Apple Inc.")) + return; + + /* Card may have been put into PCI_D3hot by grub quirk */ + pmcsr = read_pci_config_16(bus, slot, func, BCM4331_PM_CAP + PCI_PM_CTRL); + + if ((pmcsr & PCI_PM_CTRL_STATE_MASK) != PCI_D0) { + pmcsr &= ~PCI_PM_CTRL_STATE_MASK; + write_pci_config_16(bus, slot, func, BCM4331_PM_CAP + PCI_PM_CTRL, pmcsr); + mdelay(10); + + pmcsr = read_pci_config_16(bus, slot, func, BCM4331_PM_CAP + PCI_PM_CTRL); + if ((pmcsr & PCI_PM_CTRL_STATE_MASK) != PCI_D0) { + dev_err("Cannot power up Apple AirPort card\n"); + return; + } + } + + addr = read_pci_config(bus, slot, func, PCI_BASE_ADDRESS_0); + addr |= (u64)read_pci_config(bus, slot, func, PCI_BASE_ADDRESS_1) << 32; + addr &= PCI_BASE_ADDRESS_MEM_MASK; + + mmio = early_ioremap(addr, BCM4331_MMIO_SIZE); + if (!mmio) { + dev_err("Cannot iomap Apple AirPort card\n"); + return; + } + + pr_info("Resetting Apple AirPort card (left enabled by EFI)\n"); + + for (i = 0; bcma_aread32(BCMA_RESET_ST) && i < 30; i++) + udelay(10); + + bcma_awrite32(BCMA_RESET_CTL, BCMA_RESET_CTL_RESET); + bcma_aread32(BCMA_RESET_CTL); + udelay(1); + + bcma_awrite32(BCMA_RESET_CTL, 0); + bcma_aread32(BCMA_RESET_CTL); + udelay(10); + + early_iounmap(mmio, BCM4331_MMIO_SIZE); +} #define QFLAG_APPLY_ONCE 0x1 #define QFLAG_APPLIED 0x2 @@ -603,12 +672,6 @@ struct chipset { void (*f)(int num, int slot, int func); }; -/* - * Only works for devices on the root bus. If you add any devices - * not on bus 0 readd another loop level in early_quirks(). But - * be careful because at least the Nvidia quirk here relies on - * only matching on bus 0. - */ static struct chipset early_qrk[] __initdata = { { PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID, PCI_CLASS_BRIDGE_PCI, PCI_ANY_ID, QFLAG_APPLY_ONCE, nvidia_bugs }, @@ -638,9 +701,13 @@ static struct chipset early_qrk[] __initdata = { */ { PCI_VENDOR_ID_INTEL, 0x0f00, PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, force_disable_hpet}, + { PCI_VENDOR_ID_BROADCOM, 0x4331, + PCI_CLASS_NETWORK_OTHER, PCI_ANY_ID, 0, apple_airport_reset}, {} }; +static void __init early_pci_scan_bus(int bus); + /** * check_dev_quirk - apply early quirks to a given PCI device * @num: bus number @@ -649,7 +716,7 @@ static struct chipset early_qrk[] __initdata = { * * Check the vendor & device ID against the early quirks table. * - * If the device is single function, let early_quirks() know so we don't + * If the device is single function, let early_pci_scan_bus() know so we don't * poke at this device again. */ static int __init check_dev_quirk(int num, int slot, int func) @@ -658,6 +725,7 @@ static int __init check_dev_quirk(int num, int slot, int func) u16 vendor; u16 device; u8 type; + u8 sec; int i; class = read_pci_config_16(num, slot, func, PCI_CLASS_DEVICE); @@ -685,25 +753,36 @@ static int __init check_dev_quirk(int num, int slot, int func) type = read_pci_config_byte(num, slot, func, PCI_HEADER_TYPE); + + if ((type & 0x7f) == PCI_HEADER_TYPE_BRIDGE) { + sec = read_pci_config_byte(num, slot, func, PCI_SECONDARY_BUS); + if (sec > num) + early_pci_scan_bus(sec); + } + if (!(type & 0x80)) return -1; return 0; } -void __init early_quirks(void) +static void __init early_pci_scan_bus(int bus) { int slot, func; - if (!early_pci_allowed()) - return; - /* Poor man's PCI discovery */ - /* Only scan the root bus */ for (slot = 0; slot < 32; slot++) for (func = 0; func < 8; func++) { /* Only probe function 0 on single fn devices */ - if (check_dev_quirk(0, slot, func)) + if (check_dev_quirk(bus, slot, func)) break; } } + +void __init early_quirks(void) +{ + if (!early_pci_allowed()) + return; + + early_pci_scan_bus(0); +} diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c index 99bfc025111d..06c58ce46762 100644 --- a/arch/x86/kernel/pvclock.c +++ b/arch/x86/kernel/pvclock.c @@ -61,11 +61,16 @@ void pvclock_resume(void) u8 pvclock_read_flags(struct pvclock_vcpu_time_info *src) { unsigned version; - cycle_t ret; u8 flags; do { - version = __pvclock_read_cycles(src, &ret, &flags); + version = src->version; + /* Make the latest version visible */ + smp_rmb(); + + flags = src->flags; + /* Make sure that the version double-check is last. */ + smp_rmb(); } while ((src->version & 1) || version != src->version); return flags & valid_flags; @@ -80,6 +85,8 @@ cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src) do { version = __pvclock_read_cycles(src, &ret, &flags); + /* Make sure that the version double-check is last. */ + smp_rmb(); } while ((src->version & 1) || version != src->version); if (unlikely((flags & PVCLOCK_GUEST_STOPPED) != 0)) { diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index fafe8b923cac..2ed0ec1353f8 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -105,6 +105,9 @@ static unsigned int max_physical_pkg_id __read_mostly; unsigned int __max_logical_packages __read_mostly; EXPORT_SYMBOL(__max_logical_packages); +/* Maximum number of SMT threads on any online core */ +int __max_smt_threads __read_mostly; + static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip) { unsigned long flags; @@ -493,7 +496,7 @@ void set_cpu_sibling_map(int cpu) bool has_mp = has_smt || boot_cpu_data.x86_max_cores > 1; struct cpuinfo_x86 *c = &cpu_data(cpu); struct cpuinfo_x86 *o; - int i; + int i, threads; cpumask_set_cpu(cpu, cpu_sibling_setup_mask); @@ -550,6 +553,10 @@ void set_cpu_sibling_map(int cpu) if (match_die(c, o) && !topology_same_node(c, o)) primarily_use_numa_for_topology(); } + + threads = cpumask_weight(topology_sibling_cpumask(cpu)); + if (threads > __max_smt_threads) + __max_smt_threads = threads; } /* maps the cpu to the sched domain representing multi-core */ @@ -1441,6 +1448,21 @@ __init void prefill_possible_map(void) #ifdef CONFIG_HOTPLUG_CPU +/* Recompute SMT state for all CPUs on offline */ +static void recompute_smt_state(void) +{ + int max_threads, cpu; + + max_threads = 0; + for_each_online_cpu (cpu) { + int threads = cpumask_weight(topology_sibling_cpumask(cpu)); + + if (threads > max_threads) + max_threads = threads; + } + __max_smt_threads = max_threads; +} + static void remove_siblinginfo(int cpu) { int sibling; @@ -1465,6 +1487,7 @@ static void remove_siblinginfo(int cpu) c->phys_proc_id = 0; c->cpu_core_id = 0; cpumask_clear_cpu(cpu, cpu_sibling_setup_mask); + recompute_smt_state(); } static void remove_cpu_from_maps(int cpu) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index bbb5b283ff63..a397200281c1 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1310,7 +1310,8 @@ void wait_lapic_expire(struct kvm_vcpu *vcpu) /* __delay is delay_tsc whenever the hardware has TSC, thus always. */ if (guest_tsc < tsc_deadline) - __delay(tsc_deadline - guest_tsc); + __delay(min(tsc_deadline - guest_tsc, + nsec_to_cycles(vcpu, lapic_timer_advance_ns))); } static void start_apic_timer(struct kvm_lapic *apic) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 003618e324ce..64a79f271276 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -6671,7 +6671,13 @@ static int get_vmx_mem_address(struct kvm_vcpu *vcpu, /* Checks for #GP/#SS exceptions. */ exn = false; - if (is_protmode(vcpu)) { + if (is_long_mode(vcpu)) { + /* Long mode: #GP(0)/#SS(0) if the memory address is in a + * non-canonical form. This is the only check on the memory + * destination for long mode! + */ + exn = is_noncanonical_address(*ret); + } else if (is_protmode(vcpu)) { /* Protected mode: apply checks for segment validity in the * following order: * - segment type check (#GP(0) may be thrown) @@ -6688,17 +6694,10 @@ static int get_vmx_mem_address(struct kvm_vcpu *vcpu, * execute-only code segment */ exn = ((s.type & 0xa) == 8); - } - if (exn) { - kvm_queue_exception_e(vcpu, GP_VECTOR, 0); - return 1; - } - if (is_long_mode(vcpu)) { - /* Long mode: #GP(0)/#SS(0) if the memory address is in a - * non-canonical form. This is an only check for long mode. - */ - exn = is_noncanonical_address(*ret); - } else if (is_protmode(vcpu)) { + if (exn) { + kvm_queue_exception_e(vcpu, GP_VECTOR, 0); + return 1; + } /* Protected mode: #GP(0)/#SS(0) if the segment is unusable. */ exn = (s.unusable != 0); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 902d9da12392..7da5dd2057a9 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1244,12 +1244,6 @@ static atomic_t kvm_guest_has_master_clock = ATOMIC_INIT(0); static DEFINE_PER_CPU(unsigned long, cpu_tsc_khz); static unsigned long max_tsc_khz; -static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec) -{ - return pvclock_scale_delta(nsec, vcpu->arch.virtual_tsc_mult, - vcpu->arch.virtual_tsc_shift); -} - static u32 adjust_tsc_khz(u32 khz, s32 ppm) { u64 v = (u64)khz * (1000000 + ppm); diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 7ce3634ab5fe..a82ca466b62e 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -2,6 +2,7 @@ #define ARCH_X86_KVM_X86_H #include <linux/kvm_host.h> +#include <asm/pvclock.h> #include "kvm_cache_regs.h" #define MSR_IA32_CR_PAT_DEFAULT 0x0007040600070406ULL @@ -195,6 +196,12 @@ extern unsigned int lapic_timer_advance_ns; extern struct static_key kvm_no_apic_vcpu; +static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec) +{ + return pvclock_scale_delta(nsec, vcpu->arch.virtual_tsc_mult, + vcpu->arch.virtual_tsc_shift); +} + /* Same "calling convention" as do_div: * - divide (n << 32) by base * - put result in n diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c index 1b1110fa0057..0493c17b8a51 100644 --- a/arch/x86/mm/kasan_init_64.c +++ b/arch/x86/mm/kasan_init_64.c @@ -54,8 +54,8 @@ static int kasan_die_handler(struct notifier_block *self, void *data) { if (val == DIE_GPF) { - pr_emerg("CONFIG_KASAN_INLINE enabled"); - pr_emerg("GPF could be caused by NULL-ptr deref or user memory access"); + pr_emerg("CONFIG_KASAN_INLINE enabled\n"); + pr_emerg("GPF could be caused by NULL-ptr deref or user memory access\n"); } return NOTIFY_OK; } diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index b2a4e2a61f6b..3cd69832d7f4 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c @@ -396,6 +396,7 @@ int __init pci_acpi_init(void) return -ENODEV; printk(KERN_INFO "PCI: Using ACPI for IRQ routing\n"); + acpi_irq_penalty_init(); pcibios_enable_irq = acpi_pci_irq_enable; pcibios_disable_irq = acpi_pci_irq_disable; x86_init.pci.init_irq = x86_init_noop; diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index f93545e7dc54..d898b334ff46 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -98,21 +98,6 @@ static efi_status_t __init phys_efi_set_virtual_address_map( return status; } -void efi_get_time(struct timespec *now) -{ - efi_status_t status; - efi_time_t eft; - efi_time_cap_t cap; - - status = efi.get_time(&eft, &cap); - if (status != EFI_SUCCESS) - pr_err("Oops: efitime: can't read time!\n"); - - now->tv_sec = mktime(eft.year, eft.month, eft.day, eft.hour, - eft.minute, eft.second); - now->tv_nsec = 0; -} - void __init efi_find_mirror(void) { efi_memory_desc_t *md; diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index b226b3f497f1..5cb4301c4dcf 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -466,22 +466,17 @@ extern efi_status_t efi64_thunk(u32, ...); #define efi_thunk(f, ...) \ ({ \ efi_status_t __s; \ - unsigned long flags; \ - u32 func; \ + unsigned long __flags; \ + u32 __func; \ \ - efi_sync_low_kernel_mappings(); \ - local_irq_save(flags); \ + local_irq_save(__flags); \ + arch_efi_call_virt_setup(); \ \ - efi_scratch.prev_cr3 = read_cr3(); \ - write_cr3((unsigned long)efi_scratch.efi_pgt); \ - __flush_tlb_all(); \ + __func = runtime_service32(f); \ + __s = efi64_thunk(__func, __VA_ARGS__); \ \ - func = runtime_service32(f); \ - __s = efi64_thunk(func, __VA_ARGS__); \ - \ - write_cr3(efi_scratch.prev_cr3); \ - __flush_tlb_all(); \ - local_irq_restore(flags); \ + arch_efi_call_virt_teardown(); \ + local_irq_restore(__flags); \ \ __s; \ }) diff --git a/arch/x86/platform/uv/bios_uv.c b/arch/x86/platform/uv/bios_uv.c index 815fec6e05e2..66b2166ea4a1 100644 --- a/arch/x86/platform/uv/bios_uv.c +++ b/arch/x86/platform/uv/bios_uv.c @@ -40,8 +40,7 @@ s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5) */ return BIOS_STATUS_UNIMPLEMENTED; - ret = efi_call((void *)__va(tab->function), (u64)which, - a1, a2, a3, a4, a5); + ret = efi_call_virt_pointer(tab, function, (u64)which, a1, a2, a3, a4, a5); return ret; } EXPORT_SYMBOL_GPL(uv_bios_call); diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c index 009947d419a6..f2b5e6a5cf95 100644 --- a/arch/x86/power/hibernate_64.c +++ b/arch/x86/power/hibernate_64.c @@ -19,6 +19,7 @@ #include <asm/mtrr.h> #include <asm/sections.h> #include <asm/suspend.h> +#include <asm/tlbflush.h> /* Defined in hibernate_asm_64.S */ extern asmlinkage __visible int restore_image(void); @@ -28,6 +29,7 @@ extern asmlinkage __visible int restore_image(void); * kernel's text (this value is passed in the image header). */ unsigned long restore_jump_address __visible; +unsigned long jump_address_phys; /* * Value of the cr3 register from before the hibernation (this value is passed @@ -37,7 +39,43 @@ unsigned long restore_cr3 __visible; pgd_t *temp_level4_pgt __visible; -void *relocated_restore_code __visible; +unsigned long relocated_restore_code __visible; + +static int set_up_temporary_text_mapping(void) +{ + pmd_t *pmd; + pud_t *pud; + + /* + * The new mapping only has to cover the page containing the image + * kernel's entry point (jump_address_phys), because the switch over to + * it is carried out by relocated code running from a page allocated + * specifically for this purpose and covered by the identity mapping, so + * the temporary kernel text mapping is only needed for the final jump. + * Moreover, in that mapping the virtual address of the image kernel's + * entry point must be the same as its virtual address in the image + * kernel (restore_jump_address), so the image kernel's + * restore_registers() code doesn't find itself in a different area of + * the virtual address space after switching over to the original page + * tables used by the image kernel. + */ + pud = (pud_t *)get_safe_page(GFP_ATOMIC); + if (!pud) + return -ENOMEM; + + pmd = (pmd_t *)get_safe_page(GFP_ATOMIC); + if (!pmd) + return -ENOMEM; + + set_pmd(pmd + pmd_index(restore_jump_address), + __pmd((jump_address_phys & PMD_MASK) | __PAGE_KERNEL_LARGE_EXEC)); + set_pud(pud + pud_index(restore_jump_address), + __pud(__pa(pmd) | _KERNPG_TABLE)); + set_pgd(temp_level4_pgt + pgd_index(restore_jump_address), + __pgd(__pa(pud) | _KERNPG_TABLE)); + + return 0; +} static void *alloc_pgt_page(void *context) { @@ -59,9 +97,10 @@ static int set_up_temporary_mappings(void) if (!temp_level4_pgt) return -ENOMEM; - /* It is safe to reuse the original kernel mapping */ - set_pgd(temp_level4_pgt + pgd_index(__START_KERNEL_map), - init_level4_pgt[pgd_index(__START_KERNEL_map)]); + /* Prepare a temporary mapping for the kernel text */ + result = set_up_temporary_text_mapping(); + if (result) + return result; /* Set up the direct mapping from scratch */ for (i = 0; i < nr_pfn_mapped; i++) { @@ -78,19 +117,50 @@ static int set_up_temporary_mappings(void) return 0; } +static int relocate_restore_code(void) +{ + pgd_t *pgd; + pud_t *pud; + + relocated_restore_code = get_safe_page(GFP_ATOMIC); + if (!relocated_restore_code) + return -ENOMEM; + + memcpy((void *)relocated_restore_code, &core_restore_code, PAGE_SIZE); + + /* Make the page containing the relocated code executable */ + pgd = (pgd_t *)__va(read_cr3()) + pgd_index(relocated_restore_code); + pud = pud_offset(pgd, relocated_restore_code); + if (pud_large(*pud)) { + set_pud(pud, __pud(pud_val(*pud) & ~_PAGE_NX)); + } else { + pmd_t *pmd = pmd_offset(pud, relocated_restore_code); + + if (pmd_large(*pmd)) { + set_pmd(pmd, __pmd(pmd_val(*pmd) & ~_PAGE_NX)); + } else { + pte_t *pte = pte_offset_kernel(pmd, relocated_restore_code); + + set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_NX)); + } + } + __flush_tlb_all(); + + return 0; +} + int swsusp_arch_resume(void) { int error; /* We have got enough memory and from now on we cannot recover */ - if ((error = set_up_temporary_mappings())) + error = set_up_temporary_mappings(); + if (error) return error; - relocated_restore_code = (void *)get_safe_page(GFP_ATOMIC); - if (!relocated_restore_code) - return -ENOMEM; - memcpy(relocated_restore_code, &core_restore_code, - &restore_registers - &core_restore_code); + error = relocate_restore_code(); + if (error) + return error; restore_image(); return 0; @@ -109,11 +179,12 @@ int pfn_is_nosave(unsigned long pfn) struct restore_data_record { unsigned long jump_address; + unsigned long jump_address_phys; unsigned long cr3; unsigned long magic; }; -#define RESTORE_MAGIC 0x0123456789ABCDEFUL +#define RESTORE_MAGIC 0x123456789ABCDEF0UL /** * arch_hibernation_header_save - populate the architecture specific part @@ -126,7 +197,8 @@ int arch_hibernation_header_save(void *addr, unsigned int max_size) if (max_size < sizeof(struct restore_data_record)) return -EOVERFLOW; - rdr->jump_address = restore_jump_address; + rdr->jump_address = (unsigned long)&restore_registers; + rdr->jump_address_phys = __pa_symbol(&restore_registers); rdr->cr3 = restore_cr3; rdr->magic = RESTORE_MAGIC; return 0; @@ -142,6 +214,7 @@ int arch_hibernation_header_restore(void *addr) struct restore_data_record *rdr = addr; restore_jump_address = rdr->jump_address; + jump_address_phys = rdr->jump_address_phys; restore_cr3 = rdr->cr3; return (rdr->magic == RESTORE_MAGIC) ? 0 : -EINVAL; } diff --git a/arch/x86/power/hibernate_asm_64.S b/arch/x86/power/hibernate_asm_64.S index 4400a43b9e28..3177c2bc26f6 100644 --- a/arch/x86/power/hibernate_asm_64.S +++ b/arch/x86/power/hibernate_asm_64.S @@ -44,9 +44,6 @@ ENTRY(swsusp_arch_suspend) pushfq popq pt_regs_flags(%rax) - /* save the address of restore_registers */ - movq $restore_registers, %rax - movq %rax, restore_jump_address(%rip) /* save cr3 */ movq %cr3, %rax movq %rax, restore_cr3(%rip) @@ -57,31 +54,34 @@ ENTRY(swsusp_arch_suspend) ENDPROC(swsusp_arch_suspend) ENTRY(restore_image) - /* switch to temporary page tables */ - movq $__PAGE_OFFSET, %rdx - movq temp_level4_pgt(%rip), %rax - subq %rdx, %rax - movq %rax, %cr3 - /* Flush TLB */ - movq mmu_cr4_features(%rip), %rax - movq %rax, %rdx - andq $~(X86_CR4_PGE), %rdx - movq %rdx, %cr4; # turn off PGE - movq %cr3, %rcx; # flush TLB - movq %rcx, %cr3; - movq %rax, %cr4; # turn PGE back on - /* prepare to jump to the image kernel */ - movq restore_jump_address(%rip), %rax - movq restore_cr3(%rip), %rbx + movq restore_jump_address(%rip), %r8 + movq restore_cr3(%rip), %r9 + + /* prepare to switch to temporary page tables */ + movq temp_level4_pgt(%rip), %rax + movq mmu_cr4_features(%rip), %rbx /* prepare to copy image data to their original locations */ movq restore_pblist(%rip), %rdx + + /* jump to relocated restore code */ movq relocated_restore_code(%rip), %rcx jmpq *%rcx /* code below has been relocated to a safe page */ ENTRY(core_restore_code) + /* switch to temporary page tables */ + movq $__PAGE_OFFSET, %rcx + subq %rcx, %rax + movq %rax, %cr3 + /* flush TLB */ + movq %rbx, %rcx + andq $~(X86_CR4_PGE), %rcx + movq %rcx, %cr4; # turn off PGE + movq %cr3, %rcx; # flush TLB + movq %rcx, %cr3; + movq %rbx, %cr4; # turn PGE back on .Lloop: testq %rdx, %rdx jz .Ldone @@ -96,24 +96,17 @@ ENTRY(core_restore_code) /* progress to the next pbe */ movq pbe_next(%rdx), %rdx jmp .Lloop + .Ldone: /* jump to the restore_registers address from the image header */ - jmpq *%rax - /* - * NOTE: This assumes that the boot kernel's text mapping covers the - * image kernel's page containing restore_registers and the address of - * this page is the same as in the image kernel's text mapping (it - * should always be true, because the text mapping is linear, starting - * from 0, and is supposed to cover the entire kernel text for every - * kernel). - * - * code below belongs to the image kernel - */ + jmpq *%r8 + /* code below belongs to the image kernel */ + .align PAGE_SIZE ENTRY(restore_registers) FRAME_BEGIN /* go back to the original page tables */ - movq %rbx, %cr3 + movq %r9, %cr3 /* Flush TLB, including "global" things (vmalloc) */ movq mmu_cr4_features(%rip), %rax diff --git a/arch/x86/ras/mce_amd_inj.c b/arch/x86/ras/mce_amd_inj.c index e69f4701a076..1104515d5ad2 100644 --- a/arch/x86/ras/mce_amd_inj.c +++ b/arch/x86/ras/mce_amd_inj.c @@ -241,6 +241,31 @@ static void toggle_nb_mca_mst_cpu(u16 nid) __func__, PCI_FUNC(F3->devfn), NBCFG); } +static void prepare_msrs(void *info) +{ + struct mce i_mce = *(struct mce *)info; + u8 b = i_mce.bank; + + wrmsrl(MSR_IA32_MCG_STATUS, i_mce.mcgstatus); + + if (boot_cpu_has(X86_FEATURE_SMCA)) { + if (i_mce.inject_flags == DFR_INT_INJ) { + wrmsrl(MSR_AMD64_SMCA_MCx_DESTAT(b), i_mce.status); + wrmsrl(MSR_AMD64_SMCA_MCx_DEADDR(b), i_mce.addr); + } else { + wrmsrl(MSR_AMD64_SMCA_MCx_STATUS(b), i_mce.status); + wrmsrl(MSR_AMD64_SMCA_MCx_ADDR(b), i_mce.addr); + } + + wrmsrl(MSR_AMD64_SMCA_MCx_MISC(b), i_mce.misc); + } else { + wrmsrl(MSR_IA32_MCx_STATUS(b), i_mce.status); + wrmsrl(MSR_IA32_MCx_ADDR(b), i_mce.addr); + wrmsrl(MSR_IA32_MCx_MISC(b), i_mce.misc); + } + +} + static void do_inject(void) { u64 mcg_status = 0; @@ -287,36 +312,9 @@ static void do_inject(void) toggle_hw_mce_inject(cpu, true); - wrmsr_on_cpu(cpu, MSR_IA32_MCG_STATUS, - (u32)mcg_status, (u32)(mcg_status >> 32)); - - if (boot_cpu_has(X86_FEATURE_SMCA)) { - if (inj_type == DFR_INT_INJ) { - wrmsr_on_cpu(cpu, MSR_AMD64_SMCA_MCx_DESTAT(b), - (u32)i_mce.status, (u32)(i_mce.status >> 32)); - - wrmsr_on_cpu(cpu, MSR_AMD64_SMCA_MCx_DEADDR(b), - (u32)i_mce.addr, (u32)(i_mce.addr >> 32)); - } else { - wrmsr_on_cpu(cpu, MSR_AMD64_SMCA_MCx_STATUS(b), - (u32)i_mce.status, (u32)(i_mce.status >> 32)); - - wrmsr_on_cpu(cpu, MSR_AMD64_SMCA_MCx_ADDR(b), - (u32)i_mce.addr, (u32)(i_mce.addr >> 32)); - } - - wrmsr_on_cpu(cpu, MSR_AMD64_SMCA_MCx_MISC(b), - (u32)i_mce.misc, (u32)(i_mce.misc >> 32)); - } else { - wrmsr_on_cpu(cpu, MSR_IA32_MCx_STATUS(b), - (u32)i_mce.status, (u32)(i_mce.status >> 32)); - - wrmsr_on_cpu(cpu, MSR_IA32_MCx_ADDR(b), - (u32)i_mce.addr, (u32)(i_mce.addr >> 32)); - - wrmsr_on_cpu(cpu, MSR_IA32_MCx_MISC(b), - (u32)i_mce.misc, (u32)(i_mce.misc >> 32)); - } + i_mce.mcgstatus = mcg_status; + i_mce.inject_flags = inj_type; + smp_call_function_single(cpu, prepare_msrs, &i_mce, 0); toggle_hw_mce_inject(cpu, false); diff --git a/arch/xtensa/include/asm/atomic.h b/arch/xtensa/include/asm/atomic.h index fd8017ce298a..e7a23f2a519a 100644 --- a/arch/xtensa/include/asm/atomic.h +++ b/arch/xtensa/include/asm/atomic.h @@ -98,6 +98,26 @@ static inline int atomic_##op##_return(int i, atomic_t * v) \ return result; \ } +#define ATOMIC_FETCH_OP(op) \ +static inline int atomic_fetch_##op(int i, atomic_t * v) \ +{ \ + unsigned long tmp; \ + int result; \ + \ + __asm__ __volatile__( \ + "1: l32i %1, %3, 0\n" \ + " wsr %1, scompare1\n" \ + " " #op " %0, %1, %2\n" \ + " s32c1i %0, %3, 0\n" \ + " bne %0, %1, 1b\n" \ + : "=&a" (result), "=&a" (tmp) \ + : "a" (i), "a" (v) \ + : "memory" \ + ); \ + \ + return result; \ +} + #else /* XCHAL_HAVE_S32C1I */ #define ATOMIC_OP(op) \ @@ -138,18 +158,42 @@ static inline int atomic_##op##_return(int i, atomic_t * v) \ return vval; \ } +#define ATOMIC_FETCH_OP(op) \ +static inline int atomic_fetch_##op(int i, atomic_t * v) \ +{ \ + unsigned int tmp, vval; \ + \ + __asm__ __volatile__( \ + " rsil a15,"__stringify(TOPLEVEL)"\n" \ + " l32i %0, %3, 0\n" \ + " " #op " %1, %0, %2\n" \ + " s32i %1, %3, 0\n" \ + " wsr a15, ps\n" \ + " rsync\n" \ + : "=&a" (vval), "=&a" (tmp) \ + : "a" (i), "a" (v) \ + : "a15", "memory" \ + ); \ + \ + return vval; \ +} + #endif /* XCHAL_HAVE_S32C1I */ -#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op) ATOMIC_OP_RETURN(op) ATOMIC_OPS(add) ATOMIC_OPS(sub) -ATOMIC_OP(and) -ATOMIC_OP(or) -ATOMIC_OP(xor) +#undef ATOMIC_OPS +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op) + +ATOMIC_OPS(and) +ATOMIC_OPS(or) +ATOMIC_OPS(xor) #undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP diff --git a/arch/xtensa/include/asm/spinlock.h b/arch/xtensa/include/asm/spinlock.h index 1d95fa5dcd10..a36221cf6363 100644 --- a/arch/xtensa/include/asm/spinlock.h +++ b/arch/xtensa/include/asm/spinlock.h @@ -11,6 +11,9 @@ #ifndef _XTENSA_SPINLOCK_H #define _XTENSA_SPINLOCK_H +#include <asm/barrier.h> +#include <asm/processor.h> + /* * spinlock * @@ -29,8 +32,11 @@ */ #define arch_spin_is_locked(x) ((x)->slock != 0) -#define arch_spin_unlock_wait(lock) \ - do { while (arch_spin_is_locked(lock)) cpu_relax(); } while (0) + +static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) +{ + smp_cond_load_acquire(&lock->slock, !VAL); +} #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) diff --git a/arch/xtensa/platforms/xt2000/setup.c b/arch/xtensa/platforms/xt2000/setup.c index 5f4bd71971d6..4904c5c16918 100644 --- a/arch/xtensa/platforms/xt2000/setup.c +++ b/arch/xtensa/platforms/xt2000/setup.c @@ -113,7 +113,6 @@ void platform_heartbeat(void) } //#define RS_TABLE_SIZE 2 -//#define STD_COM_FLAGS (UPF_BOOT_AUTOCONF|UPF_SKIP_TEST) #define _SERIAL_PORT(_base,_irq) \ { \ |