summaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
Diffstat (limited to 'arch')
-rw-r--r--arch/alpha/include/asm/cmpxchg.h2
-rw-r--r--arch/arm/include/asm/barrier.h2
-rw-r--r--arch/arm64/include/asm/barrier.h2
-rw-r--r--arch/avr32/include/asm/cmpxchg.h2
-rw-r--r--arch/hexagon/include/asm/cmpxchg.h1
-rw-r--r--arch/ia64/include/asm/barrier.h7
-rw-r--r--arch/ia64/include/uapi/asm/cmpxchg.h2
-rw-r--r--arch/m32r/include/asm/cmpxchg.h2
-rw-r--r--arch/m68k/include/asm/cmpxchg.h1
-rw-r--r--arch/metag/include/asm/barrier.h2
-rw-r--r--arch/metag/include/asm/cmpxchg.h2
-rw-r--r--arch/mips/include/asm/barrier.h4
-rw-r--r--arch/mips/include/asm/cmpxchg.h2
-rw-r--r--arch/parisc/include/asm/cmpxchg.h2
-rw-r--r--arch/powerpc/include/asm/barrier.h3
-rw-r--r--arch/powerpc/include/asm/cmpxchg.h1
-rw-r--r--arch/s390/include/asm/barrier.h2
-rw-r--r--arch/s390/include/asm/cmpxchg.h2
-rw-r--r--arch/score/include/asm/cmpxchg.h2
-rw-r--r--arch/sh/include/asm/barrier.h2
-rw-r--r--arch/sh/include/asm/cmpxchg.h2
-rw-r--r--arch/sparc/include/asm/barrier_64.h4
-rw-r--r--arch/sparc/include/asm/cmpxchg_32.h1
-rw-r--r--arch/sparc/include/asm/cmpxchg_64.h2
-rw-r--r--arch/tile/include/asm/atomic_64.h3
-rw-r--r--arch/x86/Kconfig5
-rw-r--r--arch/x86/include/asm/barrier.h4
-rw-r--r--arch/x86/include/asm/cmpxchg.h2
-rw-r--r--arch/x86/include/asm/paravirt.h29
-rw-r--r--arch/x86/include/asm/paravirt_types.h10
-rw-r--r--arch/x86/include/asm/qspinlock.h57
-rw-r--r--arch/x86/include/asm/qspinlock_paravirt.h6
-rw-r--r--arch/x86/include/asm/spinlock.h5
-rw-r--r--arch/x86/include/asm/spinlock_types.h4
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c15
-rw-r--r--arch/x86/kernel/cpu/perf_event.c20
-rw-r--r--arch/x86/kernel/cpu/perf_event.h21
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel.c224
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_cqm.c108
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_ds.c317
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_lbr.c13
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_pt.c38
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_uncore.c3
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_uncore.h1
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c20
-rw-r--r--arch/x86/kernel/kvm.c43
-rw-r--r--arch/x86/kernel/paravirt-spinlocks.c24
-rw-r--r--arch/x86/kernel/paravirt_patch_32.c22
-rw-r--r--arch/x86/kernel/paravirt_patch_64.c22
-rw-r--r--arch/x86/um/asm/barrier.h3
-rw-r--r--arch/x86/xen/spinlock.c64
51 files changed, 768 insertions, 369 deletions
diff --git a/arch/alpha/include/asm/cmpxchg.h b/arch/alpha/include/asm/cmpxchg.h
index 429e8cd0d78e..e5117766529e 100644
--- a/arch/alpha/include/asm/cmpxchg.h
+++ b/arch/alpha/include/asm/cmpxchg.h
@@ -66,6 +66,4 @@
#undef __ASM__MB
#undef ____cmpxchg
-#define __HAVE_ARCH_CMPXCHG 1
-
#endif /* _ALPHA_CMPXCHG_H */
diff --git a/arch/arm/include/asm/barrier.h b/arch/arm/include/asm/barrier.h
index d2f81e6b8c1c..6c2327e1c732 100644
--- a/arch/arm/include/asm/barrier.h
+++ b/arch/arm/include/asm/barrier.h
@@ -81,7 +81,7 @@ do { \
#define read_barrier_depends() do { } while(0)
#define smp_read_barrier_depends() do { } while(0)
-#define set_mb(var, value) do { var = value; smp_mb(); } while (0)
+#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); smp_mb(); } while (0)
#define smp_mb__before_atomic() smp_mb()
#define smp_mb__after_atomic() smp_mb()
diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h
index 71f19c4dc0de..0fa47c4275cb 100644
--- a/arch/arm64/include/asm/barrier.h
+++ b/arch/arm64/include/asm/barrier.h
@@ -114,7 +114,7 @@ do { \
#define read_barrier_depends() do { } while(0)
#define smp_read_barrier_depends() do { } while(0)
-#define set_mb(var, value) do { var = value; smp_mb(); } while (0)
+#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); smp_mb(); } while (0)
#define nop() asm volatile("nop");
#define smp_mb__before_atomic() smp_mb()
diff --git a/arch/avr32/include/asm/cmpxchg.h b/arch/avr32/include/asm/cmpxchg.h
index 962a6aeab787..366bbeaeb405 100644
--- a/arch/avr32/include/asm/cmpxchg.h
+++ b/arch/avr32/include/asm/cmpxchg.h
@@ -70,8 +70,6 @@ extern unsigned long __cmpxchg_u64_unsupported_on_32bit_kernels(
if something tries to do an invalid cmpxchg(). */
extern void __cmpxchg_called_with_bad_pointer(void);
-#define __HAVE_ARCH_CMPXCHG 1
-
static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
unsigned long new, int size)
{
diff --git a/arch/hexagon/include/asm/cmpxchg.h b/arch/hexagon/include/asm/cmpxchg.h
index 9e7802911a57..a6e34e2acbba 100644
--- a/arch/hexagon/include/asm/cmpxchg.h
+++ b/arch/hexagon/include/asm/cmpxchg.h
@@ -64,7 +64,6 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr,
* looks just like atomic_cmpxchg on our arch currently with a bunch of
* variable casting.
*/
-#define __HAVE_ARCH_CMPXCHG 1
#define cmpxchg(ptr, old, new) \
({ \
diff --git a/arch/ia64/include/asm/barrier.h b/arch/ia64/include/asm/barrier.h
index f6769eb2bbf9..843ba435e43b 100644
--- a/arch/ia64/include/asm/barrier.h
+++ b/arch/ia64/include/asm/barrier.h
@@ -77,12 +77,7 @@ do { \
___p1; \
})
-/*
- * XXX check on this ---I suspect what Linus really wants here is
- * acquire vs release semantics but we can't discuss this stuff with
- * Linus just yet. Grrr...
- */
-#define set_mb(var, value) do { (var) = (value); mb(); } while (0)
+#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); mb(); } while (0)
/*
* The group barrier in front of the rsm & ssm are necessary to ensure
diff --git a/arch/ia64/include/uapi/asm/cmpxchg.h b/arch/ia64/include/uapi/asm/cmpxchg.h
index f35109b1d907..a0e3620f8f13 100644
--- a/arch/ia64/include/uapi/asm/cmpxchg.h
+++ b/arch/ia64/include/uapi/asm/cmpxchg.h
@@ -61,8 +61,6 @@ extern void ia64_xchg_called_with_bad_pointer(void);
* indicated by comparing RETURN with OLD.
*/
-#define __HAVE_ARCH_CMPXCHG 1
-
/*
* This function doesn't exist, so you'll get a linker error
* if something tries to do an invalid cmpxchg().
diff --git a/arch/m32r/include/asm/cmpxchg.h b/arch/m32r/include/asm/cmpxchg.h
index de651db20b43..14bf9b739dd2 100644
--- a/arch/m32r/include/asm/cmpxchg.h
+++ b/arch/m32r/include/asm/cmpxchg.h
@@ -107,8 +107,6 @@ __xchg_local(unsigned long x, volatile void *ptr, int size)
((__typeof__(*(ptr)))__xchg_local((unsigned long)(x), (ptr), \
sizeof(*(ptr))))
-#define __HAVE_ARCH_CMPXCHG 1
-
static inline unsigned long
__cmpxchg_u32(volatile unsigned int *p, unsigned int old, unsigned int new)
{
diff --git a/arch/m68k/include/asm/cmpxchg.h b/arch/m68k/include/asm/cmpxchg.h
index bc755bc620ad..83b1df80f0ac 100644
--- a/arch/m68k/include/asm/cmpxchg.h
+++ b/arch/m68k/include/asm/cmpxchg.h
@@ -90,7 +90,6 @@ extern unsigned long __invalid_cmpxchg_size(volatile void *,
* indicated by comparing RETURN with OLD.
*/
#ifdef CONFIG_RMW_INSNS
-#define __HAVE_ARCH_CMPXCHG 1
static inline unsigned long __cmpxchg(volatile void *p, unsigned long old,
unsigned long new, int size)
diff --git a/arch/metag/include/asm/barrier.h b/arch/metag/include/asm/barrier.h
index d703d8e26a65..5a696e507930 100644
--- a/arch/metag/include/asm/barrier.h
+++ b/arch/metag/include/asm/barrier.h
@@ -84,7 +84,7 @@ static inline void fence(void)
#define read_barrier_depends() do { } while (0)
#define smp_read_barrier_depends() do { } while (0)
-#define set_mb(var, value) do { var = value; smp_mb(); } while (0)
+#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); smp_mb(); } while (0)
#define smp_store_release(p, v) \
do { \
diff --git a/arch/metag/include/asm/cmpxchg.h b/arch/metag/include/asm/cmpxchg.h
index b1bc1be8540f..be29e3e44321 100644
--- a/arch/metag/include/asm/cmpxchg.h
+++ b/arch/metag/include/asm/cmpxchg.h
@@ -51,8 +51,6 @@ static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
return old;
}
-#define __HAVE_ARCH_CMPXCHG 1
-
#define cmpxchg(ptr, o, n) \
({ \
__typeof__(*(ptr)) _o_ = (o); \
diff --git a/arch/mips/include/asm/barrier.h b/arch/mips/include/asm/barrier.h
index 2b8bbbcb9be0..7ecba84656d4 100644
--- a/arch/mips/include/asm/barrier.h
+++ b/arch/mips/include/asm/barrier.h
@@ -112,8 +112,8 @@
#define __WEAK_LLSC_MB " \n"
#endif
-#define set_mb(var, value) \
- do { var = value; smp_mb(); } while (0)
+#define smp_store_mb(var, value) \
+ do { WRITE_ONCE(var, value); smp_mb(); } while (0)
#define smp_llsc_mb() __asm__ __volatile__(__WEAK_LLSC_MB : : :"memory")
diff --git a/arch/mips/include/asm/cmpxchg.h b/arch/mips/include/asm/cmpxchg.h
index 412f945f1f5e..b71ab4a5fd50 100644
--- a/arch/mips/include/asm/cmpxchg.h
+++ b/arch/mips/include/asm/cmpxchg.h
@@ -138,8 +138,6 @@ static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int siz
__xchg((unsigned long)(x), (ptr), sizeof(*(ptr)))); \
})
-#define __HAVE_ARCH_CMPXCHG 1
-
#define __cmpxchg_asm(ld, st, m, old, new) \
({ \
__typeof(*(m)) __ret; \
diff --git a/arch/parisc/include/asm/cmpxchg.h b/arch/parisc/include/asm/cmpxchg.h
index dbd13354ec41..0a90b965cccb 100644
--- a/arch/parisc/include/asm/cmpxchg.h
+++ b/arch/parisc/include/asm/cmpxchg.h
@@ -46,8 +46,6 @@ __xchg(unsigned long x, __volatile__ void *ptr, int size)
#define xchg(ptr, x) \
((__typeof__(*(ptr)))__xchg((unsigned long)(x), (ptr), sizeof(*(ptr))))
-#define __HAVE_ARCH_CMPXCHG 1
-
/* bug catcher for when unsupported size is used - won't link */
extern void __cmpxchg_called_with_bad_pointer(void);
diff --git a/arch/powerpc/include/asm/barrier.h b/arch/powerpc/include/asm/barrier.h
index a3bf5be111ff..51ccc7232042 100644
--- a/arch/powerpc/include/asm/barrier.h
+++ b/arch/powerpc/include/asm/barrier.h
@@ -34,7 +34,7 @@
#define rmb() __asm__ __volatile__ ("sync" : : : "memory")
#define wmb() __asm__ __volatile__ ("sync" : : : "memory")
-#define set_mb(var, value) do { var = value; mb(); } while (0)
+#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); mb(); } while (0)
#ifdef __SUBARCH_HAS_LWSYNC
# define SMPWMB LWSYNC
@@ -89,5 +89,6 @@ do { \
#define smp_mb__before_atomic() smp_mb()
#define smp_mb__after_atomic() smp_mb()
+#define smp_mb__before_spinlock() smp_mb()
#endif /* _ASM_POWERPC_BARRIER_H */
diff --git a/arch/powerpc/include/asm/cmpxchg.h b/arch/powerpc/include/asm/cmpxchg.h
index d463c68fe7f0..ad6263cffb0f 100644
--- a/arch/powerpc/include/asm/cmpxchg.h
+++ b/arch/powerpc/include/asm/cmpxchg.h
@@ -144,7 +144,6 @@ __xchg_local(volatile void *ptr, unsigned long x, unsigned int size)
* Compare and exchange - if *p == old, set it to new,
* and return the old value of *p.
*/
-#define __HAVE_ARCH_CMPXCHG 1
static __always_inline unsigned long
__cmpxchg_u32(volatile unsigned int *p, unsigned long old, unsigned long new)
diff --git a/arch/s390/include/asm/barrier.h b/arch/s390/include/asm/barrier.h
index 8d724718ec21..e6f8615a11eb 100644
--- a/arch/s390/include/asm/barrier.h
+++ b/arch/s390/include/asm/barrier.h
@@ -36,7 +36,7 @@
#define smp_mb__before_atomic() smp_mb()
#define smp_mb__after_atomic() smp_mb()
-#define set_mb(var, value) do { var = value; mb(); } while (0)
+#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); mb(); } while (0)
#define smp_store_release(p, v) \
do { \
diff --git a/arch/s390/include/asm/cmpxchg.h b/arch/s390/include/asm/cmpxchg.h
index 4eadec466b8c..411464f4c97a 100644
--- a/arch/s390/include/asm/cmpxchg.h
+++ b/arch/s390/include/asm/cmpxchg.h
@@ -32,8 +32,6 @@
__old; \
})
-#define __HAVE_ARCH_CMPXCHG
-
#define __cmpxchg_double_op(p1, p2, o1, o2, n1, n2, insn) \
({ \
register __typeof__(*(p1)) __old1 asm("2") = (o1); \
diff --git a/arch/score/include/asm/cmpxchg.h b/arch/score/include/asm/cmpxchg.h
index f384839c3ee5..cc3f6420b71c 100644
--- a/arch/score/include/asm/cmpxchg.h
+++ b/arch/score/include/asm/cmpxchg.h
@@ -42,8 +42,6 @@ static inline unsigned long __cmpxchg(volatile unsigned long *m,
(unsigned long)(o), \
(unsigned long)(n)))
-#define __HAVE_ARCH_CMPXCHG 1
-
#include <asm-generic/cmpxchg-local.h>
#endif /* _ASM_SCORE_CMPXCHG_H */
diff --git a/arch/sh/include/asm/barrier.h b/arch/sh/include/asm/barrier.h
index 43715308b068..bf91037db4e0 100644
--- a/arch/sh/include/asm/barrier.h
+++ b/arch/sh/include/asm/barrier.h
@@ -32,7 +32,7 @@
#define ctrl_barrier() __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop")
#endif
-#define set_mb(var, value) do { (void)xchg(&var, value); } while (0)
+#define smp_store_mb(var, value) do { (void)xchg(&var, value); } while (0)
#include <asm-generic/barrier.h>
diff --git a/arch/sh/include/asm/cmpxchg.h b/arch/sh/include/asm/cmpxchg.h
index f6bd1406b897..85c97b188d71 100644
--- a/arch/sh/include/asm/cmpxchg.h
+++ b/arch/sh/include/asm/cmpxchg.h
@@ -46,8 +46,6 @@ extern void __xchg_called_with_bad_pointer(void);
* if something tries to do an invalid cmpxchg(). */
extern void __cmpxchg_called_with_bad_pointer(void);
-#define __HAVE_ARCH_CMPXCHG 1
-
static inline unsigned long __cmpxchg(volatile void * ptr, unsigned long old,
unsigned long new, int size)
{
diff --git a/arch/sparc/include/asm/barrier_64.h b/arch/sparc/include/asm/barrier_64.h
index 76648941fea7..809941e33e12 100644
--- a/arch/sparc/include/asm/barrier_64.h
+++ b/arch/sparc/include/asm/barrier_64.h
@@ -40,8 +40,8 @@ do { __asm__ __volatile__("ba,pt %%xcc, 1f\n\t" \
#define dma_rmb() rmb()
#define dma_wmb() wmb()
-#define set_mb(__var, __value) \
- do { __var = __value; membar_safe("#StoreLoad"); } while(0)
+#define smp_store_mb(__var, __value) \
+ do { WRITE_ONCE(__var, __value); membar_safe("#StoreLoad"); } while(0)
#ifdef CONFIG_SMP
#define smp_mb() mb()
diff --git a/arch/sparc/include/asm/cmpxchg_32.h b/arch/sparc/include/asm/cmpxchg_32.h
index d38b52dca216..83ffb83c5397 100644
--- a/arch/sparc/include/asm/cmpxchg_32.h
+++ b/arch/sparc/include/asm/cmpxchg_32.h
@@ -34,7 +34,6 @@ static inline unsigned long __xchg(unsigned long x, __volatile__ void * ptr, int
*
* Cribbed from <asm-parisc/atomic.h>
*/
-#define __HAVE_ARCH_CMPXCHG 1
/* bug catcher for when unsupported size is used - won't link */
void __cmpxchg_called_with_bad_pointer(void);
diff --git a/arch/sparc/include/asm/cmpxchg_64.h b/arch/sparc/include/asm/cmpxchg_64.h
index 0e1ed6cfbf68..faa2f61058c2 100644
--- a/arch/sparc/include/asm/cmpxchg_64.h
+++ b/arch/sparc/include/asm/cmpxchg_64.h
@@ -65,8 +65,6 @@ static inline unsigned long __xchg(unsigned long x, __volatile__ void * ptr,
#include <asm-generic/cmpxchg-local.h>
-#define __HAVE_ARCH_CMPXCHG 1
-
static inline unsigned long
__cmpxchg_u32(volatile int *m, int old, int new)
{
diff --git a/arch/tile/include/asm/atomic_64.h b/arch/tile/include/asm/atomic_64.h
index 7b11c5fadd42..0496970cef82 100644
--- a/arch/tile/include/asm/atomic_64.h
+++ b/arch/tile/include/asm/atomic_64.h
@@ -105,9 +105,6 @@ static inline long atomic64_add_unless(atomic64_t *v, long a, long u)
#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0)
-/* Define this to indicate that cmpxchg is an efficient operation. */
-#define __HAVE_ARCH_CMPXCHG
-
#endif /* !__ASSEMBLY__ */
#endif /* _ASM_TILE_ATOMIC_64_H */
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 226d5696e1d1..4e986e809861 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -127,7 +127,8 @@ config X86
select MODULES_USE_ELF_RELA if X86_64
select CLONE_BACKWARDS if X86_32
select ARCH_USE_BUILTIN_BSWAP
- select ARCH_USE_QUEUE_RWLOCK
+ select ARCH_USE_QUEUED_SPINLOCKS
+ select ARCH_USE_QUEUED_RWLOCKS
select OLD_SIGSUSPEND3 if X86_32 || IA32_EMULATION
select OLD_SIGACTION if X86_32
select COMPAT_OLD_SIGACTION if IA32_EMULATION
@@ -666,7 +667,7 @@ config PARAVIRT_DEBUG
config PARAVIRT_SPINLOCKS
bool "Paravirtualization layer for spinlocks"
depends on PARAVIRT && SMP
- select UNINLINE_SPIN_UNLOCK
+ select UNINLINE_SPIN_UNLOCK if !QUEUED_SPINLOCKS
---help---
Paravirtualized spinlocks allow a pvops backend to replace the
spinlock implementation with something virtualization-friendly
diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h
index 959e45b81fe2..e51a8f803f55 100644
--- a/arch/x86/include/asm/barrier.h
+++ b/arch/x86/include/asm/barrier.h
@@ -35,12 +35,12 @@
#define smp_mb() mb()
#define smp_rmb() dma_rmb()
#define smp_wmb() barrier()
-#define set_mb(var, value) do { (void)xchg(&var, value); } while (0)
+#define smp_store_mb(var, value) do { (void)xchg(&var, value); } while (0)
#else /* !SMP */
#define smp_mb() barrier()
#define smp_rmb() barrier()
#define smp_wmb() barrier()
-#define set_mb(var, value) do { var = value; barrier(); } while (0)
+#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); barrier(); } while (0)
#endif /* SMP */
#define read_barrier_depends() do { } while (0)
diff --git a/arch/x86/include/asm/cmpxchg.h b/arch/x86/include/asm/cmpxchg.h
index 99c105d78b7e..ad19841eddfe 100644
--- a/arch/x86/include/asm/cmpxchg.h
+++ b/arch/x86/include/asm/cmpxchg.h
@@ -4,8 +4,6 @@
#include <linux/compiler.h>
#include <asm/alternative.h> /* Provides LOCK_PREFIX */
-#define __HAVE_ARCH_CMPXCHG 1
-
/*
* Non-existant functions to indicate usage errors at link time
* (or compile-time if the compiler implements __compiletime_error().
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 8957810ad7d1..d143bfad45d7 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -712,6 +712,31 @@ static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx,
#if defined(CONFIG_SMP) && defined(CONFIG_PARAVIRT_SPINLOCKS)
+#ifdef CONFIG_QUEUED_SPINLOCKS
+
+static __always_inline void pv_queued_spin_lock_slowpath(struct qspinlock *lock,
+ u32 val)
+{
+ PVOP_VCALL2(pv_lock_ops.queued_spin_lock_slowpath, lock, val);
+}
+
+static __always_inline void pv_queued_spin_unlock(struct qspinlock *lock)
+{
+ PVOP_VCALLEE1(pv_lock_ops.queued_spin_unlock, lock);
+}
+
+static __always_inline void pv_wait(u8 *ptr, u8 val)
+{
+ PVOP_VCALL2(pv_lock_ops.wait, ptr, val);
+}
+
+static __always_inline void pv_kick(int cpu)
+{
+ PVOP_VCALL1(pv_lock_ops.kick, cpu);
+}
+
+#else /* !CONFIG_QUEUED_SPINLOCKS */
+
static __always_inline void __ticket_lock_spinning(struct arch_spinlock *lock,
__ticket_t ticket)
{
@@ -724,7 +749,9 @@ static __always_inline void __ticket_unlock_kick(struct arch_spinlock *lock,
PVOP_VCALL2(pv_lock_ops.unlock_kick, lock, ticket);
}
-#endif
+#endif /* CONFIG_QUEUED_SPINLOCKS */
+
+#endif /* SMP && PARAVIRT_SPINLOCKS */
#ifdef CONFIG_X86_32
#define PV_SAVE_REGS "pushl %ecx; pushl %edx;"
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index f7b0b5c112f2..8766c7c395c2 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -333,9 +333,19 @@ struct arch_spinlock;
typedef u16 __ticket_t;
#endif
+struct qspinlock;
+
struct pv_lock_ops {
+#ifdef CONFIG_QUEUED_SPINLOCKS
+ void (*queued_spin_lock_slowpath)(struct qspinlock *lock, u32 val);
+ struct paravirt_callee_save queued_spin_unlock;
+
+ void (*wait)(u8 *ptr, u8 val);
+ void (*kick)(int cpu);
+#else /* !CONFIG_QUEUED_SPINLOCKS */
struct paravirt_callee_save lock_spinning;
void (*unlock_kick)(struct arch_spinlock *lock, __ticket_t ticket);
+#endif /* !CONFIG_QUEUED_SPINLOCKS */
};
/* This contains all the paravirt structures: we get a convenient
diff --git a/arch/x86/include/asm/qspinlock.h b/arch/x86/include/asm/qspinlock.h
new file mode 100644
index 000000000000..9d51fae1cba3
--- /dev/null
+++ b/arch/x86/include/asm/qspinlock.h
@@ -0,0 +1,57 @@
+#ifndef _ASM_X86_QSPINLOCK_H
+#define _ASM_X86_QSPINLOCK_H
+
+#include <asm/cpufeature.h>
+#include <asm-generic/qspinlock_types.h>
+#include <asm/paravirt.h>
+
+#define queued_spin_unlock queued_spin_unlock
+/**
+ * queued_spin_unlock - release a queued spinlock
+ * @lock : Pointer to queued spinlock structure
+ *
+ * A smp_store_release() on the least-significant byte.
+ */
+static inline void native_queued_spin_unlock(struct qspinlock *lock)
+{
+ smp_store_release((u8 *)lock, 0);
+}
+
+#ifdef CONFIG_PARAVIRT_SPINLOCKS
+extern void native_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
+extern void __pv_init_lock_hash(void);
+extern void __pv_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
+extern void __raw_callee_save___pv_queued_spin_unlock(struct qspinlock *lock);
+
+static inline void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
+{
+ pv_queued_spin_lock_slowpath(lock, val);
+}
+
+static inline void queued_spin_unlock(struct qspinlock *lock)
+{
+ pv_queued_spin_unlock(lock);
+}
+#else
+static inline void queued_spin_unlock(struct qspinlock *lock)
+{
+ native_queued_spin_unlock(lock);
+}
+#endif
+
+#define virt_queued_spin_lock virt_queued_spin_lock
+
+static inline bool virt_queued_spin_lock(struct qspinlock *lock)
+{
+ if (!static_cpu_has(X86_FEATURE_HYPERVISOR))
+ return false;
+
+ while (atomic_cmpxchg(&lock->val, 0, _Q_LOCKED_VAL) != 0)
+ cpu_relax();
+
+ return true;
+}
+
+#include <asm-generic/qspinlock.h>
+
+#endif /* _ASM_X86_QSPINLOCK_H */
diff --git a/arch/x86/include/asm/qspinlock_paravirt.h b/arch/x86/include/asm/qspinlock_paravirt.h
new file mode 100644
index 000000000000..b002e711ba88
--- /dev/null
+++ b/arch/x86/include/asm/qspinlock_paravirt.h
@@ -0,0 +1,6 @@
+#ifndef __ASM_QSPINLOCK_PARAVIRT_H
+#define __ASM_QSPINLOCK_PARAVIRT_H
+
+PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock);
+
+#endif
diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h
index 64b611782ef0..be0a05913b91 100644
--- a/arch/x86/include/asm/spinlock.h
+++ b/arch/x86/include/asm/spinlock.h
@@ -42,6 +42,10 @@
extern struct static_key paravirt_ticketlocks_enabled;
static __always_inline bool static_key_false(struct static_key *key);
+#ifdef CONFIG_QUEUED_SPINLOCKS
+#include <asm/qspinlock.h>
+#else
+
#ifdef CONFIG_PARAVIRT_SPINLOCKS
static inline void __ticket_enter_slowpath(arch_spinlock_t *lock)
@@ -196,6 +200,7 @@ static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
cpu_relax();
}
}
+#endif /* CONFIG_QUEUED_SPINLOCKS */
/*
* Read-write spinlocks, allowing multiple readers
diff --git a/arch/x86/include/asm/spinlock_types.h b/arch/x86/include/asm/spinlock_types.h
index 5f9d7572d82b..65c3e37f879a 100644
--- a/arch/x86/include/asm/spinlock_types.h
+++ b/arch/x86/include/asm/spinlock_types.h
@@ -23,6 +23,9 @@ typedef u32 __ticketpair_t;
#define TICKET_SHIFT (sizeof(__ticket_t) * 8)
+#ifdef CONFIG_QUEUED_SPINLOCKS
+#include <asm-generic/qspinlock_types.h>
+#else
typedef struct arch_spinlock {
union {
__ticketpair_t head_tail;
@@ -33,6 +36,7 @@ typedef struct arch_spinlock {
} arch_spinlock_t;
#define __ARCH_SPIN_LOCK_UNLOCKED { { 0 } }
+#endif /* CONFIG_QUEUED_SPINLOCKS */
#include <asm-generic/qrwlock_types.h>
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 20190bdac9d5..95cf78d44ab4 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -53,9 +53,12 @@
static DEFINE_MUTEX(mce_chrdev_read_mutex);
#define rcu_dereference_check_mce(p) \
- rcu_dereference_index_check((p), \
- rcu_read_lock_sched_held() || \
- lockdep_is_held(&mce_chrdev_read_mutex))
+({ \
+ rcu_lockdep_assert(rcu_read_lock_sched_held() || \
+ lockdep_is_held(&mce_chrdev_read_mutex), \
+ "suspicious rcu_dereference_check_mce() usage"); \
+ smp_load_acquire(&(p)); \
+})
#define CREATE_TRACE_POINTS
#include <trace/events/mce.h>
@@ -1887,7 +1890,7 @@ out:
static unsigned int mce_chrdev_poll(struct file *file, poll_table *wait)
{
poll_wait(file, &mce_chrdev_wait, wait);
- if (rcu_access_index(mcelog.next))
+ if (READ_ONCE(mcelog.next))
return POLLIN | POLLRDNORM;
if (!mce_apei_read_done && apei_check_mce())
return POLLIN | POLLRDNORM;
@@ -1932,8 +1935,8 @@ void register_mce_write_callback(ssize_t (*fn)(struct file *filp,
}
EXPORT_SYMBOL_GPL(register_mce_write_callback);
-ssize_t mce_chrdev_write(struct file *filp, const char __user *ubuf,
- size_t usize, loff_t *off)
+static ssize_t mce_chrdev_write(struct file *filp, const char __user *ubuf,
+ size_t usize, loff_t *off)
{
if (mce_write)
return mce_write(filp, ubuf, usize, off);
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 6be186ccef46..5801a14f7524 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -910,10 +910,7 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
if (x86_pmu.commit_scheduling)
x86_pmu.commit_scheduling(cpuc, i, assign[i]);
}
- }
-
- if (!assign || unsched) {
-
+ } else {
for (i = 0; i < n; i++) {
e = cpuc->event_list[i];
/*
@@ -1126,13 +1123,16 @@ int x86_perf_event_set_period(struct perf_event *event)
per_cpu(pmc_prev_left[idx], smp_processor_id()) = left;
- /*
- * The hw event starts counting from this event offset,
- * mark it to be able to extra future deltas:
- */
- local64_set(&hwc->prev_count, (u64)-left);
+ if (!(hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) ||
+ local64_read(&hwc->prev_count) != (u64)-left) {
+ /*
+ * The hw event starts counting from this event offset,
+ * mark it to be able to extra future deltas:
+ */
+ local64_set(&hwc->prev_count, (u64)-left);
- wrmsrl(hwc->event_base, (u64)(-left) & x86_pmu.cntval_mask);
+ wrmsrl(hwc->event_base, (u64)(-left) & x86_pmu.cntval_mask);
+ }
/*
* Due to erratum on certan cpu we need
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index f068695eaca0..3e7fd27dfe20 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -75,6 +75,8 @@ struct event_constraint {
#define PERF_X86_EVENT_DYNAMIC 0x0080 /* dynamic alloc'd constraint */
#define PERF_X86_EVENT_RDPMC_ALLOWED 0x0100 /* grant rdpmc permission */
#define PERF_X86_EVENT_EXCL_ACCT 0x0200 /* accounted EXCL event */
+#define PERF_X86_EVENT_AUTO_RELOAD 0x0400 /* use PEBS auto-reload */
+#define PERF_X86_EVENT_FREERUNNING 0x0800 /* use freerunning PEBS */
struct amd_nb {
@@ -88,6 +90,18 @@ struct amd_nb {
#define MAX_PEBS_EVENTS 8
/*
+ * Flags PEBS can handle without an PMI.
+ *
+ * TID can only be handled by flushing at context switch.
+ *
+ */
+#define PEBS_FREERUNNING_FLAGS \
+ (PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_ADDR | \
+ PERF_SAMPLE_ID | PERF_SAMPLE_CPU | PERF_SAMPLE_STREAM_ID | \
+ PERF_SAMPLE_DATA_SRC | PERF_SAMPLE_IDENTIFIER | \
+ PERF_SAMPLE_TRANSACTION)
+
+/*
* A debug store configuration.
*
* We only support architectures that use 64bit fields.
@@ -133,7 +147,6 @@ enum intel_excl_state_type {
};
struct intel_excl_states {
- enum intel_excl_state_type init_state[X86_PMC_IDX_MAX];
enum intel_excl_state_type state[X86_PMC_IDX_MAX];
bool sched_started; /* true if scheduling has started */
};
@@ -527,10 +540,10 @@ struct x86_pmu {
void (*put_event_constraints)(struct cpu_hw_events *cpuc,
struct perf_event *event);
- void (*commit_scheduling)(struct cpu_hw_events *cpuc, int idx, int cntr);
-
void (*start_scheduling)(struct cpu_hw_events *cpuc);
+ void (*commit_scheduling)(struct cpu_hw_events *cpuc, int idx, int cntr);
+
void (*stop_scheduling)(struct cpu_hw_events *cpuc);
struct event_constraint *event_constraints;
@@ -870,6 +883,8 @@ void intel_pmu_pebs_enable_all(void);
void intel_pmu_pebs_disable_all(void);
+void intel_pmu_pebs_sched_task(struct perf_event_context *ctx, bool sched_in);
+
void intel_ds_init(void);
void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in);
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 2813ea0f142e..19980d9a6cc9 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1903,9 +1903,8 @@ static void
intel_start_scheduling(struct cpu_hw_events *cpuc)
{
struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
- struct intel_excl_states *xl, *xlo;
+ struct intel_excl_states *xl;
int tid = cpuc->excl_thread_id;
- int o_tid = 1 - tid; /* sibling thread */
/*
* nothing needed if in group validation mode
@@ -1916,10 +1915,9 @@ intel_start_scheduling(struct cpu_hw_events *cpuc)
/*
* no exclusion needed
*/
- if (!excl_cntrs)
+ if (WARN_ON_ONCE(!excl_cntrs))
return;
- xlo = &excl_cntrs->states[o_tid];
xl = &excl_cntrs->states[tid];
xl->sched_started = true;
@@ -1928,22 +1926,41 @@ intel_start_scheduling(struct cpu_hw_events *cpuc)
* in stop_event_scheduling()
* makes scheduling appear as a transaction
*/
- WARN_ON_ONCE(!irqs_disabled());
raw_spin_lock(&excl_cntrs->lock);
+}
- /*
- * save initial state of sibling thread
- */
- memcpy(xlo->init_state, xlo->state, sizeof(xlo->init_state));
+static void intel_commit_scheduling(struct cpu_hw_events *cpuc, int idx, int cntr)
+{
+ struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
+ struct event_constraint *c = cpuc->event_constraint[idx];
+ struct intel_excl_states *xl;
+ int tid = cpuc->excl_thread_id;
+
+ if (cpuc->is_fake || !is_ht_workaround_enabled())
+ return;
+
+ if (WARN_ON_ONCE(!excl_cntrs))
+ return;
+
+ if (!(c->flags & PERF_X86_EVENT_DYNAMIC))
+ return;
+
+ xl = &excl_cntrs->states[tid];
+
+ lockdep_assert_held(&excl_cntrs->lock);
+
+ if (c->flags & PERF_X86_EVENT_EXCL)
+ xl->state[cntr] = INTEL_EXCL_EXCLUSIVE;
+ else
+ xl->state[cntr] = INTEL_EXCL_SHARED;
}
static void
intel_stop_scheduling(struct cpu_hw_events *cpuc)
{
struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
- struct intel_excl_states *xl, *xlo;
+ struct intel_excl_states *xl;
int tid = cpuc->excl_thread_id;
- int o_tid = 1 - tid; /* sibling thread */
/*
* nothing needed if in group validation mode
@@ -1953,17 +1970,11 @@ intel_stop_scheduling(struct cpu_hw_events *cpuc)
/*
* no exclusion needed
*/
- if (!excl_cntrs)
+ if (WARN_ON_ONCE(!excl_cntrs))
return;
- xlo = &excl_cntrs->states[o_tid];
xl = &excl_cntrs->states[tid];
- /*
- * make new sibling thread state visible
- */
- memcpy(xlo->state, xlo->init_state, sizeof(xlo->state));
-
xl->sched_started = false;
/*
* release shared state lock (acquired in intel_start_scheduling())
@@ -1975,12 +1986,10 @@ static struct event_constraint *
intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
int idx, struct event_constraint *c)
{
- struct event_constraint *cx;
struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
- struct intel_excl_states *xl, *xlo;
- int is_excl, i;
+ struct intel_excl_states *xlo;
int tid = cpuc->excl_thread_id;
- int o_tid = 1 - tid; /* alternate */
+ int is_excl, i;
/*
* validating a group does not require
@@ -1992,27 +2001,8 @@ intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
/*
* no exclusion needed
*/
- if (!excl_cntrs)
+ if (WARN_ON_ONCE(!excl_cntrs))
return c;
- /*
- * event requires exclusive counter access
- * across HT threads
- */
- is_excl = c->flags & PERF_X86_EVENT_EXCL;
- if (is_excl && !(event->hw.flags & PERF_X86_EVENT_EXCL_ACCT)) {
- event->hw.flags |= PERF_X86_EVENT_EXCL_ACCT;
- if (!cpuc->n_excl++)
- WRITE_ONCE(excl_cntrs->has_exclusive[tid], 1);
- }
-
- /*
- * xl = state of current HT
- * xlo = state of sibling HT
- */
- xl = &excl_cntrs->states[tid];
- xlo = &excl_cntrs->states[o_tid];
-
- cx = c;
/*
* because we modify the constraint, we need
@@ -2023,10 +2013,7 @@ intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
* been cloned (marked dynamic)
*/
if (!(c->flags & PERF_X86_EVENT_DYNAMIC)) {
-
- /* sanity check */
- if (idx < 0)
- return &emptyconstraint;
+ struct event_constraint *cx;
/*
* grab pre-allocated constraint entry
@@ -2037,13 +2024,14 @@ intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
* initialize dynamic constraint
* with static constraint
*/
- memcpy(cx, c, sizeof(*cx));
+ *cx = *c;
/*
* mark constraint as dynamic, so we
* can free it later on
*/
cx->flags |= PERF_X86_EVENT_DYNAMIC;
+ c = cx;
}
/*
@@ -2054,6 +2042,22 @@ intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
*/
/*
+ * state of sibling HT
+ */
+ xlo = &excl_cntrs->states[tid ^ 1];
+
+ /*
+ * event requires exclusive counter access
+ * across HT threads
+ */
+ is_excl = c->flags & PERF_X86_EVENT_EXCL;
+ if (is_excl && !(event->hw.flags & PERF_X86_EVENT_EXCL_ACCT)) {
+ event->hw.flags |= PERF_X86_EVENT_EXCL_ACCT;
+ if (!cpuc->n_excl++)
+ WRITE_ONCE(excl_cntrs->has_exclusive[tid], 1);
+ }
+
+ /*
* Modify static constraint with current dynamic
* state of thread
*
@@ -2061,37 +2065,37 @@ intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
* SHARED : sibling counter measuring non-exclusive event
* UNUSED : sibling counter unused
*/
- for_each_set_bit(i, cx->idxmsk, X86_PMC_IDX_MAX) {
+ for_each_set_bit(i, c->idxmsk, X86_PMC_IDX_MAX) {
/*
* exclusive event in sibling counter
* our corresponding counter cannot be used
* regardless of our event
*/
- if (xl->state[i] == INTEL_EXCL_EXCLUSIVE)
- __clear_bit(i, cx->idxmsk);
+ if (xlo->state[i] == INTEL_EXCL_EXCLUSIVE)
+ __clear_bit(i, c->idxmsk);
/*
* if measuring an exclusive event, sibling
* measuring non-exclusive, then counter cannot
* be used
*/
- if (is_excl && xl->state[i] == INTEL_EXCL_SHARED)
- __clear_bit(i, cx->idxmsk);
+ if (is_excl && xlo->state[i] == INTEL_EXCL_SHARED)
+ __clear_bit(i, c->idxmsk);
}
/*
* recompute actual bit weight for scheduling algorithm
*/
- cx->weight = hweight64(cx->idxmsk64);
+ c->weight = hweight64(c->idxmsk64);
/*
* if we return an empty mask, then switch
* back to static empty constraint to avoid
* the cost of freeing later on
*/
- if (cx->weight == 0)
- cx = &emptyconstraint;
+ if (c->weight == 0)
+ c = &emptyconstraint;
- return cx;
+ return c;
}
static struct event_constraint *
@@ -2124,10 +2128,8 @@ static void intel_put_excl_constraints(struct cpu_hw_events *cpuc,
{
struct hw_perf_event *hwc = &event->hw;
struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
- struct intel_excl_states *xlo, *xl;
- unsigned long flags = 0; /* keep compiler happy */
int tid = cpuc->excl_thread_id;
- int o_tid = 1 - tid;
+ struct intel_excl_states *xl;
/*
* nothing needed if in group validation mode
@@ -2135,13 +2137,9 @@ static void intel_put_excl_constraints(struct cpu_hw_events *cpuc,
if (cpuc->is_fake)
return;
- WARN_ON_ONCE(!excl_cntrs);
-
- if (!excl_cntrs)
+ if (WARN_ON_ONCE(!excl_cntrs))
return;
- xl = &excl_cntrs->states[tid];
- xlo = &excl_cntrs->states[o_tid];
if (hwc->flags & PERF_X86_EVENT_EXCL_ACCT) {
hwc->flags &= ~PERF_X86_EVENT_EXCL_ACCT;
if (!--cpuc->n_excl)
@@ -2149,22 +2147,25 @@ static void intel_put_excl_constraints(struct cpu_hw_events *cpuc,
}
/*
- * put_constraint may be called from x86_schedule_events()
- * which already has the lock held so here make locking
- * conditional
+ * If event was actually assigned, then mark the counter state as
+ * unused now.
*/
- if (!xl->sched_started)
- raw_spin_lock_irqsave(&excl_cntrs->lock, flags);
+ if (hwc->idx >= 0) {
+ xl = &excl_cntrs->states[tid];
- /*
- * if event was actually assigned, then mark the
- * counter state as unused now
- */
- if (hwc->idx >= 0)
- xlo->state[hwc->idx] = INTEL_EXCL_UNUSED;
+ /*
+ * put_constraint may be called from x86_schedule_events()
+ * which already has the lock held so here make locking
+ * conditional.
+ */
+ if (!xl->sched_started)
+ raw_spin_lock(&excl_cntrs->lock);
- if (!xl->sched_started)
- raw_spin_unlock_irqrestore(&excl_cntrs->lock, flags);
+ xl->state[hwc->idx] = INTEL_EXCL_UNUSED;
+
+ if (!xl->sched_started)
+ raw_spin_unlock(&excl_cntrs->lock);
+ }
}
static void
@@ -2196,41 +2197,6 @@ static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
intel_put_excl_constraints(cpuc, event);
}
-static void intel_commit_scheduling(struct cpu_hw_events *cpuc, int idx, int cntr)
-{
- struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
- struct event_constraint *c = cpuc->event_constraint[idx];
- struct intel_excl_states *xlo, *xl;
- int tid = cpuc->excl_thread_id;
- int o_tid = 1 - tid;
- int is_excl;
-
- if (cpuc->is_fake || !c)
- return;
-
- is_excl = c->flags & PERF_X86_EVENT_EXCL;
-
- if (!(c->flags & PERF_X86_EVENT_DYNAMIC))
- return;
-
- WARN_ON_ONCE(!excl_cntrs);
-
- if (!excl_cntrs)
- return;
-
- xl = &excl_cntrs->states[tid];
- xlo = &excl_cntrs->states[o_tid];
-
- WARN_ON_ONCE(!raw_spin_is_locked(&excl_cntrs->lock));
-
- if (cntr >= 0) {
- if (is_excl)
- xlo->init_state[cntr] = INTEL_EXCL_EXCLUSIVE;
- else
- xlo->init_state[cntr] = INTEL_EXCL_SHARED;
- }
-}
-
static void intel_pebs_aliases_core2(struct perf_event *event)
{
if ((event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) {
@@ -2294,8 +2260,15 @@ static int intel_pmu_hw_config(struct perf_event *event)
if (ret)
return ret;
- if (event->attr.precise_ip && x86_pmu.pebs_aliases)
- x86_pmu.pebs_aliases(event);
+ if (event->attr.precise_ip) {
+ if (!event->attr.freq) {
+ event->hw.flags |= PERF_X86_EVENT_AUTO_RELOAD;
+ if (!(event->attr.sample_type & ~PEBS_FREERUNNING_FLAGS))
+ event->hw.flags |= PERF_X86_EVENT_FREERUNNING;
+ }
+ if (x86_pmu.pebs_aliases)
+ x86_pmu.pebs_aliases(event);
+ }
if (needs_branch_stack(event)) {
ret = intel_pmu_setup_lbr_filter(event);
@@ -2544,19 +2517,11 @@ struct intel_shared_regs *allocate_shared_regs(int cpu)
static struct intel_excl_cntrs *allocate_excl_cntrs(int cpu)
{
struct intel_excl_cntrs *c;
- int i;
c = kzalloc_node(sizeof(struct intel_excl_cntrs),
GFP_KERNEL, cpu_to_node(cpu));
if (c) {
raw_spin_lock_init(&c->lock);
- for (i = 0; i < X86_PMC_IDX_MAX; i++) {
- c->states[0].state[i] = INTEL_EXCL_UNUSED;
- c->states[0].init_state[i] = INTEL_EXCL_UNUSED;
-
- c->states[1].state[i] = INTEL_EXCL_UNUSED;
- c->states[1].init_state[i] = INTEL_EXCL_UNUSED;
- }
c->core_id = -1;
}
return c;
@@ -2677,6 +2642,15 @@ static void intel_pmu_cpu_dying(int cpu)
fini_debug_store_on_cpu(cpu);
}
+static void intel_pmu_sched_task(struct perf_event_context *ctx,
+ bool sched_in)
+{
+ if (x86_pmu.pebs_active)
+ intel_pmu_pebs_sched_task(ctx, sched_in);
+ if (x86_pmu.lbr_nr)
+ intel_pmu_lbr_sched_task(ctx, sched_in);
+}
+
PMU_FORMAT_ATTR(offcore_rsp, "config1:0-63");
PMU_FORMAT_ATTR(ldlat, "config1:0-15");
@@ -2766,7 +2740,7 @@ static __initconst const struct x86_pmu intel_pmu = {
.cpu_starting = intel_pmu_cpu_starting,
.cpu_dying = intel_pmu_cpu_dying,
.guest_get_msrs = intel_guest_get_msrs,
- .sched_task = intel_pmu_lbr_sched_task,
+ .sched_task = intel_pmu_sched_task,
};
static __init void intel_clovertown_quirk(void)
@@ -2939,8 +2913,8 @@ static __init void intel_ht_bug(void)
{
x86_pmu.flags |= PMU_FL_EXCL_CNTRS | PMU_FL_EXCL_ENABLED;
- x86_pmu.commit_scheduling = intel_commit_scheduling;
x86_pmu.start_scheduling = intel_start_scheduling;
+ x86_pmu.commit_scheduling = intel_commit_scheduling;
x86_pmu.stop_scheduling = intel_stop_scheduling;
}
@@ -3398,8 +3372,8 @@ static __init int fixup_ht_bug(void)
x86_pmu.flags &= ~(PMU_FL_EXCL_CNTRS | PMU_FL_EXCL_ENABLED);
- x86_pmu.commit_scheduling = NULL;
x86_pmu.start_scheduling = NULL;
+ x86_pmu.commit_scheduling = NULL;
x86_pmu.stop_scheduling = NULL;
watchdog_nmi_enable_all();
diff --git a/arch/x86/kernel/cpu/perf_event_intel_cqm.c b/arch/x86/kernel/cpu/perf_event_intel_cqm.c
index e4d1b8b738fa..188076161c1b 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_cqm.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_cqm.c
@@ -13,16 +13,35 @@
#define MSR_IA32_QM_CTR 0x0c8e
#define MSR_IA32_QM_EVTSEL 0x0c8d
-static unsigned int cqm_max_rmid = -1;
+static u32 cqm_max_rmid = -1;
static unsigned int cqm_l3_scale; /* supposedly cacheline size */
-struct intel_cqm_state {
- raw_spinlock_t lock;
- int rmid;
- int cnt;
+/**
+ * struct intel_pqr_state - State cache for the PQR MSR
+ * @rmid: The cached Resource Monitoring ID
+ * @closid: The cached Class Of Service ID
+ * @rmid_usecnt: The usage counter for rmid
+ *
+ * The upper 32 bits of MSR_IA32_PQR_ASSOC contain closid and the
+ * lower 10 bits rmid. The update to MSR_IA32_PQR_ASSOC always
+ * contains both parts, so we need to cache them.
+ *
+ * The cache also helps to avoid pointless updates if the value does
+ * not change.
+ */
+struct intel_pqr_state {
+ u32 rmid;
+ u32 closid;
+ int rmid_usecnt;
};
-static DEFINE_PER_CPU(struct intel_cqm_state, cqm_state);
+/*
+ * The cached intel_pqr_state is strictly per CPU and can never be
+ * updated from a remote CPU. Both functions which modify the state
+ * (intel_cqm_event_start and intel_cqm_event_stop) are called with
+ * interrupts disabled, which is sufficient for the protection.
+ */
+static DEFINE_PER_CPU(struct intel_pqr_state, pqr_state);
/*
* Protects cache_cgroups and cqm_rmid_free_lru and cqm_rmid_limbo_lru.
@@ -57,7 +76,7 @@ static cpumask_t cqm_cpumask;
* near-zero occupancy value, i.e. no cachelines are tagged with this
* RMID, once __intel_cqm_rmid_rotate() returns.
*/
-static unsigned int intel_cqm_rotation_rmid;
+static u32 intel_cqm_rotation_rmid;
#define INVALID_RMID (-1)
@@ -69,7 +88,7 @@ static unsigned int intel_cqm_rotation_rmid;
* Likewise, an rmid value of -1 is used to indicate "no rmid currently
* assigned" and is used as part of the rotation code.
*/
-static inline bool __rmid_valid(unsigned int rmid)
+static inline bool __rmid_valid(u32 rmid)
{
if (!rmid || rmid == INVALID_RMID)
return false;
@@ -77,7 +96,7 @@ static inline bool __rmid_valid(unsigned int rmid)
return true;
}
-static u64 __rmid_read(unsigned int rmid)
+static u64 __rmid_read(u32 rmid)
{
u64 val;
@@ -102,7 +121,7 @@ enum rmid_recycle_state {
};
struct cqm_rmid_entry {
- unsigned int rmid;
+ u32 rmid;
enum rmid_recycle_state state;
struct list_head list;
unsigned long queue_time;
@@ -147,7 +166,7 @@ static LIST_HEAD(cqm_rmid_limbo_lru);
*/
static struct cqm_rmid_entry **cqm_rmid_ptrs;
-static inline struct cqm_rmid_entry *__rmid_entry(int rmid)
+static inline struct cqm_rmid_entry *__rmid_entry(u32 rmid)
{
struct cqm_rmid_entry *entry;
@@ -162,7 +181,7 @@ static inline struct cqm_rmid_entry *__rmid_entry(int rmid)
*
* We expect to be called with cache_mutex held.
*/
-static int __get_rmid(void)
+static u32 __get_rmid(void)
{
struct cqm_rmid_entry *entry;
@@ -177,7 +196,7 @@ static int __get_rmid(void)
return entry->rmid;
}
-static void __put_rmid(unsigned int rmid)
+static void __put_rmid(u32 rmid)
{
struct cqm_rmid_entry *entry;
@@ -372,7 +391,7 @@ static bool __conflict_event(struct perf_event *a, struct perf_event *b)
}
struct rmid_read {
- unsigned int rmid;
+ u32 rmid;
atomic64_t value;
};
@@ -381,12 +400,11 @@ static void __intel_cqm_event_count(void *info);
/*
* Exchange the RMID of a group of events.
*/
-static unsigned int
-intel_cqm_xchg_rmid(struct perf_event *group, unsigned int rmid)
+static u32 intel_cqm_xchg_rmid(struct perf_event *group, u32 rmid)
{
struct perf_event *event;
- unsigned int old_rmid = group->hw.cqm_rmid;
struct list_head *head = &group->hw.cqm_group_entry;
+ u32 old_rmid = group->hw.cqm_rmid;
lockdep_assert_held(&cache_mutex);
@@ -451,7 +469,7 @@ static void intel_cqm_stable(void *arg)
* If we have group events waiting for an RMID that don't conflict with
* events already running, assign @rmid.
*/
-static bool intel_cqm_sched_in_event(unsigned int rmid)
+static bool intel_cqm_sched_in_event(u32 rmid)
{
struct perf_event *leader, *event;
@@ -598,7 +616,7 @@ static bool intel_cqm_rmid_stabilize(unsigned int *available)
static void __intel_cqm_pick_and_rotate(struct perf_event *next)
{
struct perf_event *rotor;
- unsigned int rmid;
+ u32 rmid;
lockdep_assert_held(&cache_mutex);
@@ -626,7 +644,7 @@ static void __intel_cqm_pick_and_rotate(struct perf_event *next)
static void intel_cqm_sched_out_conflicting_events(struct perf_event *event)
{
struct perf_event *group, *g;
- unsigned int rmid;
+ u32 rmid;
lockdep_assert_held(&cache_mutex);
@@ -828,8 +846,8 @@ static void intel_cqm_setup_event(struct perf_event *event,
struct perf_event **group)
{
struct perf_event *iter;
- unsigned int rmid;
bool conflict = false;
+ u32 rmid;
list_for_each_entry(iter, &cache_groups, hw.cqm_groups_entry) {
rmid = iter->hw.cqm_rmid;
@@ -860,7 +878,7 @@ static void intel_cqm_setup_event(struct perf_event *event,
static void intel_cqm_event_read(struct perf_event *event)
{
unsigned long flags;
- unsigned int rmid;
+ u32 rmid;
u64 val;
/*
@@ -961,55 +979,48 @@ out:
static void intel_cqm_event_start(struct perf_event *event, int mode)
{
- struct intel_cqm_state *state = this_cpu_ptr(&cqm_state);
- unsigned int rmid = event->hw.cqm_rmid;
- unsigned long flags;
+ struct intel_pqr_state *state = this_cpu_ptr(&pqr_state);
+ u32 rmid = event->hw.cqm_rmid;
if (!(event->hw.cqm_state & PERF_HES_STOPPED))
return;
event->hw.cqm_state &= ~PERF_HES_STOPPED;
- raw_spin_lock_irqsave(&state->lock, flags);
-
- if (state->cnt++)
- WARN_ON_ONCE(state->rmid != rmid);
- else
+ if (state->rmid_usecnt++) {
+ if (!WARN_ON_ONCE(state->rmid != rmid))
+ return;
+ } else {
WARN_ON_ONCE(state->rmid);
+ }
state->rmid = rmid;
- wrmsrl(MSR_IA32_PQR_ASSOC, state->rmid);
-
- raw_spin_unlock_irqrestore(&state->lock, flags);
+ wrmsr(MSR_IA32_PQR_ASSOC, rmid, state->closid);
}
static void intel_cqm_event_stop(struct perf_event *event, int mode)
{
- struct intel_cqm_state *state = this_cpu_ptr(&cqm_state);
- unsigned long flags;
+ struct intel_pqr_state *state = this_cpu_ptr(&pqr_state);
if (event->hw.cqm_state & PERF_HES_STOPPED)
return;
event->hw.cqm_state |= PERF_HES_STOPPED;
- raw_spin_lock_irqsave(&state->lock, flags);
intel_cqm_event_read(event);
- if (!--state->cnt) {
+ if (!--state->rmid_usecnt) {
state->rmid = 0;
- wrmsrl(MSR_IA32_PQR_ASSOC, 0);
+ wrmsr(MSR_IA32_PQR_ASSOC, 0, state->closid);
} else {
WARN_ON_ONCE(!state->rmid);
}
-
- raw_spin_unlock_irqrestore(&state->lock, flags);
}
static int intel_cqm_event_add(struct perf_event *event, int mode)
{
unsigned long flags;
- unsigned int rmid;
+ u32 rmid;
raw_spin_lock_irqsave(&cache_lock, flags);
@@ -1024,11 +1035,6 @@ static int intel_cqm_event_add(struct perf_event *event, int mode)
return 0;
}
-static void intel_cqm_event_del(struct perf_event *event, int mode)
-{
- intel_cqm_event_stop(event, mode);
-}
-
static void intel_cqm_event_destroy(struct perf_event *event)
{
struct perf_event *group_other = NULL;
@@ -1057,7 +1063,7 @@ static void intel_cqm_event_destroy(struct perf_event *event)
list_replace(&event->hw.cqm_groups_entry,
&group_other->hw.cqm_groups_entry);
} else {
- unsigned int rmid = event->hw.cqm_rmid;
+ u32 rmid = event->hw.cqm_rmid;
if (__rmid_valid(rmid))
__put_rmid(rmid);
@@ -1221,7 +1227,7 @@ static struct pmu intel_cqm_pmu = {
.task_ctx_nr = perf_sw_context,
.event_init = intel_cqm_event_init,
.add = intel_cqm_event_add,
- .del = intel_cqm_event_del,
+ .del = intel_cqm_event_stop,
.start = intel_cqm_event_start,
.stop = intel_cqm_event_stop,
.read = intel_cqm_event_read,
@@ -1243,12 +1249,12 @@ static inline void cqm_pick_event_reader(int cpu)
static void intel_cqm_cpu_prepare(unsigned int cpu)
{
- struct intel_cqm_state *state = &per_cpu(cqm_state, cpu);
+ struct intel_pqr_state *state = &per_cpu(pqr_state, cpu);
struct cpuinfo_x86 *c = &cpu_data(cpu);
- raw_spin_lock_init(&state->lock);
state->rmid = 0;
- state->cnt = 0;
+ state->closid = 0;
+ state->rmid_usecnt = 0;
WARN_ON(c->x86_cache_max_rmid != cqm_max_rmid);
WARN_ON(c->x86_cache_occ_scale != cqm_l3_scale);
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 7f73b3553e2e..71fc40238843 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -11,7 +11,7 @@
#define BTS_RECORD_SIZE 24
#define BTS_BUFFER_SIZE (PAGE_SIZE << 4)
-#define PEBS_BUFFER_SIZE PAGE_SIZE
+#define PEBS_BUFFER_SIZE (PAGE_SIZE << 4)
#define PEBS_FIXUP_SIZE PAGE_SIZE
/*
@@ -250,7 +250,7 @@ static int alloc_pebs_buffer(int cpu)
{
struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
int node = cpu_to_node(cpu);
- int max, thresh = 1; /* always use a single PEBS record */
+ int max;
void *buffer, *ibuffer;
if (!x86_pmu.pebs)
@@ -280,9 +280,6 @@ static int alloc_pebs_buffer(int cpu)
ds->pebs_absolute_maximum = ds->pebs_buffer_base +
max * x86_pmu.pebs_record_size;
- ds->pebs_interrupt_threshold = ds->pebs_buffer_base +
- thresh * x86_pmu.pebs_record_size;
-
return 0;
}
@@ -549,6 +546,19 @@ int intel_pmu_drain_bts_buffer(void)
return 1;
}
+static inline void intel_pmu_drain_pebs_buffer(void)
+{
+ struct pt_regs regs;
+
+ x86_pmu.drain_pebs(&regs);
+}
+
+void intel_pmu_pebs_sched_task(struct perf_event_context *ctx, bool sched_in)
+{
+ if (!sched_in)
+ intel_pmu_drain_pebs_buffer();
+}
+
/*
* PEBS
*/
@@ -684,25 +694,66 @@ struct event_constraint *intel_pebs_constraints(struct perf_event *event)
return &emptyconstraint;
}
+static inline bool pebs_is_enabled(struct cpu_hw_events *cpuc)
+{
+ return (cpuc->pebs_enabled & ((1ULL << MAX_PEBS_EVENTS) - 1));
+}
+
void intel_pmu_pebs_enable(struct perf_event *event)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct hw_perf_event *hwc = &event->hw;
+ struct debug_store *ds = cpuc->ds;
+ bool first_pebs;
+ u64 threshold;
hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;
+ first_pebs = !pebs_is_enabled(cpuc);
cpuc->pebs_enabled |= 1ULL << hwc->idx;
if (event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT)
cpuc->pebs_enabled |= 1ULL << (hwc->idx + 32);
else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
cpuc->pebs_enabled |= 1ULL << 63;
+
+ /*
+ * When the event is constrained enough we can use a larger
+ * threshold and run the event with less frequent PMI.
+ */
+ if (hwc->flags & PERF_X86_EVENT_FREERUNNING) {
+ threshold = ds->pebs_absolute_maximum -
+ x86_pmu.max_pebs_events * x86_pmu.pebs_record_size;
+
+ if (first_pebs)
+ perf_sched_cb_inc(event->ctx->pmu);
+ } else {
+ threshold = ds->pebs_buffer_base + x86_pmu.pebs_record_size;
+
+ /*
+ * If not all events can use larger buffer,
+ * roll back to threshold = 1
+ */
+ if (!first_pebs &&
+ (ds->pebs_interrupt_threshold > threshold))
+ perf_sched_cb_dec(event->ctx->pmu);
+ }
+
+ /* Use auto-reload if possible to save a MSR write in the PMI */
+ if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) {
+ ds->pebs_event_reset[hwc->idx] =
+ (u64)(-hwc->sample_period) & x86_pmu.cntval_mask;
+ }
+
+ if (first_pebs || ds->pebs_interrupt_threshold > threshold)
+ ds->pebs_interrupt_threshold = threshold;
}
void intel_pmu_pebs_disable(struct perf_event *event)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct hw_perf_event *hwc = &event->hw;
+ struct debug_store *ds = cpuc->ds;
cpuc->pebs_enabled &= ~(1ULL << hwc->idx);
@@ -711,6 +762,13 @@ void intel_pmu_pebs_disable(struct perf_event *event)
else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
cpuc->pebs_enabled &= ~(1ULL << 63);
+ if (ds->pebs_interrupt_threshold >
+ ds->pebs_buffer_base + x86_pmu.pebs_record_size) {
+ intel_pmu_drain_pebs_buffer();
+ if (!pebs_is_enabled(cpuc))
+ perf_sched_cb_dec(event->ctx->pmu);
+ }
+
if (cpuc->enabled)
wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
@@ -846,8 +904,10 @@ static inline u64 intel_hsw_transaction(struct pebs_record_hsw *pebs)
return txn;
}
-static void __intel_pmu_pebs_event(struct perf_event *event,
- struct pt_regs *iregs, void *__pebs)
+static void setup_pebs_sample_data(struct perf_event *event,
+ struct pt_regs *iregs, void *__pebs,
+ struct perf_sample_data *data,
+ struct pt_regs *regs)
{
#define PERF_X86_EVENT_PEBS_HSW_PREC \
(PERF_X86_EVENT_PEBS_ST_HSW | \
@@ -859,13 +919,11 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
*/
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct pebs_record_hsw *pebs = __pebs;
- struct perf_sample_data data;
- struct pt_regs regs;
u64 sample_type;
int fll, fst, dsrc;
int fl = event->hw.flags;
- if (!intel_pmu_save_and_restart(event))
+ if (pebs == NULL)
return;
sample_type = event->attr.sample_type;
@@ -874,15 +932,15 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
fll = fl & PERF_X86_EVENT_PEBS_LDLAT;
fst = fl & (PERF_X86_EVENT_PEBS_ST | PERF_X86_EVENT_PEBS_HSW_PREC);
- perf_sample_data_init(&data, 0, event->hw.last_period);
+ perf_sample_data_init(data, 0, event->hw.last_period);
- data.period = event->hw.last_period;
+ data->period = event->hw.last_period;
/*
* Use latency for weight (only avail with PEBS-LL)
*/
if (fll && (sample_type & PERF_SAMPLE_WEIGHT))
- data.weight = pebs->lat;
+ data->weight = pebs->lat;
/*
* data.data_src encodes the data source
@@ -895,7 +953,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
val = precise_datala_hsw(event, pebs->dse);
else if (fst)
val = precise_store_data(pebs->dse);
- data.data_src.val = val;
+ data->data_src.val = val;
}
/*
@@ -908,61 +966,123 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
* PERF_SAMPLE_IP and PERF_SAMPLE_CALLCHAIN to function properly.
* A possible PERF_SAMPLE_REGS will have to transfer all regs.
*/
- regs = *iregs;
- regs.flags = pebs->flags;
- set_linear_ip(&regs, pebs->ip);
- regs.bp = pebs->bp;
- regs.sp = pebs->sp;
+ *regs = *iregs;
+ regs->flags = pebs->flags;
+ set_linear_ip(regs, pebs->ip);
+ regs->bp = pebs->bp;
+ regs->sp = pebs->sp;
if (sample_type & PERF_SAMPLE_REGS_INTR) {
- regs.ax = pebs->ax;
- regs.bx = pebs->bx;
- regs.cx = pebs->cx;
- regs.dx = pebs->dx;
- regs.si = pebs->si;
- regs.di = pebs->di;
- regs.bp = pebs->bp;
- regs.sp = pebs->sp;
-
- regs.flags = pebs->flags;
+ regs->ax = pebs->ax;
+ regs->bx = pebs->bx;
+ regs->cx = pebs->cx;
+ regs->dx = pebs->dx;
+ regs->si = pebs->si;
+ regs->di = pebs->di;
+ regs->bp = pebs->bp;
+ regs->sp = pebs->sp;
+
+ regs->flags = pebs->flags;
#ifndef CONFIG_X86_32
- regs.r8 = pebs->r8;
- regs.r9 = pebs->r9;
- regs.r10 = pebs->r10;
- regs.r11 = pebs->r11;
- regs.r12 = pebs->r12;
- regs.r13 = pebs->r13;
- regs.r14 = pebs->r14;
- regs.r15 = pebs->r15;
+ regs->r8 = pebs->r8;
+ regs->r9 = pebs->r9;
+ regs->r10 = pebs->r10;
+ regs->r11 = pebs->r11;
+ regs->r12 = pebs->r12;
+ regs->r13 = pebs->r13;
+ regs->r14 = pebs->r14;
+ regs->r15 = pebs->r15;
#endif
}
if (event->attr.precise_ip > 1 && x86_pmu.intel_cap.pebs_format >= 2) {
- regs.ip = pebs->real_ip;
- regs.flags |= PERF_EFLAGS_EXACT;
- } else if (event->attr.precise_ip > 1 && intel_pmu_pebs_fixup_ip(&regs))
- regs.flags |= PERF_EFLAGS_EXACT;
+ regs->ip = pebs->real_ip;
+ regs->flags |= PERF_EFLAGS_EXACT;
+ } else if (event->attr.precise_ip > 1 && intel_pmu_pebs_fixup_ip(regs))
+ regs->flags |= PERF_EFLAGS_EXACT;
else
- regs.flags &= ~PERF_EFLAGS_EXACT;
+ regs->flags &= ~PERF_EFLAGS_EXACT;
if ((sample_type & PERF_SAMPLE_ADDR) &&
x86_pmu.intel_cap.pebs_format >= 1)
- data.addr = pebs->dla;
+ data->addr = pebs->dla;
if (x86_pmu.intel_cap.pebs_format >= 2) {
/* Only set the TSX weight when no memory weight. */
if ((sample_type & PERF_SAMPLE_WEIGHT) && !fll)
- data.weight = intel_hsw_weight(pebs);
+ data->weight = intel_hsw_weight(pebs);
if (sample_type & PERF_SAMPLE_TRANSACTION)
- data.txn = intel_hsw_transaction(pebs);
+ data->txn = intel_hsw_transaction(pebs);
}
if (has_branch_stack(event))
- data.br_stack = &cpuc->lbr_stack;
+ data->br_stack = &cpuc->lbr_stack;
+}
+
+static inline void *
+get_next_pebs_record_by_bit(void *base, void *top, int bit)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ void *at;
+ u64 pebs_status;
+
+ if (base == NULL)
+ return NULL;
+
+ for (at = base; at < top; at += x86_pmu.pebs_record_size) {
+ struct pebs_record_nhm *p = at;
+
+ if (test_bit(bit, (unsigned long *)&p->status)) {
+ /* PEBS v3 has accurate status bits */
+ if (x86_pmu.intel_cap.pebs_format >= 3)
+ return at;
- if (perf_event_overflow(event, &data, &regs))
+ if (p->status == (1 << bit))
+ return at;
+
+ /* clear non-PEBS bit and re-check */
+ pebs_status = p->status & cpuc->pebs_enabled;
+ pebs_status &= (1ULL << MAX_PEBS_EVENTS) - 1;
+ if (pebs_status == (1 << bit))
+ return at;
+ }
+ }
+ return NULL;
+}
+
+static void __intel_pmu_pebs_event(struct perf_event *event,
+ struct pt_regs *iregs,
+ void *base, void *top,
+ int bit, int count)
+{
+ struct perf_sample_data data;
+ struct pt_regs regs;
+ void *at = get_next_pebs_record_by_bit(base, top, bit);
+
+ if (!intel_pmu_save_and_restart(event) &&
+ !(event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD))
+ return;
+
+ while (count > 1) {
+ setup_pebs_sample_data(event, iregs, at, &data, &regs);
+ perf_event_output(event, &data, &regs);
+ at += x86_pmu.pebs_record_size;
+ at = get_next_pebs_record_by_bit(at, top, bit);
+ count--;
+ }
+
+ setup_pebs_sample_data(event, iregs, at, &data, &regs);
+
+ /*
+ * All but the last records are processed.
+ * The last one is left to be able to call the overflow handler.
+ */
+ if (perf_event_overflow(event, &data, &regs)) {
x86_pmu_stop(event, 0);
+ return;
+ }
+
}
static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
@@ -992,72 +1112,99 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
if (!event->attr.precise_ip)
return;
- n = top - at;
+ n = (top - at) / x86_pmu.pebs_record_size;
if (n <= 0)
return;
- /*
- * Should not happen, we program the threshold at 1 and do not
- * set a reset value.
- */
- WARN_ONCE(n > 1, "bad leftover pebs %d\n", n);
- at += n - 1;
-
- __intel_pmu_pebs_event(event, iregs, at);
+ __intel_pmu_pebs_event(event, iregs, at, top, 0, n);
}
static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct debug_store *ds = cpuc->ds;
- struct perf_event *event = NULL;
- void *at, *top;
- u64 status = 0;
- int bit;
+ struct perf_event *event;
+ void *base, *at, *top;
+ short counts[MAX_PEBS_EVENTS] = {};
+ short error[MAX_PEBS_EVENTS] = {};
+ int bit, i;
if (!x86_pmu.pebs_active)
return;
- at = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
+ base = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index;
ds->pebs_index = ds->pebs_buffer_base;
- if (unlikely(at > top))
+ if (unlikely(base >= top))
return;
- /*
- * Should not happen, we program the threshold at 1 and do not
- * set a reset value.
- */
- WARN_ONCE(top - at > x86_pmu.max_pebs_events * x86_pmu.pebs_record_size,
- "Unexpected number of pebs records %ld\n",
- (long)(top - at) / x86_pmu.pebs_record_size);
-
- for (; at < top; at += x86_pmu.pebs_record_size) {
+ for (at = base; at < top; at += x86_pmu.pebs_record_size) {
struct pebs_record_nhm *p = at;
- for_each_set_bit(bit, (unsigned long *)&p->status,
- x86_pmu.max_pebs_events) {
- event = cpuc->events[bit];
- if (!test_bit(bit, cpuc->active_mask))
- continue;
-
- WARN_ON_ONCE(!event);
+ /* PEBS v3 has accurate status bits */
+ if (x86_pmu.intel_cap.pebs_format >= 3) {
+ for_each_set_bit(bit, (unsigned long *)&p->status,
+ MAX_PEBS_EVENTS)
+ counts[bit]++;
- if (!event->attr.precise_ip)
- continue;
+ continue;
+ }
- if (__test_and_set_bit(bit, (unsigned long *)&status))
+ bit = find_first_bit((unsigned long *)&p->status,
+ x86_pmu.max_pebs_events);
+ if (bit >= x86_pmu.max_pebs_events)
+ continue;
+ if (!test_bit(bit, cpuc->active_mask))
+ continue;
+ /*
+ * The PEBS hardware does not deal well with the situation
+ * when events happen near to each other and multiple bits
+ * are set. But it should happen rarely.
+ *
+ * If these events include one PEBS and multiple non-PEBS
+ * events, it doesn't impact PEBS record. The record will
+ * be handled normally. (slow path)
+ *
+ * If these events include two or more PEBS events, the
+ * records for the events can be collapsed into a single
+ * one, and it's not possible to reconstruct all events
+ * that caused the PEBS record. It's called collision.
+ * If collision happened, the record will be dropped.
+ *
+ */
+ if (p->status != (1 << bit)) {
+ u64 pebs_status;
+
+ /* slow path */
+ pebs_status = p->status & cpuc->pebs_enabled;
+ pebs_status &= (1ULL << MAX_PEBS_EVENTS) - 1;
+ if (pebs_status != (1 << bit)) {
+ for_each_set_bit(i, (unsigned long *)&pebs_status,
+ MAX_PEBS_EVENTS)
+ error[i]++;
continue;
-
- break;
+ }
}
+ counts[bit]++;
+ }
- if (!event || bit >= x86_pmu.max_pebs_events)
+ for (bit = 0; bit < x86_pmu.max_pebs_events; bit++) {
+ if ((counts[bit] == 0) && (error[bit] == 0))
continue;
+ event = cpuc->events[bit];
+ WARN_ON_ONCE(!event);
+ WARN_ON_ONCE(!event->attr.precise_ip);
- __intel_pmu_pebs_event(event, iregs, at);
+ /* log dropped samples number */
+ if (error[bit])
+ perf_log_lost_samples(event, error[bit]);
+
+ if (counts[bit]) {
+ __intel_pmu_pebs_event(event, iregs, base,
+ top, bit, counts[bit]);
+ }
}
}
diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
index 94e5b506caa6..452a7bd2dedb 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
@@ -96,6 +96,7 @@ enum {
X86_BR_NO_TX = 1 << 14,/* not in transaction */
X86_BR_ZERO_CALL = 1 << 15,/* zero length call */
X86_BR_CALL_STACK = 1 << 16,/* call stack */
+ X86_BR_IND_JMP = 1 << 17,/* indirect jump */
};
#define X86_BR_PLM (X86_BR_USER | X86_BR_KERNEL)
@@ -113,6 +114,7 @@ enum {
X86_BR_IRQ |\
X86_BR_ABORT |\
X86_BR_IND_CALL |\
+ X86_BR_IND_JMP |\
X86_BR_ZERO_CALL)
#define X86_BR_ALL (X86_BR_PLM | X86_BR_ANY)
@@ -262,9 +264,6 @@ void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in)
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct x86_perf_task_context *task_ctx;
- if (!x86_pmu.lbr_nr)
- return;
-
/*
* If LBR callstack feature is enabled and the stack was saved when
* the task was scheduled out, restore the stack. Otherwise flush
@@ -523,6 +522,9 @@ static int intel_pmu_setup_sw_lbr_filter(struct perf_event *event)
X86_BR_CALL_STACK;
}
+ if (br_type & PERF_SAMPLE_BRANCH_IND_JUMP)
+ mask |= X86_BR_IND_JMP;
+
/*
* stash actual user request into reg, it may
* be used by fixup code for some CPU
@@ -736,7 +738,7 @@ static int branch_type(unsigned long from, unsigned long to, int abort)
break;
case 4:
case 5:
- ret = X86_BR_JMP;
+ ret = X86_BR_IND_JMP;
break;
}
break;
@@ -844,6 +846,7 @@ static const int nhm_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
*/
[PERF_SAMPLE_BRANCH_IND_CALL_SHIFT] = LBR_IND_CALL | LBR_IND_JMP,
[PERF_SAMPLE_BRANCH_COND_SHIFT] = LBR_JCC,
+ [PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT] = LBR_IND_JMP,
};
static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
@@ -856,6 +859,7 @@ static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
| LBR_FAR,
[PERF_SAMPLE_BRANCH_IND_CALL_SHIFT] = LBR_IND_CALL,
[PERF_SAMPLE_BRANCH_COND_SHIFT] = LBR_JCC,
+ [PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT] = LBR_IND_JMP,
};
static const int hsw_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
@@ -870,6 +874,7 @@ static const int hsw_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
[PERF_SAMPLE_BRANCH_COND_SHIFT] = LBR_JCC,
[PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT] = LBR_REL_CALL | LBR_IND_CALL
| LBR_RETURN | LBR_CALL_STACK,
+ [PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT] = LBR_IND_JMP,
};
/* core */
diff --git a/arch/x86/kernel/cpu/perf_event_intel_pt.c b/arch/x86/kernel/cpu/perf_event_intel_pt.c
index 123ff1bb2f60..159887c3a89d 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_pt.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_pt.c
@@ -187,15 +187,6 @@ static bool pt_event_valid(struct perf_event *event)
* These all are cpu affine and operate on a local PT
*/
-static bool pt_is_running(void)
-{
- u64 ctl;
-
- rdmsrl(MSR_IA32_RTIT_CTL, ctl);
-
- return !!(ctl & RTIT_CTL_TRACEEN);
-}
-
static void pt_config(struct perf_event *event)
{
u64 reg;
@@ -609,7 +600,12 @@ static unsigned int pt_topa_next_entry(struct pt_buffer *buf, unsigned int pg)
* @handle: Current output handle.
*
* Place INT and STOP marks to prevent overwriting old data that the consumer
- * hasn't yet collected.
+ * hasn't yet collected and waking up the consumer after a certain fraction of
+ * the buffer has filled up. Only needed and sensible for non-snapshot counters.
+ *
+ * This obviously relies on buf::head to figure out buffer markers, so it has
+ * to be called after pt_buffer_reset_offsets() and before the hardware tracing
+ * is enabled.
*/
static int pt_buffer_reset_markers(struct pt_buffer *buf,
struct perf_output_handle *handle)
@@ -618,9 +614,6 @@ static int pt_buffer_reset_markers(struct pt_buffer *buf,
unsigned long head = local64_read(&buf->head);
unsigned long idx, npages, wakeup;
- if (buf->snapshot)
- return 0;
-
/* can't stop in the middle of an output region */
if (buf->output_off + handle->size + 1 <
sizes(TOPA_ENTRY(buf->cur, buf->cur_idx)->size))
@@ -674,7 +667,7 @@ static void pt_buffer_setup_topa_index(struct pt_buffer *buf)
struct topa *cur = buf->first, *prev = buf->last;
struct topa_entry *te_cur = TOPA_ENTRY(cur, 0),
*te_prev = TOPA_ENTRY(prev, prev->last - 1);
- int pg = 0, idx = 0, ntopa = 0;
+ int pg = 0, idx = 0;
while (pg < buf->nr_pages) {
int tidx;
@@ -689,9 +682,9 @@ static void pt_buffer_setup_topa_index(struct pt_buffer *buf)
/* advance to next topa table */
idx = 0;
cur = list_entry(cur->list.next, struct topa, list);
- ntopa++;
- } else
+ } else {
idx++;
+ }
te_cur = TOPA_ENTRY(cur, idx);
}
@@ -703,7 +696,14 @@ static void pt_buffer_setup_topa_index(struct pt_buffer *buf)
* @head: Write pointer (aux_head) from AUX buffer.
*
* Find the ToPA table and entry corresponding to given @head and set buffer's
- * "current" pointers accordingly.
+ * "current" pointers accordingly. This is done after we have obtained the
+ * current aux_head position from a successful call to perf_aux_output_begin()
+ * to make sure the hardware is writing to the right place.
+ *
+ * This function modifies buf::{cur,cur_idx,output_off} that will be programmed
+ * into PT msrs when the tracing is enabled and buf::head and buf::data_size,
+ * which are used to determine INT and STOP markers' locations by a subsequent
+ * call to pt_buffer_reset_markers().
*/
static void pt_buffer_reset_offsets(struct pt_buffer *buf, unsigned long head)
{
@@ -901,6 +901,7 @@ void intel_pt_interrupt(void)
}
pt_buffer_reset_offsets(buf, pt->handle.head);
+ /* snapshot counters don't use PMI, so it's safe */
ret = pt_buffer_reset_markers(buf, &pt->handle);
if (ret) {
perf_aux_output_end(&pt->handle, 0, true);
@@ -923,7 +924,7 @@ static void pt_event_start(struct perf_event *event, int mode)
struct pt *pt = this_cpu_ptr(&pt_ctx);
struct pt_buffer *buf = perf_get_aux(&pt->handle);
- if (pt_is_running() || !buf || pt_buffer_is_full(buf, pt)) {
+ if (!buf || pt_buffer_is_full(buf, pt)) {
event->hw.state = PERF_HES_STOPPED;
return;
}
@@ -954,7 +955,6 @@ static void pt_event_stop(struct perf_event *event, int mode)
event->hw.state = PERF_HES_STOPPED;
if (mode & PERF_EF_UPDATE) {
- struct pt *pt = this_cpu_ptr(&pt_ctx);
struct pt_buffer *buf = perf_get_aux(&pt->handle);
if (!buf)
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
index 90b7c501c95b..7c1de1610178 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -922,6 +922,9 @@ static int __init uncore_pci_init(void)
case 69: /* Haswell Celeron */
ret = hsw_uncore_pci_init();
break;
+ case 61: /* Broadwell */
+ ret = bdw_uncore_pci_init();
+ break;
default:
return 0;
}
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
index ceac8f5dc018..0f77f0a196e4 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
@@ -325,6 +325,7 @@ extern struct event_constraint uncore_constraint_empty;
int snb_uncore_pci_init(void);
int ivb_uncore_pci_init(void);
int hsw_uncore_pci_init(void);
+int bdw_uncore_pci_init(void);
void snb_uncore_cpu_init(void);
void nhm_uncore_cpu_init(void);
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c
index 4562e9e22c60..b005a78c7012 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c
@@ -7,6 +7,7 @@
#define PCI_DEVICE_ID_INTEL_IVB_E3_IMC 0x0150
#define PCI_DEVICE_ID_INTEL_HSW_IMC 0x0c00
#define PCI_DEVICE_ID_INTEL_HSW_U_IMC 0x0a04
+#define PCI_DEVICE_ID_INTEL_BDW_IMC 0x1604
/* SNB event control */
#define SNB_UNC_CTL_EV_SEL_MASK 0x000000ff
@@ -486,6 +487,14 @@ static const struct pci_device_id hsw_uncore_pci_ids[] = {
{ /* end: all zeroes */ },
};
+static const struct pci_device_id bdw_uncore_pci_ids[] = {
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BDW_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* end: all zeroes */ },
+};
+
static struct pci_driver snb_uncore_pci_driver = {
.name = "snb_uncore",
.id_table = snb_uncore_pci_ids,
@@ -501,6 +510,11 @@ static struct pci_driver hsw_uncore_pci_driver = {
.id_table = hsw_uncore_pci_ids,
};
+static struct pci_driver bdw_uncore_pci_driver = {
+ .name = "bdw_uncore",
+ .id_table = bdw_uncore_pci_ids,
+};
+
struct imc_uncore_pci_dev {
__u32 pci_id;
struct pci_driver *driver;
@@ -514,6 +528,7 @@ static const struct imc_uncore_pci_dev desktop_imc_pci_ids[] = {
IMC_DEV(IVB_E3_IMC, &ivb_uncore_pci_driver), /* Xeon E3-1200 v2/3rd Gen Core processor */
IMC_DEV(HSW_IMC, &hsw_uncore_pci_driver), /* 4th Gen Core Processor */
IMC_DEV(HSW_U_IMC, &hsw_uncore_pci_driver), /* 4th Gen Core ULT Mobile Processor */
+ IMC_DEV(BDW_IMC, &bdw_uncore_pci_driver), /* 5th Gen Core U */
{ /* end marker */ }
};
@@ -561,6 +576,11 @@ int hsw_uncore_pci_init(void)
return imc_uncore_pci_init();
}
+int bdw_uncore_pci_init(void)
+{
+ return imc_uncore_pci_init();
+}
+
/* end of Sandy Bridge uncore support */
/* Nehalem uncore support */
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 9435620062df..1681504e44a4 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -584,6 +584,39 @@ static void kvm_kick_cpu(int cpu)
kvm_hypercall2(KVM_HC_KICK_CPU, flags, apicid);
}
+
+#ifdef CONFIG_QUEUED_SPINLOCKS
+
+#include <asm/qspinlock.h>
+
+static void kvm_wait(u8 *ptr, u8 val)
+{
+ unsigned long flags;
+
+ if (in_nmi())
+ return;
+
+ local_irq_save(flags);
+
+ if (READ_ONCE(*ptr) != val)
+ goto out;
+
+ /*
+ * halt until it's our turn and kicked. Note that we do safe halt
+ * for irq enabled case to avoid hang when lock info is overwritten
+ * in irq spinlock slowpath and no spurious interrupt occur to save us.
+ */
+ if (arch_irqs_disabled_flags(flags))
+ halt();
+ else
+ safe_halt();
+
+out:
+ local_irq_restore(flags);
+}
+
+#else /* !CONFIG_QUEUED_SPINLOCKS */
+
enum kvm_contention_stat {
TAKEN_SLOW,
TAKEN_SLOW_PICKUP,
@@ -817,6 +850,8 @@ static void kvm_unlock_kick(struct arch_spinlock *lock, __ticket_t ticket)
}
}
+#endif /* !CONFIG_QUEUED_SPINLOCKS */
+
/*
* Setup pv_lock_ops to exploit KVM_FEATURE_PV_UNHALT if present.
*/
@@ -828,8 +863,16 @@ void __init kvm_spinlock_init(void)
if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT))
return;
+#ifdef CONFIG_QUEUED_SPINLOCKS
+ __pv_init_lock_hash();
+ pv_lock_ops.queued_spin_lock_slowpath = __pv_queued_spin_lock_slowpath;
+ pv_lock_ops.queued_spin_unlock = PV_CALLEE_SAVE(__pv_queued_spin_unlock);
+ pv_lock_ops.wait = kvm_wait;
+ pv_lock_ops.kick = kvm_kick_cpu;
+#else /* !CONFIG_QUEUED_SPINLOCKS */
pv_lock_ops.lock_spinning = PV_CALLEE_SAVE(kvm_lock_spinning);
pv_lock_ops.unlock_kick = kvm_unlock_kick;
+#endif
}
static __init int kvm_spinlock_init_jump(void)
diff --git a/arch/x86/kernel/paravirt-spinlocks.c b/arch/x86/kernel/paravirt-spinlocks.c
index bbb6c7316341..33ee3e0efd65 100644
--- a/arch/x86/kernel/paravirt-spinlocks.c
+++ b/arch/x86/kernel/paravirt-spinlocks.c
@@ -8,11 +8,33 @@
#include <asm/paravirt.h>
+#ifdef CONFIG_QUEUED_SPINLOCKS
+__visible void __native_queued_spin_unlock(struct qspinlock *lock)
+{
+ native_queued_spin_unlock(lock);
+}
+
+PV_CALLEE_SAVE_REGS_THUNK(__native_queued_spin_unlock);
+
+bool pv_is_native_spin_unlock(void)
+{
+ return pv_lock_ops.queued_spin_unlock.func ==
+ __raw_callee_save___native_queued_spin_unlock;
+}
+#endif
+
struct pv_lock_ops pv_lock_ops = {
#ifdef CONFIG_SMP
+#ifdef CONFIG_QUEUED_SPINLOCKS
+ .queued_spin_lock_slowpath = native_queued_spin_lock_slowpath,
+ .queued_spin_unlock = PV_CALLEE_SAVE(__native_queued_spin_unlock),
+ .wait = paravirt_nop,
+ .kick = paravirt_nop,
+#else /* !CONFIG_QUEUED_SPINLOCKS */
.lock_spinning = __PV_IS_CALLEE_SAVE(paravirt_nop),
.unlock_kick = paravirt_nop,
-#endif
+#endif /* !CONFIG_QUEUED_SPINLOCKS */
+#endif /* SMP */
};
EXPORT_SYMBOL(pv_lock_ops);
diff --git a/arch/x86/kernel/paravirt_patch_32.c b/arch/x86/kernel/paravirt_patch_32.c
index d9f32e6d6ab6..e1b013696dde 100644
--- a/arch/x86/kernel/paravirt_patch_32.c
+++ b/arch/x86/kernel/paravirt_patch_32.c
@@ -12,6 +12,10 @@ DEF_NATIVE(pv_mmu_ops, read_cr3, "mov %cr3, %eax");
DEF_NATIVE(pv_cpu_ops, clts, "clts");
DEF_NATIVE(pv_cpu_ops, read_tsc, "rdtsc");
+#if defined(CONFIG_PARAVIRT_SPINLOCKS) && defined(CONFIG_QUEUED_SPINLOCKS)
+DEF_NATIVE(pv_lock_ops, queued_spin_unlock, "movb $0, (%eax)");
+#endif
+
unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len)
{
/* arg in %eax, return in %eax */
@@ -24,6 +28,8 @@ unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len)
return 0;
}
+extern bool pv_is_native_spin_unlock(void);
+
unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
unsigned long addr, unsigned len)
{
@@ -47,14 +53,22 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
PATCH_SITE(pv_mmu_ops, write_cr3);
PATCH_SITE(pv_cpu_ops, clts);
PATCH_SITE(pv_cpu_ops, read_tsc);
-
- patch_site:
- ret = paravirt_patch_insns(ibuf, len, start, end);
- break;
+#if defined(CONFIG_PARAVIRT_SPINLOCKS) && defined(CONFIG_QUEUED_SPINLOCKS)
+ case PARAVIRT_PATCH(pv_lock_ops.queued_spin_unlock):
+ if (pv_is_native_spin_unlock()) {
+ start = start_pv_lock_ops_queued_spin_unlock;
+ end = end_pv_lock_ops_queued_spin_unlock;
+ goto patch_site;
+ }
+#endif
default:
ret = paravirt_patch_default(type, clobbers, ibuf, addr, len);
break;
+
+patch_site:
+ ret = paravirt_patch_insns(ibuf, len, start, end);
+ break;
}
#undef PATCH_SITE
return ret;
diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c
index a1da6737ba5b..a1fa86782186 100644
--- a/arch/x86/kernel/paravirt_patch_64.c
+++ b/arch/x86/kernel/paravirt_patch_64.c
@@ -21,6 +21,10 @@ DEF_NATIVE(pv_cpu_ops, swapgs, "swapgs");
DEF_NATIVE(, mov32, "mov %edi, %eax");
DEF_NATIVE(, mov64, "mov %rdi, %rax");
+#if defined(CONFIG_PARAVIRT_SPINLOCKS) && defined(CONFIG_QUEUED_SPINLOCKS)
+DEF_NATIVE(pv_lock_ops, queued_spin_unlock, "movb $0, (%rdi)");
+#endif
+
unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len)
{
return paravirt_patch_insns(insnbuf, len,
@@ -33,6 +37,8 @@ unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len)
start__mov64, end__mov64);
}
+extern bool pv_is_native_spin_unlock(void);
+
unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
unsigned long addr, unsigned len)
{
@@ -59,14 +65,22 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
PATCH_SITE(pv_cpu_ops, clts);
PATCH_SITE(pv_mmu_ops, flush_tlb_single);
PATCH_SITE(pv_cpu_ops, wbinvd);
-
- patch_site:
- ret = paravirt_patch_insns(ibuf, len, start, end);
- break;
+#if defined(CONFIG_PARAVIRT_SPINLOCKS) && defined(CONFIG_QUEUED_SPINLOCKS)
+ case PARAVIRT_PATCH(pv_lock_ops.queued_spin_unlock):
+ if (pv_is_native_spin_unlock()) {
+ start = start_pv_lock_ops_queued_spin_unlock;
+ end = end_pv_lock_ops_queued_spin_unlock;
+ goto patch_site;
+ }
+#endif
default:
ret = paravirt_patch_default(type, clobbers, ibuf, addr, len);
break;
+
+patch_site:
+ ret = paravirt_patch_insns(ibuf, len, start, end);
+ break;
}
#undef PATCH_SITE
return ret;
diff --git a/arch/x86/um/asm/barrier.h b/arch/x86/um/asm/barrier.h
index 7e8a1a650435..b9531d343134 100644
--- a/arch/x86/um/asm/barrier.h
+++ b/arch/x86/um/asm/barrier.h
@@ -39,7 +39,8 @@
#define smp_mb() barrier()
#define smp_rmb() barrier()
#define smp_wmb() barrier()
-#define set_mb(var, value) do { var = value; barrier(); } while (0)
+
+#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); barrier(); } while (0)
#define read_barrier_depends() do { } while (0)
#define smp_read_barrier_depends() do { } while (0)
diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
index 956374c1edbc..9e2ba5c6e1dd 100644
--- a/arch/x86/xen/spinlock.c
+++ b/arch/x86/xen/spinlock.c
@@ -17,6 +17,56 @@
#include "xen-ops.h"
#include "debugfs.h"
+static DEFINE_PER_CPU(int, lock_kicker_irq) = -1;
+static DEFINE_PER_CPU(char *, irq_name);
+static bool xen_pvspin = true;
+
+#ifdef CONFIG_QUEUED_SPINLOCKS
+
+#include <asm/qspinlock.h>
+
+static void xen_qlock_kick(int cpu)
+{
+ xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR);
+}
+
+/*
+ * Halt the current CPU & release it back to the host
+ */
+static void xen_qlock_wait(u8 *byte, u8 val)
+{
+ int irq = __this_cpu_read(lock_kicker_irq);
+
+ /* If kicker interrupts not initialized yet, just spin */
+ if (irq == -1)
+ return;
+
+ /* clear pending */
+ xen_clear_irq_pending(irq);
+ barrier();
+
+ /*
+ * We check the byte value after clearing pending IRQ to make sure
+ * that we won't miss a wakeup event because of the clearing.
+ *
+ * The sync_clear_bit() call in xen_clear_irq_pending() is atomic.
+ * So it is effectively a memory barrier for x86.
+ */
+ if (READ_ONCE(*byte) != val)
+ return;
+
+ /*
+ * If an interrupt happens here, it will leave the wakeup irq
+ * pending, which will cause xen_poll_irq() to return
+ * immediately.
+ */
+
+ /* Block until irq becomes pending (or perhaps a spurious wakeup) */
+ xen_poll_irq(irq);
+}
+
+#else /* CONFIG_QUEUED_SPINLOCKS */
+
enum xen_contention_stat {
TAKEN_SLOW,
TAKEN_SLOW_PICKUP,
@@ -100,12 +150,9 @@ struct xen_lock_waiting {
__ticket_t want;
};
-static DEFINE_PER_CPU(int, lock_kicker_irq) = -1;
-static DEFINE_PER_CPU(char *, irq_name);
static DEFINE_PER_CPU(struct xen_lock_waiting, lock_waiting);
static cpumask_t waiting_cpus;
-static bool xen_pvspin = true;
__visible void xen_lock_spinning(struct arch_spinlock *lock, __ticket_t want)
{
int irq = __this_cpu_read(lock_kicker_irq);
@@ -217,6 +264,7 @@ static void xen_unlock_kick(struct arch_spinlock *lock, __ticket_t next)
}
}
}
+#endif /* CONFIG_QUEUED_SPINLOCKS */
static irqreturn_t dummy_handler(int irq, void *dev_id)
{
@@ -280,8 +328,16 @@ void __init xen_init_spinlocks(void)
return;
}
printk(KERN_DEBUG "xen: PV spinlocks enabled\n");
+#ifdef CONFIG_QUEUED_SPINLOCKS
+ __pv_init_lock_hash();
+ pv_lock_ops.queued_spin_lock_slowpath = __pv_queued_spin_lock_slowpath;
+ pv_lock_ops.queued_spin_unlock = PV_CALLEE_SAVE(__pv_queued_spin_unlock);
+ pv_lock_ops.wait = xen_qlock_wait;
+ pv_lock_ops.kick = xen_qlock_kick;
+#else
pv_lock_ops.lock_spinning = PV_CALLEE_SAVE(xen_lock_spinning);
pv_lock_ops.unlock_kick = xen_unlock_kick;
+#endif
}
/*
@@ -310,7 +366,7 @@ static __init int xen_parse_nopvspin(char *arg)
}
early_param("xen_nopvspin", xen_parse_nopvspin);
-#ifdef CONFIG_XEN_DEBUG_FS
+#if defined(CONFIG_XEN_DEBUG_FS) && !defined(CONFIG_QUEUED_SPINLOCKS)
static struct dentry *d_spin_debug;