summaryrefslogtreecommitdiffstats
path: root/arch/powerpc/include
diff options
context:
space:
mode:
authorBenjamin Herrenschmidt <benh@kernel.crashing.org>2017-04-05 17:54:50 +1000
committerMichael Ellerman <mpe@ellerman.id.au>2017-04-10 21:41:34 +1000
commit243e25112d06b348f087a6f7aba4bbc288285bdd (patch)
treec4f605e98e6e8cf6dbe730c946142943b82ac38e /arch/powerpc/include
parenta978e13965a40ac07163643cc3fa0ddb0d354198 (diff)
downloadlinux-243e25112d06b348f087a6f7aba4bbc288285bdd.tar.bz2
powerpc/xive: Native exploitation of the XIVE interrupt controller
The XIVE interrupt controller is the new interrupt controller found in POWER9. It supports advanced virtualization capabilities among other things. Currently we use a set of firmware calls that simulate the old "XICS" interrupt controller but this is fairly inefficient. This adds the framework for using XIVE along with a native backend which OPAL for configuration. Later, a backend allowing the use in a KVM or PowerVM guest will also be provided. This disables some fast path for interrupts in KVM when XIVE is enabled as these rely on the firmware emulation code which is no longer available when the XIVE is used natively by Linux. A latter patch will make KVM also directly exploit the XIVE, thus recovering the lost performance (and more). Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org> [mpe: Fixup pr_xxx("XIVE:"...), don't split pr_xxx() strings, tweak Kconfig so XIVE_NATIVE selects XIVE and depends on POWERNV, fix build errors when SMP=n, fold in fixes from Ben: Don't call cpu_online() on an invalid CPU number Fix irq target selection returning out of bounds cpu# Extra sanity checks on cpu numbers ] Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Diffstat (limited to 'arch/powerpc/include')
-rw-r--r--arch/powerpc/include/asm/xive-regs.h97
-rw-r--r--arch/powerpc/include/asm/xive.h163
-rw-r--r--arch/powerpc/include/asm/xmon.h2
3 files changed, 262 insertions, 0 deletions
diff --git a/arch/powerpc/include/asm/xive-regs.h b/arch/powerpc/include/asm/xive-regs.h
new file mode 100644
index 000000000000..1d3f2be5ae39
--- /dev/null
+++ b/arch/powerpc/include/asm/xive-regs.h
@@ -0,0 +1,97 @@
+/*
+ * Copyright 2016,2017 IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#ifndef _ASM_POWERPC_XIVE_REGS_H
+#define _ASM_POWERPC_XIVE_REGS_H
+
+/*
+ * Thread Management (aka "TM") registers
+ */
+
+/* TM register offsets */
+#define TM_QW0_USER 0x000 /* All rings */
+#define TM_QW1_OS 0x010 /* Ring 0..2 */
+#define TM_QW2_HV_POOL 0x020 /* Ring 0..1 */
+#define TM_QW3_HV_PHYS 0x030 /* Ring 0..1 */
+
+/* Byte offsets inside a QW QW0 QW1 QW2 QW3 */
+#define TM_NSR 0x0 /* + + - + */
+#define TM_CPPR 0x1 /* - + - + */
+#define TM_IPB 0x2 /* - + + + */
+#define TM_LSMFB 0x3 /* - + + + */
+#define TM_ACK_CNT 0x4 /* - + - - */
+#define TM_INC 0x5 /* - + - + */
+#define TM_AGE 0x6 /* - + - + */
+#define TM_PIPR 0x7 /* - + - + */
+
+#define TM_WORD0 0x0
+#define TM_WORD1 0x4
+
+/*
+ * QW word 2 contains the valid bit at the top and other fields
+ * depending on the QW.
+ */
+#define TM_WORD2 0x8
+#define TM_QW0W2_VU PPC_BIT32(0)
+#define TM_QW0W2_LOGIC_SERV PPC_BITMASK32(1,31) // XX 2,31 ?
+#define TM_QW1W2_VO PPC_BIT32(0)
+#define TM_QW1W2_OS_CAM PPC_BITMASK32(8,31)
+#define TM_QW2W2_VP PPC_BIT32(0)
+#define TM_QW2W2_POOL_CAM PPC_BITMASK32(8,31)
+#define TM_QW3W2_VT PPC_BIT32(0)
+#define TM_QW3W2_LP PPC_BIT32(6)
+#define TM_QW3W2_LE PPC_BIT32(7)
+#define TM_QW3W2_T PPC_BIT32(31)
+
+/*
+ * In addition to normal loads to "peek" and writes (only when invalid)
+ * using 4 and 8 bytes accesses, the above registers support these
+ * "special" byte operations:
+ *
+ * - Byte load from QW0[NSR] - User level NSR (EBB)
+ * - Byte store to QW0[NSR] - User level NSR (EBB)
+ * - Byte load/store to QW1[CPPR] and QW3[CPPR] - CPPR access
+ * - Byte load from QW3[TM_WORD2] - Read VT||00000||LP||LE on thrd 0
+ * otherwise VT||0000000
+ * - Byte store to QW3[TM_WORD2] - Set VT bit (and LP/LE if present)
+ *
+ * Then we have all these "special" CI ops at these offset that trigger
+ * all sorts of side effects:
+ */
+#define TM_SPC_ACK_EBB 0x800 /* Load8 ack EBB to reg*/
+#define TM_SPC_ACK_OS_REG 0x810 /* Load16 ack OS irq to reg */
+#define TM_SPC_PUSH_USR_CTX 0x808 /* Store32 Push/Validate user context */
+#define TM_SPC_PULL_USR_CTX 0x808 /* Load32 Pull/Invalidate user context */
+#define TM_SPC_SET_OS_PENDING 0x812 /* Store8 Set OS irq pending bit */
+#define TM_SPC_PULL_OS_CTX 0x818 /* Load32/Load64 Pull/Invalidate OS context to reg */
+#define TM_SPC_PULL_POOL_CTX 0x828 /* Load32/Load64 Pull/Invalidate Pool context to reg*/
+#define TM_SPC_ACK_HV_REG 0x830 /* Load16 ack HV irq to reg */
+#define TM_SPC_PULL_USR_CTX_OL 0xc08 /* Store8 Pull/Inval usr ctx to odd line */
+#define TM_SPC_ACK_OS_EL 0xc10 /* Store8 ack OS irq to even line */
+#define TM_SPC_ACK_HV_POOL_EL 0xc20 /* Store8 ack HV evt pool to even line */
+#define TM_SPC_ACK_HV_EL 0xc30 /* Store8 ack HV irq to even line */
+/* XXX more... */
+
+/* NSR fields for the various QW ack types */
+#define TM_QW0_NSR_EB PPC_BIT8(0)
+#define TM_QW1_NSR_EO PPC_BIT8(0)
+#define TM_QW3_NSR_HE PPC_BITMASK8(0,1)
+#define TM_QW3_NSR_HE_NONE 0
+#define TM_QW3_NSR_HE_POOL 1
+#define TM_QW3_NSR_HE_PHYS 2
+#define TM_QW3_NSR_HE_LSI 3
+#define TM_QW3_NSR_I PPC_BIT8(2)
+#define TM_QW3_NSR_GRP_LVL PPC_BIT8(3,7)
+
+/* Utilities to manipulate these (originaly from OPAL) */
+#define MASK_TO_LSH(m) (__builtin_ffsl(m) - 1)
+#define GETFIELD(m, v) (((v) & (m)) >> MASK_TO_LSH(m))
+#define SETFIELD(m, v, val) \
+ (((v) & ~(m)) | ((((typeof(v))(val)) << MASK_TO_LSH(m)) & (m)))
+
+#endif /* _ASM_POWERPC_XIVE_REGS_H */
diff --git a/arch/powerpc/include/asm/xive.h b/arch/powerpc/include/asm/xive.h
new file mode 100644
index 000000000000..3cdbeaeac397
--- /dev/null
+++ b/arch/powerpc/include/asm/xive.h
@@ -0,0 +1,163 @@
+/*
+ * Copyright 2016,2017 IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#ifndef _ASM_POWERPC_XIVE_H
+#define _ASM_POWERPC_XIVE_H
+
+#define XIVE_INVALID_VP 0xffffffff
+
+#ifdef CONFIG_PPC_XIVE
+
+/*
+ * Thread Interrupt Management Area (TIMA)
+ *
+ * This is a global MMIO region divided in 4 pages of varying access
+ * permissions, providing access to per-cpu interrupt management
+ * functions. It always identifies the CPU doing the access based
+ * on the PowerBus initiator ID, thus we always access via the
+ * same offset regardless of where the code is executing
+ */
+extern void __iomem *xive_tima;
+
+/*
+ * Offset in the TM area of our current execution level (provided by
+ * the backend)
+ */
+extern u32 xive_tima_offset;
+
+/*
+ * Per-irq data (irq_get_handler_data for normal IRQs), IPIs
+ * have it stored in the xive_cpu structure. We also cache
+ * for normal interrupts the current target CPU.
+ *
+ * This structure is setup by the backend for each interrupt.
+ */
+struct xive_irq_data {
+ u64 flags;
+ u64 eoi_page;
+ void __iomem *eoi_mmio;
+ u64 trig_page;
+ void __iomem *trig_mmio;
+ u32 esb_shift;
+ int src_chip;
+
+ /* Setup/used by frontend */
+ int target;
+ bool saved_p;
+};
+#define XIVE_IRQ_FLAG_STORE_EOI 0x01
+#define XIVE_IRQ_FLAG_LSI 0x02
+#define XIVE_IRQ_FLAG_SHIFT_BUG 0x04
+#define XIVE_IRQ_FLAG_MASK_FW 0x08
+#define XIVE_IRQ_FLAG_EOI_FW 0x10
+
+#define XIVE_INVALID_CHIP_ID -1
+
+/* A queue tracking structure in a CPU */
+struct xive_q {
+ __be32 *qpage;
+ u32 msk;
+ u32 idx;
+ u32 toggle;
+ u64 eoi_phys;
+ u32 esc_irq;
+ atomic_t count;
+ atomic_t pending_count;
+};
+
+/*
+ * "magic" Event State Buffer (ESB) MMIO offsets.
+ *
+ * Each interrupt source has a 2-bit state machine called ESB
+ * which can be controlled by MMIO. It's made of 2 bits, P and
+ * Q. P indicates that an interrupt is pending (has been sent
+ * to a queue and is waiting for an EOI). Q indicates that the
+ * interrupt has been triggered while pending.
+ *
+ * This acts as a coalescing mechanism in order to guarantee
+ * that a given interrupt only occurs at most once in a queue.
+ *
+ * When doing an EOI, the Q bit will indicate if the interrupt
+ * needs to be re-triggered.
+ *
+ * The following offsets into the ESB MMIO allow to read or
+ * manipulate the PQ bits. They must be used with an 8-bytes
+ * load instruction. They all return the previous state of the
+ * interrupt (atomically).
+ *
+ * Additionally, some ESB pages support doing an EOI via a
+ * store at 0 and some ESBs support doing a trigger via a
+ * separate trigger page.
+ */
+#define XIVE_ESB_GET 0x800
+#define XIVE_ESB_SET_PQ_00 0xc00
+#define XIVE_ESB_SET_PQ_01 0xd00
+#define XIVE_ESB_SET_PQ_10 0xe00
+#define XIVE_ESB_SET_PQ_11 0xf00
+#define XIVE_ESB_MASK XIVE_ESB_SET_PQ_01
+
+#define XIVE_ESB_VAL_P 0x2
+#define XIVE_ESB_VAL_Q 0x1
+
+/* Global enable flags for the XIVE support */
+extern bool __xive_enabled;
+
+static inline bool xive_enabled(void) { return __xive_enabled; }
+
+extern bool xive_native_init(void);
+extern void xive_smp_probe(void);
+extern int xive_smp_prepare_cpu(unsigned int cpu);
+extern void xive_smp_setup_cpu(void);
+extern void xive_smp_disable_cpu(void);
+extern void xive_kexec_teardown_cpu(int secondary);
+extern void xive_shutdown(void);
+extern void xive_flush_interrupt(void);
+
+/* xmon hook */
+extern void xmon_xive_do_dump(int cpu);
+
+/* APIs used by KVM */
+extern u32 xive_native_default_eq_shift(void);
+extern u32 xive_native_alloc_vp_block(u32 max_vcpus);
+extern void xive_native_free_vp_block(u32 vp_base);
+extern int xive_native_populate_irq_data(u32 hw_irq,
+ struct xive_irq_data *data);
+extern void xive_cleanup_irq_data(struct xive_irq_data *xd);
+extern u32 xive_native_alloc_irq(void);
+extern void xive_native_free_irq(u32 irq);
+extern int xive_native_configure_irq(u32 hw_irq, u32 target, u8 prio, u32 sw_irq);
+
+extern int xive_native_configure_queue(u32 vp_id, struct xive_q *q, u8 prio,
+ __be32 *qpage, u32 order, bool can_escalate);
+extern void xive_native_disable_queue(u32 vp_id, struct xive_q *q, u8 prio);
+
+extern bool __xive_irq_trigger(struct xive_irq_data *xd);
+extern bool __xive_irq_retrigger(struct xive_irq_data *xd);
+extern void xive_do_source_eoi(u32 hw_irq, struct xive_irq_data *xd);
+
+extern bool is_xive_irq(struct irq_chip *chip);
+
+#else
+
+static inline bool xive_enabled(void) { return false; }
+
+static inline bool xive_native_init(void) { return false; }
+static inline void xive_smp_probe(void) { }
+extern inline int xive_smp_prepare_cpu(unsigned int cpu) { return -EINVAL; }
+static inline void xive_smp_setup_cpu(void) { }
+static inline void xive_smp_disable_cpu(void) { }
+static inline void xive_kexec_teardown_cpu(int secondary) { }
+static inline void xive_shutdown(void) { }
+static inline void xive_flush_interrupt(void) { }
+
+static inline u32 xive_native_alloc_vp_block(u32 max_vcpus) { return XIVE_INVALID_VP; }
+static inline void xive_native_free_vp_block(u32 vp_base) { }
+
+#endif
+
+#endif /* _ASM_POWERPC_XIVE_H */
diff --git a/arch/powerpc/include/asm/xmon.h b/arch/powerpc/include/asm/xmon.h
index 5eb8e599e5cc..eb42a0c6e1d9 100644
--- a/arch/powerpc/include/asm/xmon.h
+++ b/arch/powerpc/include/asm/xmon.h
@@ -29,5 +29,7 @@ static inline void xmon_register_spus(struct list_head *list) { };
extern int cpus_are_in_xmon(void);
#endif
+extern void xmon_printf(const char *format, ...);
+
#endif /* __KERNEL __ */
#endif /* __ASM_POWERPC_XMON_H */