From 2bdd424f26be1c98b6e3d9acfffb5559c131c888 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 12 Dec 2012 19:20:52 +0000 Subject: ARM: psci: add support for PSCI invocations from the kernel This patch adds support for the Power State Coordination Interface defined by ARM, allowing Linux to request CPU-centric power-management operations from firmware implementing the PSCI protocol. Acked-by: Arnd Bergmann Acked-by: Nicolas Pitre Signed-off-by: Will Deacon --- arch/arm/kernel/Makefile | 1 + arch/arm/kernel/psci.c | 211 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 212 insertions(+) create mode 100644 arch/arm/kernel/psci.c (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index 5bbec7b8183e..5f3338eacad2 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -82,5 +82,6 @@ obj-$(CONFIG_DEBUG_LL) += debug.o obj-$(CONFIG_EARLY_PRINTK) += early_printk.o obj-$(CONFIG_ARM_VIRT_EXT) += hyp-stub.o +obj-$(CONFIG_ARM_PSCI) += psci.o extra-y := $(head-y) vmlinux.lds diff --git a/arch/arm/kernel/psci.c b/arch/arm/kernel/psci.c new file mode 100644 index 000000000000..36531643cc2c --- /dev/null +++ b/arch/arm/kernel/psci.c @@ -0,0 +1,211 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * Copyright (C) 2012 ARM Limited + * + * Author: Will Deacon + */ + +#define pr_fmt(fmt) "psci: " fmt + +#include +#include + +#include +#include +#include +#include +#include + +struct psci_operations psci_ops; + +static int (*invoke_psci_fn)(u32, u32, u32, u32); + +enum psci_function { + PSCI_FN_CPU_SUSPEND, + PSCI_FN_CPU_ON, + PSCI_FN_CPU_OFF, + PSCI_FN_MIGRATE, + PSCI_FN_MAX, +}; + +static u32 psci_function_id[PSCI_FN_MAX]; + +#define PSCI_RET_SUCCESS 0 +#define PSCI_RET_EOPNOTSUPP -1 +#define PSCI_RET_EINVAL -2 +#define PSCI_RET_EPERM -3 + +static int psci_to_linux_errno(int errno) +{ + switch (errno) { + case PSCI_RET_SUCCESS: + return 0; + case PSCI_RET_EOPNOTSUPP: + return -EOPNOTSUPP; + case PSCI_RET_EINVAL: + return -EINVAL; + case PSCI_RET_EPERM: + return -EPERM; + }; + + return -EINVAL; +} + +#define PSCI_POWER_STATE_ID_MASK 0xffff +#define PSCI_POWER_STATE_ID_SHIFT 0 +#define PSCI_POWER_STATE_TYPE_MASK 0x1 +#define PSCI_POWER_STATE_TYPE_SHIFT 16 +#define PSCI_POWER_STATE_AFFL_MASK 0x3 +#define PSCI_POWER_STATE_AFFL_SHIFT 24 + +static u32 psci_power_state_pack(struct psci_power_state state) +{ + return ((state.id & PSCI_POWER_STATE_ID_MASK) + << PSCI_POWER_STATE_ID_SHIFT) | + ((state.type & PSCI_POWER_STATE_TYPE_MASK) + << PSCI_POWER_STATE_TYPE_SHIFT) | + ((state.affinity_level & PSCI_POWER_STATE_AFFL_MASK) + << PSCI_POWER_STATE_AFFL_SHIFT); +} + +/* + * The following two functions are invoked via the invoke_psci_fn pointer + * and will not be inlined, allowing us to piggyback on the AAPCS. + */ +static noinline int __invoke_psci_fn_hvc(u32 function_id, u32 arg0, u32 arg1, + u32 arg2) +{ + asm volatile( + __asmeq("%0", "r0") + __asmeq("%1", "r1") + __asmeq("%2", "r2") + __asmeq("%3", "r3") + __HVC(0) + : "+r" (function_id) + : "r" (arg0), "r" (arg1), "r" (arg2)); + + return function_id; +} + +static noinline int __invoke_psci_fn_smc(u32 function_id, u32 arg0, u32 arg1, + u32 arg2) +{ + asm volatile( + __asmeq("%0", "r0") + __asmeq("%1", "r1") + __asmeq("%2", "r2") + __asmeq("%3", "r3") + __SMC(0) + : "+r" (function_id) + : "r" (arg0), "r" (arg1), "r" (arg2)); + + return function_id; +} + +static int psci_cpu_suspend(struct psci_power_state state, + unsigned long entry_point) +{ + int err; + u32 fn, power_state; + + fn = psci_function_id[PSCI_FN_CPU_SUSPEND]; + power_state = psci_power_state_pack(state); + err = invoke_psci_fn(fn, power_state, entry_point, 0); + return psci_to_linux_errno(err); +} + +static int psci_cpu_off(struct psci_power_state state) +{ + int err; + u32 fn, power_state; + + fn = psci_function_id[PSCI_FN_CPU_OFF]; + power_state = psci_power_state_pack(state); + err = invoke_psci_fn(fn, power_state, 0, 0); + return psci_to_linux_errno(err); +} + +static int psci_cpu_on(unsigned long cpuid, unsigned long entry_point) +{ + int err; + u32 fn; + + fn = psci_function_id[PSCI_FN_CPU_ON]; + err = invoke_psci_fn(fn, cpuid, entry_point, 0); + return psci_to_linux_errno(err); +} + +static int psci_migrate(unsigned long cpuid) +{ + int err; + u32 fn; + + fn = psci_function_id[PSCI_FN_MIGRATE]; + err = invoke_psci_fn(fn, cpuid, 0, 0); + return psci_to_linux_errno(err); +} + +static const struct of_device_id psci_of_match[] __initconst = { + { .compatible = "arm,psci", }, + {}, +}; + +static int __init psci_init(void) +{ + struct device_node *np; + const char *method; + u32 id; + + np = of_find_matching_node(NULL, psci_of_match); + if (!np) + return 0; + + pr_info("probing function IDs from device-tree\n"); + + if (of_property_read_string(np, "method", &method)) { + pr_warning("missing \"method\" property\n"); + goto out_put_node; + } + + if (!strcmp("hvc", method)) { + invoke_psci_fn = __invoke_psci_fn_hvc; + } else if (!strcmp("smc", method)) { + invoke_psci_fn = __invoke_psci_fn_smc; + } else { + pr_warning("invalid \"method\" property: %s\n", method); + goto out_put_node; + } + + if (!of_property_read_u32(np, "cpu_suspend", &id)) { + psci_function_id[PSCI_FN_CPU_SUSPEND] = id; + psci_ops.cpu_suspend = psci_cpu_suspend; + } + + if (!of_property_read_u32(np, "cpu_off", &id)) { + psci_function_id[PSCI_FN_CPU_OFF] = id; + psci_ops.cpu_off = psci_cpu_off; + } + + if (!of_property_read_u32(np, "cpu_on", &id)) { + psci_function_id[PSCI_FN_CPU_ON] = id; + psci_ops.cpu_on = psci_cpu_on; + } + + if (!of_property_read_u32(np, "migrate", &id)) { + psci_function_id[PSCI_FN_MIGRATE] = id; + psci_ops.migrate = psci_migrate; + } + +out_put_node: + of_node_put(np); + return 0; +} +early_initcall(psci_init); -- cgit v1.2.3 From 02051ead976d854df1de726841c4a646826ec860 Mon Sep 17 00:00:00 2001 From: Dietmar Eggemann Date: Sun, 14 Oct 2012 20:23:04 +0100 Subject: ARM: coresight: common definition for (OS) Lock Access Register key value Coresight components and debug are using a common lock control mechansim. Writing 0xC5ACCE55 to the Lock Access Register (LAR) in case of a coresight components enables further access to the coresight device registers. Writing any other value to it removes the write access. Writing 0xC5ACCE55 to the OS Lock Access Register (OSLAR) in case of debug locks the debug register for further access to the debug registers. Writing any other value to it unlocks the debug registers. Unfortunately, the existing coresight code uses the terms lock and unlock the other way around. Unlocking stands for enabling write access and locking for removing write access. That is why the definition of the LAR and OSLAR key value has been changed to CS_LAR_KEY. Signed-off-by: Dietmar Eggemann Signed-off-by: Will Deacon --- arch/arm/include/asm/cti.h | 10 +++------- arch/arm/include/asm/hardware/coresight.h | 6 +++--- arch/arm/kernel/hw_breakpoint.c | 5 +++-- 3 files changed, 9 insertions(+), 12 deletions(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/include/asm/cti.h b/arch/arm/include/asm/cti.h index f2e5cad3f306..2381199acb7d 100644 --- a/arch/arm/include/asm/cti.h +++ b/arch/arm/include/asm/cti.h @@ -2,6 +2,7 @@ #define __ASMARM_CTI_H #include +#include /* The registers' definition is from section 3.2 of * Embedded Cross Trigger Revision: r0p0 @@ -35,11 +36,6 @@ #define LOCKACCESS 0xFB0 #define LOCKSTATUS 0xFB4 -/* write this value to LOCKACCESS will unlock the module, and - * other value will lock the module - */ -#define LOCKCODE 0xC5ACCE55 - /** * struct cti - cross trigger interface struct * @base: mapped virtual address for the cti base @@ -146,7 +142,7 @@ static inline void cti_irq_ack(struct cti *cti) */ static inline void cti_unlock(struct cti *cti) { - __raw_writel(LOCKCODE, cti->base + LOCKACCESS); + __raw_writel(CS_LAR_KEY, cti->base + LOCKACCESS); } /** @@ -158,6 +154,6 @@ static inline void cti_unlock(struct cti *cti) */ static inline void cti_lock(struct cti *cti) { - __raw_writel(~LOCKCODE, cti->base + LOCKACCESS); + __raw_writel(~CS_LAR_KEY, cti->base + LOCKACCESS); } #endif diff --git a/arch/arm/include/asm/hardware/coresight.h b/arch/arm/include/asm/hardware/coresight.h index 7ecd793b8f5a..0cf7a6b842ff 100644 --- a/arch/arm/include/asm/hardware/coresight.h +++ b/arch/arm/include/asm/hardware/coresight.h @@ -36,7 +36,7 @@ /* CoreSight Component Registers */ #define CSCR_CLASS 0xff4 -#define UNLOCK_MAGIC 0xc5acce55 +#define CS_LAR_KEY 0xc5acce55 /* ETM control register, "ETM Architecture", 3.3.1 */ #define ETMR_CTRL 0 @@ -147,11 +147,11 @@ #define etm_lock(t) do { etm_writel((t), 0, CSMR_LOCKACCESS); } while (0) #define etm_unlock(t) \ - do { etm_writel((t), UNLOCK_MAGIC, CSMR_LOCKACCESS); } while (0) + do { etm_writel((t), CS_LAR_KEY, CSMR_LOCKACCESS); } while (0) #define etb_lock(t) do { etb_writel((t), 0, CSMR_LOCKACCESS); } while (0) #define etb_unlock(t) \ - do { etb_writel((t), UNLOCK_MAGIC, CSMR_LOCKACCESS); } while (0) + do { etb_writel((t), CS_LAR_KEY, CSMR_LOCKACCESS); } while (0) #endif /* __ASM_HARDWARE_CORESIGHT_H */ diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c index 5ff2e77782b1..34e9375d96a6 100644 --- a/arch/arm/kernel/hw_breakpoint.c +++ b/arch/arm/kernel/hw_breakpoint.c @@ -35,6 +35,7 @@ #include #include #include +#include /* Breakpoint currently in use for each BRP. */ static DEFINE_PER_CPU(struct perf_event *, bp_on_reg[ARM_MAX_BRP]); @@ -955,9 +956,9 @@ static void reset_ctrl_regs(void *unused) /* * Unconditionally clear the OS lock by writing a value - * other than 0xC5ACCE55 to the access register. + * other than CS_LAR_KEY to the access register. */ - ARM_DBG_WRITE(c1, c0, 4, 0); + ARM_DBG_WRITE(c1, c0, 4, ~CS_LAR_KEY); isb(); /* -- cgit v1.2.3 From 57ba899731156ab01cdb7dae8d1fe6430ef4957c Mon Sep 17 00:00:00 2001 From: Dietmar Eggemann Date: Sun, 14 Oct 2012 21:08:14 +0100 Subject: ARM: hw_breakpoint: Check function for OS Save and Restore mechanism v7 debug introduced OS Save and Restore mechanism. On a v7 debug SinglePower system, i.e a system without a separate core and debug power domain, which does not support external debug over powerdown, it is implementation defined whether OS Save and Restore is implemented. v7.1 debug requires OS Save and Restore mechanism. v6 debug and v6.1 debug do not implement it. A new global variable bool has_ossr is introduced and is determined in arch_hw_breakpoint_init() like debug_arch or the number of BRPs/WRPs. The logic how to check if OS Save and Restore is supported has changed with this patch. In reset_ctrl_regs() a mask consisting of OSLM[1] (OSLSR.3) and OSLM[0] (OSLSR.0) was used to check if the system supports OS Save and Restore. In the new function core_has_os_save_restore() only OSLM[0] is used. It is not necessary to check OSLM[1] too since it is v7.1 debug specific and v7.1 debug requires OS Save and Restore and thus OS Lock. Signed-off-by: Dietmar Eggemann Signed-off-by: Will Deacon --- arch/arm/include/asm/hw_breakpoint.h | 3 +++ arch/arm/kernel/hw_breakpoint.c | 28 +++++++++++++++++++++++----- 2 files changed, 26 insertions(+), 5 deletions(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/include/asm/hw_breakpoint.h b/arch/arm/include/asm/hw_breakpoint.h index 01169dd723f1..eef55ea9ef00 100644 --- a/arch/arm/include/asm/hw_breakpoint.h +++ b/arch/arm/include/asm/hw_breakpoint.h @@ -85,6 +85,9 @@ static inline void decode_ctrl_reg(u32 reg, #define ARM_DSCR_HDBGEN (1 << 14) #define ARM_DSCR_MDBGEN (1 << 15) +/* OSLSR os lock model bits */ +#define ARM_OSLSR_OSLM0 (1 << 0) + /* opcode2 numbers for the co-processor instructions. */ #define ARM_OP2_BVR 4 #define ARM_OP2_BCR 5 diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c index 34e9375d96a6..201d4406fe0d 100644 --- a/arch/arm/kernel/hw_breakpoint.c +++ b/arch/arm/kernel/hw_breakpoint.c @@ -50,6 +50,9 @@ static int core_num_wrps; /* Debug architecture version. */ static u8 debug_arch; +/* Does debug architecture support OS Save and Restore? */ +static bool has_ossr; + /* Maximum supported watchpoint length. */ static u8 max_watchpoint_len; @@ -904,6 +907,23 @@ static struct undef_hook debug_reg_hook = { .fn = debug_reg_trap, }; +/* Does this core support OS Save and Restore? */ +static bool core_has_os_save_restore(void) +{ + u32 oslsr; + + switch (get_debug_arch()) { + case ARM_DEBUG_ARCH_V7_1: + return true; + case ARM_DEBUG_ARCH_V7_ECP14: + ARM_DBG_READ(c1, c1, 4, oslsr); + if (oslsr & ARM_OSLSR_OSLM0) + return true; + default: + return false; + } +} + static void reset_ctrl_regs(void *unused) { int i, raw_num_brps, err = 0, cpu = smp_processor_id(); @@ -931,11 +951,7 @@ static void reset_ctrl_regs(void *unused) if ((val & 0x1) == 0) err = -EPERM; - /* - * Check whether we implement OS save and restore. - */ - ARM_DBG_READ(c1, c1, 4, val); - if ((val & 0x9) == 0) + if (!has_ossr) goto clear_vcr; break; case ARM_DEBUG_ARCH_V7_1: @@ -1025,6 +1041,8 @@ static int __init arch_hw_breakpoint_init(void) return 0; } + has_ossr = core_has_os_save_restore(); + /* Determine how many BRPs/WRPs are available. */ core_num_brps = get_num_brps(); core_num_wrps = get_num_wrps(); -- cgit v1.2.3 From 9a6eb310eaa5336b89a27a0bbb78da4bba35f6f1 Mon Sep 17 00:00:00 2001 From: Dietmar Eggemann Date: Sun, 14 Oct 2012 22:25:37 +0100 Subject: ARM: hw_breakpoint: Debug powerdown support for self-hosted debug This patch introduces debug powerdown support for self-hosted debug for v7 and v7.1 debug architecture for a SinglePower system, i.e. a system without a separate core and debug power domain. On a SinglePower system the OS Lock is lost over a powerdown. If CONFIG_CPU_PM is set the new function pm_init() registers hw_breakpoint with CPU PM for a system supporting OS Save and Restore. Receiving a CPU PM EXIT notifier indicates that a single CPU has exited a low power state. A call to reset_ctrl_regs() is hooked into the CPU PM EXIT notifier chain. This function makes sure that the sticky power-down is clear (only v7 debug), the OS Double Lock is clear (only v7.1 debug) and it clears the OS Lock for v7 debug (for a system supporting OS Save and Restore) and v7.1 debug. Furthermore, it clears any vector-catch events and all breakpoint/watchpoint control/value registers for v7 and v7.1 debug. Signed-off-by: Dietmar Eggemann [will: removed redundant has_ossr check] Signed-off-by: Will Deacon --- arch/arm/kernel/hw_breakpoint.c | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c index 201d4406fe0d..5eae53e7a2e1 100644 --- a/arch/arm/kernel/hw_breakpoint.c +++ b/arch/arm/kernel/hw_breakpoint.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -1032,6 +1033,30 @@ static struct notifier_block __cpuinitdata dbg_reset_nb = { .notifier_call = dbg_reset_notify, }; +#ifdef CONFIG_CPU_PM +static int dbg_cpu_pm_notify(struct notifier_block *self, unsigned long action, + void *v) +{ + if (action == CPU_PM_EXIT) + reset_ctrl_regs(NULL); + + return NOTIFY_OK; +} + +static struct notifier_block __cpuinitdata dbg_cpu_pm_nb = { + .notifier_call = dbg_cpu_pm_notify, +}; + +static void __init pm_init(void) +{ + cpu_pm_register_notifier(&dbg_cpu_pm_nb); +} +#else +static inline void pm_init(void) +{ +} +#endif + static int __init arch_hw_breakpoint_init(void) { debug_arch = get_debug_arch(); @@ -1081,8 +1106,9 @@ static int __init arch_hw_breakpoint_init(void) hook_ifault_code(FAULT_CODE_DEBUG, hw_breakpoint_pending, SIGTRAP, TRAP_HWBKPT, "breakpoint debug exception"); - /* Register hotplug notifier. */ + /* Register hotplug and PM notifiers. */ register_cpu_notifier(&dbg_reset_nb); + pm_init(); return 0; } arch_initcall(arch_hw_breakpoint_init); -- cgit v1.2.3 From 3b953c9c159e99f5465dc05dc90ca405fe9a4436 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Tue, 18 Dec 2012 04:06:38 +0000 Subject: ARM: Use implementor and part defines from cputype.h Instead of decoding implementor numbers, part numbers and Xscale architecture masks inline in the pmu probing function, use defines and accessor functions from cputype.h, which can also be shared by other subsystems, such as KVM. Signed-off-by: Christoffer Dall Signed-off-by: Will Deacon --- arch/arm/kernel/perf_event_cpu.c | 34 ++++++++++++++++------------------ 1 file changed, 16 insertions(+), 18 deletions(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c index 5f6620684e25..efa52954d628 100644 --- a/arch/arm/kernel/perf_event_cpu.c +++ b/arch/arm/kernel/perf_event_cpu.c @@ -201,48 +201,46 @@ static struct platform_device_id cpu_pmu_plat_device_ids[] = { static int probe_current_pmu(struct arm_pmu *pmu) { int cpu = get_cpu(); - unsigned long cpuid = read_cpuid_id(); - unsigned long implementor = (cpuid & 0xFF000000) >> 24; - unsigned long part_number = (cpuid & 0xFFF0); + unsigned long implementor = read_cpuid_implementor(); + unsigned long part_number = read_cpuid_part_number(); int ret = -ENODEV; pr_info("probing PMU on CPU %d\n", cpu); /* ARM Ltd CPUs. */ - if (0x41 == implementor) { + if (implementor == ARM_CPU_IMP_ARM) { switch (part_number) { - case 0xB360: /* ARM1136 */ - case 0xB560: /* ARM1156 */ - case 0xB760: /* ARM1176 */ + case ARM_CPU_PART_ARM1136: + case ARM_CPU_PART_ARM1156: + case ARM_CPU_PART_ARM1176: ret = armv6pmu_init(pmu); break; - case 0xB020: /* ARM11mpcore */ + case ARM_CPU_PART_ARM11MPCORE: ret = armv6mpcore_pmu_init(pmu); break; - case 0xC080: /* Cortex-A8 */ + case ARM_CPU_PART_CORTEX_A8: ret = armv7_a8_pmu_init(pmu); break; - case 0xC090: /* Cortex-A9 */ + case ARM_CPU_PART_CORTEX_A9: ret = armv7_a9_pmu_init(pmu); break; - case 0xC050: /* Cortex-A5 */ + case ARM_CPU_PART_CORTEX_A5: ret = armv7_a5_pmu_init(pmu); break; - case 0xC0F0: /* Cortex-A15 */ + case ARM_CPU_PART_CORTEX_A15: ret = armv7_a15_pmu_init(pmu); break; - case 0xC070: /* Cortex-A7 */ + case ARM_CPU_PART_CORTEX_A7: ret = armv7_a7_pmu_init(pmu); break; } /* Intel CPUs [xscale]. */ - } else if (0x69 == implementor) { - part_number = (cpuid >> 13) & 0x7; - switch (part_number) { - case 1: + } else if (implementor == ARM_CPU_IMP_INTEL) { + switch (xscale_cpu_arch_version()) { + case ARM_CPU_XSCALE_ARCH_V1: ret = xscale1pmu_init(pmu); break; - case 2: + case ARM_CPU_XSCALE_ARCH_V2: ret = xscale2pmu_init(pmu); break; } -- cgit v1.2.3 From 1764c591dfed2ce7075465df0591ce9564ff37a1 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 14 Jan 2013 17:27:35 +0000 Subject: ARM: perf: remove redundant NULL check on cpu_pmu cpu_pmu has already been dereferenced before we consider invoking the ->reset function, so remove the redundant NULL check. Reported-by: Cong Ding Signed-off-by: Will Deacon --- arch/arm/kernel/perf_event_cpu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c index efa52954d628..43496f600569 100644 --- a/arch/arm/kernel/perf_event_cpu.c +++ b/arch/arm/kernel/perf_event_cpu.c @@ -147,7 +147,7 @@ static void cpu_pmu_init(struct arm_pmu *cpu_pmu) cpu_pmu->free_irq = cpu_pmu_free_irq; /* Ensure the PMU has sane values out of reset. */ - if (cpu_pmu && cpu_pmu->reset) + if (cpu_pmu->reset) on_each_cpu(cpu_pmu->reset, cpu_pmu, 1); } -- cgit v1.2.3 From 40c390c768f898497e17d934f6715d516ff67294 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 16 Jan 2013 12:01:59 +0000 Subject: ARM: perf: don't pretend to support counting of L1I writes ARM has a harvard cache architecture and cannot write directly to the I-side. This patch removes the L1I write events from the cache map (which previously returned *read* events in many cases). Reported-by: Mike Williams Signed-off-by: Will Deacon --- arch/arm/kernel/perf_event_v6.c | 4 ++-- arch/arm/kernel/perf_event_v7.c | 18 +++++++++--------- arch/arm/kernel/perf_event_xscale.c | 2 +- 3 files changed, 12 insertions(+), 12 deletions(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/perf_event_v6.c b/arch/arm/kernel/perf_event_v6.c index 041d0526a288..03664b0e8fa4 100644 --- a/arch/arm/kernel/perf_event_v6.c +++ b/arch/arm/kernel/perf_event_v6.c @@ -106,7 +106,7 @@ static const unsigned armv6_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] }, [C(OP_WRITE)] = { [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6_PERFCTR_ICACHE_MISS, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, }, [C(OP_PREFETCH)] = { [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, @@ -259,7 +259,7 @@ static const unsigned armv6mpcore_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] }, [C(OP_WRITE)] = { [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ICACHE_MISS, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, }, [C(OP_PREFETCH)] = { [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c index 4fbc757d9cff..8c79a9e70b83 100644 --- a/arch/arm/kernel/perf_event_v7.c +++ b/arch/arm/kernel/perf_event_v7.c @@ -157,8 +157,8 @@ static const unsigned armv7_a8_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_ICACHE_REFILL, }, [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = ARMV7_A8_PERFCTR_L1_ICACHE_ACCESS, - [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_ICACHE_REFILL, + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, }, [C(OP_PREFETCH)] = { [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, @@ -282,7 +282,7 @@ static const unsigned armv7_a9_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] }, [C(OP_WRITE)] = { [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_ICACHE_REFILL, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, }, [C(OP_PREFETCH)] = { [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, @@ -399,8 +399,8 @@ static const unsigned armv7_a5_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_ICACHE_REFILL, }, [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_ICACHE_ACCESS, - [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_ICACHE_REFILL, + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, }, /* * The prefetch counters don't differentiate between the I @@ -527,8 +527,8 @@ static const unsigned armv7_a15_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_ICACHE_REFILL, }, [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_ICACHE_ACCESS, - [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_ICACHE_REFILL, + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, }, [C(OP_PREFETCH)] = { [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, @@ -651,8 +651,8 @@ static const unsigned armv7_a7_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_ICACHE_REFILL, }, [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_ICACHE_ACCESS, - [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_ICACHE_REFILL, + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, }, [C(OP_PREFETCH)] = { [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, diff --git a/arch/arm/kernel/perf_event_xscale.c b/arch/arm/kernel/perf_event_xscale.c index 2b0fe30ec12e..63990c42fac9 100644 --- a/arch/arm/kernel/perf_event_xscale.c +++ b/arch/arm/kernel/perf_event_xscale.c @@ -83,7 +83,7 @@ static const unsigned xscale_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] }, [C(OP_WRITE)] = { [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = XSCALE_PERFCTR_ICACHE_MISS, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, }, [C(OP_PREFETCH)] = { [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, -- cgit v1.2.3 From 76b8a0e4c8bda5f03574b8a904331266d162c796 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 18 Jan 2013 13:42:58 +0000 Subject: ARM: perf: handle armpmu_register failing Currently perf_pmu_register may fail for several reasons (e.g. being unable to allocate memory for the struct device it associates with each PMU), and while any error is propagated by armpmu_register, it is ignored by cpu_pmu_device_probe and not propagated to the caller. This also results in a leak of a struct arm_pmu. This patch adds cleanup if armpmu_register fails, and updates the info messages to better differentiate this type of failure from a failure to probe the PMU type from the hardware or dt. Signed-off-by: Mark Rutland Signed-off-by: Will Deacon --- arch/arm/kernel/perf_event_cpu.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c index 43496f600569..1f2740e3dbc0 100644 --- a/arch/arm/kernel/perf_event_cpu.c +++ b/arch/arm/kernel/perf_event_cpu.c @@ -277,17 +277,22 @@ static int cpu_pmu_device_probe(struct platform_device *pdev) } if (ret) { - pr_info("failed to register PMU devices!"); - kfree(pmu); - return ret; + pr_info("failed to probe PMU!"); + goto out_free; } cpu_pmu = pmu; cpu_pmu->plat_device = pdev; cpu_pmu_init(cpu_pmu); - armpmu_register(cpu_pmu, PERF_TYPE_RAW); + ret = armpmu_register(cpu_pmu, PERF_TYPE_RAW); - return 0; + if (!ret) + return 0; + +out_free: + pr_info("failed to register PMU devices!"); + kfree(pmu); + return ret; } static struct platform_driver cpu_pmu_driver = { -- cgit v1.2.3 From 8f3b90b585d3e879b03ce2a202da04d59dd5b699 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 18 Jan 2013 13:42:59 +0000 Subject: ARM: perf: remove unnecessary checks for idx < 0 We currently check for hwx->idx < 0 in armpmu_read and armpmu_del unnecessarily. The only case where hwc->idx < 0 is when armpmu_add fails, in which case the event's state is set to PERF_EVENT_STATE_INACTIVE. The perf core will not attempt to read from an event in PERF_EVENT_STATE_INACTIVE, and so the check in armpmu_read is unnecessary. Similarly, if perf core cannot add an event it will not attempt to delete it, so the WARN_ON in armpmu_del is unnecessary. This patch removes these two redundant checks. Signed-off-by: Mark Rutland Signed-off-by: Will Deacon --- arch/arm/kernel/perf_event.c | 8 -------- 1 file changed, 8 deletions(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index f9e8657dd241..6df1969811c8 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -149,12 +149,6 @@ again: static void armpmu_read(struct perf_event *event) { - struct hw_perf_event *hwc = &event->hw; - - /* Don't read disabled counters! */ - if (hwc->idx < 0) - return; - armpmu_event_update(event); } @@ -207,8 +201,6 @@ armpmu_del(struct perf_event *event, int flags) struct hw_perf_event *hwc = &event->hw; int idx = hwc->idx; - WARN_ON(idx < 0); - armpmu_stop(event, PERF_EF_UPDATE); hw_events->events[idx] = NULL; clear_bit(idx, hw_events->used_mask); -- cgit v1.2.3 From 9dcbf466559f6f2f55d60eb5a1bbebc8e694b52a Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 18 Jan 2013 16:10:06 +0000 Subject: ARM: perf: simplify __hw_perf_event_init err handling Currently __hw_perf_event_init has an err variable that's ignored right until the end, where it's initialised, conditionally set, and then used as a boolean flag deciding whether to return another error code. This patch removes the err variable and simplifies the associated error handling logic. Signed-off-by: Mark Rutland Signed-off-by: Will Deacon --- arch/arm/kernel/perf_event.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index 6df1969811c8..31e0eb353cd8 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -350,7 +350,7 @@ __hw_perf_event_init(struct perf_event *event) { struct arm_pmu *armpmu = to_arm_pmu(event->pmu); struct hw_perf_event *hwc = &event->hw; - int mapping, err; + int mapping; mapping = armpmu->map_event(event); @@ -399,14 +399,12 @@ __hw_perf_event_init(struct perf_event *event) local64_set(&hwc->period_left, hwc->sample_period); } - err = 0; if (event->group_leader != event) { - err = validate_group(event); - if (err) + if (validate_group(event) != 0); return -EINVAL; } - return err; + return 0; } static int armpmu_event_init(struct perf_event *event) -- cgit v1.2.3 From 9e9a367c29cebd25a356d53414612e115efdadcf Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Sun, 20 Jan 2013 18:43:10 -0500 Subject: ARM: Section based HYP idmap Add a method (hyp_idmap_setup) to populate a hyp pgd with an identity mapping of the code contained in the .hyp.idmap.text section. Offer a method to drop this identity mapping through hyp_idmap_teardown. Make all the above depend on CONFIG_ARM_VIRT_EXT and CONFIG_ARM_LPAE. Reviewed-by: Will Deacon Reviewed-by: Marcelo Tosatti Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall --- arch/arm/include/asm/idmap.h | 1 + arch/arm/include/asm/pgtable-3level-hwdef.h | 1 + arch/arm/kernel/vmlinux.lds.S | 6 +++- arch/arm/mm/idmap.c | 55 +++++++++++++++++++++++------ 4 files changed, 51 insertions(+), 12 deletions(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/include/asm/idmap.h b/arch/arm/include/asm/idmap.h index bf863edb517d..1a66f907e5cc 100644 --- a/arch/arm/include/asm/idmap.h +++ b/arch/arm/include/asm/idmap.h @@ -8,6 +8,7 @@ #define __idmap __section(.idmap.text) noinline notrace extern pgd_t *idmap_pgd; +extern pgd_t *hyp_pgd; void setup_mm_for_reboot(void); diff --git a/arch/arm/include/asm/pgtable-3level-hwdef.h b/arch/arm/include/asm/pgtable-3level-hwdef.h index d7952824c5c4..a2d404ed1ade 100644 --- a/arch/arm/include/asm/pgtable-3level-hwdef.h +++ b/arch/arm/include/asm/pgtable-3level-hwdef.h @@ -44,6 +44,7 @@ #define PMD_SECT_XN (_AT(pmdval_t, 1) << 54) #define PMD_SECT_AP_WRITE (_AT(pmdval_t, 0)) #define PMD_SECT_AP_READ (_AT(pmdval_t, 0)) +#define PMD_SECT_AP1 (_AT(pmdval_t, 1) << 6) #define PMD_SECT_TEX(x) (_AT(pmdval_t, 0)) /* diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S index 11c1785bf63e..b571484e9f03 100644 --- a/arch/arm/kernel/vmlinux.lds.S +++ b/arch/arm/kernel/vmlinux.lds.S @@ -19,7 +19,11 @@ ALIGN_FUNCTION(); \ VMLINUX_SYMBOL(__idmap_text_start) = .; \ *(.idmap.text) \ - VMLINUX_SYMBOL(__idmap_text_end) = .; + VMLINUX_SYMBOL(__idmap_text_end) = .; \ + ALIGN_FUNCTION(); \ + VMLINUX_SYMBOL(__hyp_idmap_text_start) = .; \ + *(.hyp.idmap.text) \ + VMLINUX_SYMBOL(__hyp_idmap_text_end) = .; #ifdef CONFIG_HOTPLUG_CPU #define ARM_CPU_DISCARD(x) diff --git a/arch/arm/mm/idmap.c b/arch/arm/mm/idmap.c index 99db769307ec..2dffc010cc41 100644 --- a/arch/arm/mm/idmap.c +++ b/arch/arm/mm/idmap.c @@ -1,4 +1,6 @@ +#include #include +#include #include #include @@ -6,6 +8,7 @@ #include #include #include +#include pgd_t *idmap_pgd; @@ -59,11 +62,17 @@ static void idmap_add_pud(pgd_t *pgd, unsigned long addr, unsigned long end, } while (pud++, addr = next, addr != end); } -static void identity_mapping_add(pgd_t *pgd, unsigned long addr, unsigned long end) +static void identity_mapping_add(pgd_t *pgd, const char *text_start, + const char *text_end, unsigned long prot) { - unsigned long prot, next; + unsigned long addr, end; + unsigned long next; + + addr = virt_to_phys(text_start); + end = virt_to_phys(text_end); + + prot |= PMD_TYPE_SECT | PMD_SECT_AP_WRITE | PMD_SECT_AF; - prot = PMD_TYPE_SECT | PMD_SECT_AP_WRITE | PMD_SECT_AF; if (cpu_architecture() <= CPU_ARCH_ARMv5TEJ && !cpu_is_xscale()) prot |= PMD_BIT4; @@ -74,28 +83,52 @@ static void identity_mapping_add(pgd_t *pgd, unsigned long addr, unsigned long e } while (pgd++, addr = next, addr != end); } +#if defined(CONFIG_ARM_VIRT_EXT) && defined(CONFIG_ARM_LPAE) +pgd_t *hyp_pgd; + +extern char __hyp_idmap_text_start[], __hyp_idmap_text_end[]; + +static int __init init_static_idmap_hyp(void) +{ + hyp_pgd = kzalloc(PTRS_PER_PGD * sizeof(pgd_t), GFP_KERNEL); + if (!hyp_pgd) + return -ENOMEM; + + pr_info("Setting up static HYP identity map for 0x%p - 0x%p\n", + __hyp_idmap_text_start, __hyp_idmap_text_end); + identity_mapping_add(hyp_pgd, __hyp_idmap_text_start, + __hyp_idmap_text_end, PMD_SECT_AP1); + + return 0; +} +#else +static int __init init_static_idmap_hyp(void) +{ + return 0; +} +#endif + extern char __idmap_text_start[], __idmap_text_end[]; static int __init init_static_idmap(void) { - phys_addr_t idmap_start, idmap_end; + int ret; idmap_pgd = pgd_alloc(&init_mm); if (!idmap_pgd) return -ENOMEM; - /* Add an identity mapping for the physical address of the section. */ - idmap_start = virt_to_phys((void *)__idmap_text_start); - idmap_end = virt_to_phys((void *)__idmap_text_end); + pr_info("Setting up static identity map for 0x%p - 0x%p\n", + __idmap_text_start, __idmap_text_end); + identity_mapping_add(idmap_pgd, __idmap_text_start, + __idmap_text_end, 0); - pr_info("Setting up static identity map for 0x%llx - 0x%llx\n", - (long long)idmap_start, (long long)idmap_end); - identity_mapping_add(idmap_pgd, idmap_start, idmap_end); + ret = init_static_idmap_hyp(); /* Flush L1 for the hardware to see this page table content */ flush_cache_louis(); - return 0; + return ret; } early_initcall(init_static_idmap); -- cgit v1.2.3 From f7ed45be3ba524e06a6d933f0517dc7ad2d06703 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Sun, 20 Jan 2013 18:47:42 -0500 Subject: KVM: ARM: World-switch implementation Provides complete world-switch implementation to switch to other guests running in non-secure modes. Includes Hyp exception handlers that capture necessary exception information and stores the information on the VCPU and KVM structures. The following Hyp-ABI is also documented in the code: Hyp-ABI: Calling HYP-mode functions from host (in SVC mode): Switching to Hyp mode is done through a simple HVC #0 instruction. The exception vector code will check that the HVC comes from VMID==0 and if so will push the necessary state (SPSR, lr_usr) on the Hyp stack. - r0 contains a pointer to a HYP function - r1, r2, and r3 contain arguments to the above function. - The HYP function will be called with its arguments in r0, r1 and r2. On HYP function return, we return directly to SVC. A call to a function executing in Hyp mode is performed like the following: ldr r0, =BSYM(my_hyp_fn) ldr r1, =my_param hvc #0 ; Call my_hyp_fn(my_param) from HYP mode Otherwise, the world-switch is pretty straight-forward. All state that can be modified by the guest is first backed up on the Hyp stack and the VCPU values is loaded onto the hardware. State, which is not loaded, but theoretically modifiable by the guest is protected through the virtualiation features to generate a trap and cause software emulation. Upon guest returns, all state is restored from hardware onto the VCPU struct and the original state is restored from the Hyp-stack onto the hardware. SMP support using the VMPIDR calculated on the basis of the host MPIDR and overriding the low bits with KVM vcpu_id contributed by Marc Zyngier. Reuse of VMIDs has been implemented by Antonios Motakis and adapated from a separate patch into the appropriate patches introducing the functionality. Note that the VMIDs are stored per VM as required by the ARM architecture reference manual. To support VFP/NEON we trap those instructions using the HPCTR. When we trap, we switch the FPU. After a guest exit, the VFP state is returned to the host. When disabling access to floating point instructions, we also mask FPEXC_EN in order to avoid the guest receiving Undefined instruction exceptions before we have a chance to switch back the floating point state. We are reusing vfp_hard_struct, so we depend on VFPv3 being enabled in the host kernel, if not, we still trap cp10 and cp11 in order to inject an undefined instruction exception whenever the guest tries to use VFP/NEON. VFP/NEON developed by Antionios Motakis and Rusty Russell. Aborts that are permission faults, and not stage-1 page table walk, do not report the faulting address in the HPFAR. We have to resolve the IPA, and store it just like the HPFAR register on the VCPU struct. If the IPA cannot be resolved, it means another CPU is playing with the page tables, and we simply restart the guest. This quirk was fixed by Marc Zyngier. Reviewed-by: Will Deacon Reviewed-by: Marcelo Tosatti Signed-off-by: Rusty Russell Signed-off-by: Antonios Motakis Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_arm.h | 51 +++++ arch/arm/include/asm/kvm_host.h | 13 ++ arch/arm/kernel/asm-offsets.c | 25 +++ arch/arm/kvm/arm.c | 200 +++++++++++++++++- arch/arm/kvm/interrupts.S | 396 +++++++++++++++++++++++++++++++++++- arch/arm/kvm/interrupts_head.S | 441 ++++++++++++++++++++++++++++++++++++++++ 6 files changed, 1122 insertions(+), 4 deletions(-) create mode 100644 arch/arm/kvm/interrupts_head.S (limited to 'arch/arm/kernel') diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h index d64b5250ad4e..c69936b1fc53 100644 --- a/arch/arm/include/asm/kvm_arm.h +++ b/arch/arm/include/asm/kvm_arm.h @@ -98,6 +98,18 @@ #define TTBCR_T0SZ 3 #define HTCR_MASK (TTBCR_T0SZ | TTBCR_IRGN0 | TTBCR_ORGN0 | TTBCR_SH0) +/* Hyp System Trap Register */ +#define HSTR_T(x) (1 << x) +#define HSTR_TTEE (1 << 16) +#define HSTR_TJDBX (1 << 17) + +/* Hyp Coprocessor Trap Register */ +#define HCPTR_TCP(x) (1 << x) +#define HCPTR_TCP_MASK (0x3fff) +#define HCPTR_TASE (1 << 15) +#define HCPTR_TTA (1 << 20) +#define HCPTR_TCPAC (1 << 31) + /* Hyp Debug Configuration Register bits */ #define HDCR_TDRA (1 << 11) #define HDCR_TDOSA (1 << 10) @@ -144,6 +156,45 @@ #else #define VTTBR_X (5 - KVM_T0SZ) #endif +#define VTTBR_BADDR_SHIFT (VTTBR_X - 1) +#define VTTBR_BADDR_MASK (((1LLU << (40 - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT) +#define VTTBR_VMID_SHIFT (48LLU) +#define VTTBR_VMID_MASK (0xffLLU << VTTBR_VMID_SHIFT) + +/* Hyp Syndrome Register (HSR) bits */ +#define HSR_EC_SHIFT (26) +#define HSR_EC (0x3fU << HSR_EC_SHIFT) +#define HSR_IL (1U << 25) +#define HSR_ISS (HSR_IL - 1) +#define HSR_ISV_SHIFT (24) +#define HSR_ISV (1U << HSR_ISV_SHIFT) +#define HSR_FSC (0x3f) +#define HSR_FSC_TYPE (0x3c) +#define HSR_WNR (1 << 6) + +#define FSC_FAULT (0x04) +#define FSC_PERM (0x0c) + +/* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */ +#define HPFAR_MASK (~0xf) +#define HSR_EC_UNKNOWN (0x00) +#define HSR_EC_WFI (0x01) +#define HSR_EC_CP15_32 (0x03) +#define HSR_EC_CP15_64 (0x04) +#define HSR_EC_CP14_MR (0x05) +#define HSR_EC_CP14_LS (0x06) +#define HSR_EC_CP_0_13 (0x07) +#define HSR_EC_CP10_ID (0x08) +#define HSR_EC_JAZELLE (0x09) +#define HSR_EC_BXJ (0x0A) +#define HSR_EC_CP14_64 (0x0C) +#define HSR_EC_SVC_HYP (0x11) +#define HSR_EC_HVC (0x12) +#define HSR_EC_SMC (0x13) +#define HSR_EC_IABT (0x20) +#define HSR_EC_IABT_HYP (0x21) +#define HSR_EC_DABT (0x24) +#define HSR_EC_DABT_HYP (0x25) #endif /* __ARM_KVM_ARM_H__ */ diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 3636c7ea4eb2..7a121089c733 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -21,6 +21,7 @@ #include #include +#include #define KVM_MAX_VCPUS CONFIG_KVM_ARM_MAX_VCPUS #define KVM_MEMORY_SLOTS 32 @@ -85,6 +86,14 @@ struct kvm_vcpu_arch { u32 hxfar; /* Hyp Data/Inst Fault Address Register */ u32 hpfar; /* Hyp IPA Fault Address Register */ + /* Floating point registers (VFP and Advanced SIMD/NEON) */ + struct vfp_hard_struct vfp_guest; + struct vfp_hard_struct *vfp_host; + + /* + * Anything that is not used directly from assembly code goes + * here. + */ /* Interrupt related fields */ u32 irq_lines; /* IRQ and FIQ levels */ @@ -93,6 +102,9 @@ struct kvm_vcpu_arch { /* Cache some mmu pages needed inside spinlock regions */ struct kvm_mmu_memory_cache mmu_page_cache; + + /* Detect first run of a vcpu */ + bool has_run_once; }; struct kvm_vm_stat { @@ -112,6 +124,7 @@ struct kvm_one_reg; int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); u64 kvm_call_hyp(void *hypfn, ...); +void force_vm_exit(const cpumask_t *mask); #define KVM_ARCH_WANT_MMU_NOTIFIER struct kvm; diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c index c985b481192c..c8b3272dfed1 100644 --- a/arch/arm/kernel/asm-offsets.c +++ b/arch/arm/kernel/asm-offsets.c @@ -13,6 +13,9 @@ #include #include #include +#ifdef CONFIG_KVM_ARM_HOST +#include +#endif #include #include #include @@ -146,5 +149,27 @@ int main(void) DEFINE(DMA_BIDIRECTIONAL, DMA_BIDIRECTIONAL); DEFINE(DMA_TO_DEVICE, DMA_TO_DEVICE); DEFINE(DMA_FROM_DEVICE, DMA_FROM_DEVICE); +#ifdef CONFIG_KVM_ARM_HOST + DEFINE(VCPU_KVM, offsetof(struct kvm_vcpu, kvm)); + DEFINE(VCPU_MIDR, offsetof(struct kvm_vcpu, arch.midr)); + DEFINE(VCPU_CP15, offsetof(struct kvm_vcpu, arch.cp15)); + DEFINE(VCPU_VFP_GUEST, offsetof(struct kvm_vcpu, arch.vfp_guest)); + DEFINE(VCPU_VFP_HOST, offsetof(struct kvm_vcpu, arch.vfp_host)); + DEFINE(VCPU_REGS, offsetof(struct kvm_vcpu, arch.regs)); + DEFINE(VCPU_USR_REGS, offsetof(struct kvm_vcpu, arch.regs.usr_regs)); + DEFINE(VCPU_SVC_REGS, offsetof(struct kvm_vcpu, arch.regs.svc_regs)); + DEFINE(VCPU_ABT_REGS, offsetof(struct kvm_vcpu, arch.regs.abt_regs)); + DEFINE(VCPU_UND_REGS, offsetof(struct kvm_vcpu, arch.regs.und_regs)); + DEFINE(VCPU_IRQ_REGS, offsetof(struct kvm_vcpu, arch.regs.irq_regs)); + DEFINE(VCPU_FIQ_REGS, offsetof(struct kvm_vcpu, arch.regs.fiq_regs)); + DEFINE(VCPU_PC, offsetof(struct kvm_vcpu, arch.regs.usr_regs.ARM_pc)); + DEFINE(VCPU_CPSR, offsetof(struct kvm_vcpu, arch.regs.usr_regs.ARM_cpsr)); + DEFINE(VCPU_IRQ_LINES, offsetof(struct kvm_vcpu, arch.irq_lines)); + DEFINE(VCPU_HSR, offsetof(struct kvm_vcpu, arch.hsr)); + DEFINE(VCPU_HxFAR, offsetof(struct kvm_vcpu, arch.hxfar)); + DEFINE(VCPU_HPFAR, offsetof(struct kvm_vcpu, arch.hpfar)); + DEFINE(VCPU_HYP_PC, offsetof(struct kvm_vcpu, arch.hyp_pc)); + DEFINE(KVM_VTTBR, offsetof(struct kvm, arch.vttbr)); +#endif return 0; } diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 2101152c3a4b..9e9fa4477884 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -40,6 +40,7 @@ #include #include #include +#include #ifdef REQUIRES_VIRT __asm__(".arch_extension virt"); @@ -49,6 +50,10 @@ static DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page); static struct vfp_hard_struct __percpu *kvm_host_vfp_state; static unsigned long hyp_default_vectors; +/* The VMID used in the VTTBR */ +static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1); +static u8 kvm_next_vmid; +static DEFINE_SPINLOCK(kvm_vmid_lock); int kvm_arch_hardware_enable(void *garbage) { @@ -276,6 +281,8 @@ int __attribute_const__ kvm_target_cpu(void) int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) { + /* Force users to call KVM_ARM_VCPU_INIT */ + vcpu->arch.target = -1; return 0; } @@ -286,6 +293,7 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { vcpu->cpu = cpu; + vcpu->arch.vfp_host = this_cpu_ptr(kvm_host_vfp_state); } void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) @@ -316,9 +324,199 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) return 0; } +/* Just ensure a guest exit from a particular CPU */ +static void exit_vm_noop(void *info) +{ +} + +void force_vm_exit(const cpumask_t *mask) +{ + smp_call_function_many(mask, exit_vm_noop, NULL, true); +} + +/** + * need_new_vmid_gen - check that the VMID is still valid + * @kvm: The VM's VMID to checkt + * + * return true if there is a new generation of VMIDs being used + * + * The hardware supports only 256 values with the value zero reserved for the + * host, so we check if an assigned value belongs to a previous generation, + * which which requires us to assign a new value. If we're the first to use a + * VMID for the new generation, we must flush necessary caches and TLBs on all + * CPUs. + */ +static bool need_new_vmid_gen(struct kvm *kvm) +{ + return unlikely(kvm->arch.vmid_gen != atomic64_read(&kvm_vmid_gen)); +} + +/** + * update_vttbr - Update the VTTBR with a valid VMID before the guest runs + * @kvm The guest that we are about to run + * + * Called from kvm_arch_vcpu_ioctl_run before entering the guest to ensure the + * VM has a valid VMID, otherwise assigns a new one and flushes corresponding + * caches and TLBs. + */ +static void update_vttbr(struct kvm *kvm) +{ + phys_addr_t pgd_phys; + u64 vmid; + + if (!need_new_vmid_gen(kvm)) + return; + + spin_lock(&kvm_vmid_lock); + + /* + * We need to re-check the vmid_gen here to ensure that if another vcpu + * already allocated a valid vmid for this vm, then this vcpu should + * use the same vmid. + */ + if (!need_new_vmid_gen(kvm)) { + spin_unlock(&kvm_vmid_lock); + return; + } + + /* First user of a new VMID generation? */ + if (unlikely(kvm_next_vmid == 0)) { + atomic64_inc(&kvm_vmid_gen); + kvm_next_vmid = 1; + + /* + * On SMP we know no other CPUs can use this CPU's or each + * other's VMID after force_vm_exit returns since the + * kvm_vmid_lock blocks them from reentry to the guest. + */ + force_vm_exit(cpu_all_mask); + /* + * Now broadcast TLB + ICACHE invalidation over the inner + * shareable domain to make sure all data structures are + * clean. + */ + kvm_call_hyp(__kvm_flush_vm_context); + } + + kvm->arch.vmid_gen = atomic64_read(&kvm_vmid_gen); + kvm->arch.vmid = kvm_next_vmid; + kvm_next_vmid++; + + /* update vttbr to be used with the new vmid */ + pgd_phys = virt_to_phys(kvm->arch.pgd); + vmid = ((u64)(kvm->arch.vmid) << VTTBR_VMID_SHIFT) & VTTBR_VMID_MASK; + kvm->arch.vttbr = pgd_phys & VTTBR_BADDR_MASK; + kvm->arch.vttbr |= vmid; + + spin_unlock(&kvm_vmid_lock); +} + +/* + * Return > 0 to return to guest, < 0 on error, 0 (and set exit_reason) on + * proper exit to QEMU. + */ +static int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, + int exception_index) +{ + run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + return 0; +} + +static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu) +{ + if (likely(vcpu->arch.has_run_once)) + return 0; + + vcpu->arch.has_run_once = true; + return 0; +} + +/** + * kvm_arch_vcpu_ioctl_run - the main VCPU run function to execute guest code + * @vcpu: The VCPU pointer + * @run: The kvm_run structure pointer used for userspace state exchange + * + * This function is called through the VCPU_RUN ioctl called from user space. It + * will execute VM code in a loop until the time slice for the process is used + * or some emulation is needed from user space in which case the function will + * return with return value 0 and with the kvm_run structure filled in with the + * required data for the requested emulation. + */ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) { - return -EINVAL; + int ret; + sigset_t sigsaved; + + /* Make sure they initialize the vcpu with KVM_ARM_VCPU_INIT */ + if (unlikely(vcpu->arch.target < 0)) + return -ENOEXEC; + + ret = kvm_vcpu_first_run_init(vcpu); + if (ret) + return ret; + + if (vcpu->sigset_active) + sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); + + ret = 1; + run->exit_reason = KVM_EXIT_UNKNOWN; + while (ret > 0) { + /* + * Check conditions before entering the guest + */ + cond_resched(); + + update_vttbr(vcpu->kvm); + + local_irq_disable(); + + /* + * Re-check atomic conditions + */ + if (signal_pending(current)) { + ret = -EINTR; + run->exit_reason = KVM_EXIT_INTR; + } + + if (ret <= 0 || need_new_vmid_gen(vcpu->kvm)) { + local_irq_enable(); + continue; + } + + /************************************************************** + * Enter the guest + */ + trace_kvm_entry(*vcpu_pc(vcpu)); + kvm_guest_enter(); + vcpu->mode = IN_GUEST_MODE; + + ret = kvm_call_hyp(__kvm_vcpu_run, vcpu); + + vcpu->mode = OUTSIDE_GUEST_MODE; + kvm_guest_exit(); + trace_kvm_exit(*vcpu_pc(vcpu)); + /* + * We may have taken a host interrupt in HYP mode (ie + * while executing the guest). This interrupt is still + * pending, as we haven't serviced it yet! + * + * We're now back in SVC mode, with interrupts + * disabled. Enabling the interrupts now will have + * the effect of taking the interrupt again, in SVC + * mode this time. + */ + local_irq_enable(); + + /* + * Back from guest + *************************************************************/ + + ret = handle_exit(vcpu, run, ret); + } + + if (vcpu->sigset_active) + sigprocmask(SIG_SETMASK, &sigsaved, NULL); + return ret; } static int vcpu_interrupt_line(struct kvm_vcpu *vcpu, int number, bool level) diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S index f701aff31e44..c5400d2e97ca 100644 --- a/arch/arm/kvm/interrupts.S +++ b/arch/arm/kvm/interrupts.S @@ -20,9 +20,12 @@ #include #include #include +#include #include #include #include +#include +#include "interrupts_head.S" .text @@ -31,23 +34,164 @@ __kvm_hyp_code_start: /******************************************************************** * Flush per-VMID TLBs + * + * void __kvm_tlb_flush_vmid(struct kvm *kvm); + * + * We rely on the hardware to broadcast the TLB invalidation to all CPUs + * inside the inner-shareable domain (which is the case for all v7 + * implementations). If we come across a non-IS SMP implementation, we'll + * have to use an IPI based mechanism. Until then, we stick to the simple + * hardware assisted version. */ ENTRY(__kvm_tlb_flush_vmid) + push {r2, r3} + + add r0, r0, #KVM_VTTBR + ldrd r2, r3, [r0] + mcrr p15, 6, r2, r3, c2 @ Write VTTBR + isb + mcr p15, 0, r0, c8, c3, 0 @ TLBIALLIS (rt ignored) + dsb + isb + mov r2, #0 + mov r3, #0 + mcrr p15, 6, r2, r3, c2 @ Back to VMID #0 + isb @ Not necessary if followed by eret + + pop {r2, r3} bx lr ENDPROC(__kvm_tlb_flush_vmid) /******************************************************************** - * Flush TLBs and instruction caches of current CPU for all VMIDs + * Flush TLBs and instruction caches of all CPUs inside the inner-shareable + * domain, for all VMIDs + * + * void __kvm_flush_vm_context(void); */ ENTRY(__kvm_flush_vm_context) + mov r0, #0 @ rn parameter for c15 flushes is SBZ + + /* Invalidate NS Non-Hyp TLB Inner Shareable (TLBIALLNSNHIS) */ + mcr p15, 4, r0, c8, c3, 4 + /* Invalidate instruction caches Inner Shareable (ICIALLUIS) */ + mcr p15, 0, r0, c7, c1, 0 + dsb + isb @ Not necessary if followed by eret + bx lr ENDPROC(__kvm_flush_vm_context) + /******************************************************************** * Hypervisor world-switch code + * + * + * int __kvm_vcpu_run(struct kvm_vcpu *vcpu) */ ENTRY(__kvm_vcpu_run) - bx lr + @ Save the vcpu pointer + mcr p15, 4, vcpu, c13, c0, 2 @ HTPIDR + + save_host_regs + + @ Store hardware CP15 state and load guest state + read_cp15_state store_to_vcpu = 0 + write_cp15_state read_from_vcpu = 1 + + @ If the host kernel has not been configured with VFPv3 support, + @ then it is safer if we deny guests from using it as well. +#ifdef CONFIG_VFPv3 + @ Set FPEXC_EN so the guest doesn't trap floating point instructions + VFPFMRX r2, FPEXC @ VMRS + push {r2} + orr r2, r2, #FPEXC_EN + VFPFMXR FPEXC, r2 @ VMSR +#endif + + @ Configure Hyp-role + configure_hyp_role vmentry + + @ Trap coprocessor CRx accesses + set_hstr vmentry + set_hcptr vmentry, (HCPTR_TTA | HCPTR_TCP(10) | HCPTR_TCP(11)) + set_hdcr vmentry + + @ Write configured ID register into MIDR alias + ldr r1, [vcpu, #VCPU_MIDR] + mcr p15, 4, r1, c0, c0, 0 + + @ Write guest view of MPIDR into VMPIDR + ldr r1, [vcpu, #CP15_OFFSET(c0_MPIDR)] + mcr p15, 4, r1, c0, c0, 5 + + @ Set up guest memory translation + ldr r1, [vcpu, #VCPU_KVM] + add r1, r1, #KVM_VTTBR + ldrd r2, r3, [r1] + mcrr p15, 6, r2, r3, c2 @ Write VTTBR + + @ We're all done, just restore the GPRs and go to the guest + restore_guest_regs + clrex @ Clear exclusive monitor + eret + +__kvm_vcpu_return: + /* + * return convention: + * guest r0, r1, r2 saved on the stack + * r0: vcpu pointer + * r1: exception code + */ + save_guest_regs + + @ Set VMID == 0 + mov r2, #0 + mov r3, #0 + mcrr p15, 6, r2, r3, c2 @ Write VTTBR + + @ Don't trap coprocessor accesses for host kernel + set_hstr vmexit + set_hdcr vmexit + set_hcptr vmexit, (HCPTR_TTA | HCPTR_TCP(10) | HCPTR_TCP(11)) + +#ifdef CONFIG_VFPv3 + @ Save floating point registers we if let guest use them. + tst r2, #(HCPTR_TCP(10) | HCPTR_TCP(11)) + bne after_vfp_restore + + @ Switch VFP/NEON hardware state to the host's + add r7, vcpu, #VCPU_VFP_GUEST + store_vfp_state r7 + add r7, vcpu, #VCPU_VFP_HOST + ldr r7, [r7] + restore_vfp_state r7 + +after_vfp_restore: + @ Restore FPEXC_EN which we clobbered on entry + pop {r2} + VFPFMXR FPEXC, r2 +#endif + + @ Reset Hyp-role + configure_hyp_role vmexit + + @ Let host read hardware MIDR + mrc p15, 0, r2, c0, c0, 0 + mcr p15, 4, r2, c0, c0, 0 + + @ Back to hardware MPIDR + mrc p15, 0, r2, c0, c0, 5 + mcr p15, 4, r2, c0, c0, 5 + + @ Store guest CP15 state and restore host state + read_cp15_state store_to_vcpu = 1 + write_cp15_state read_from_vcpu = 0 + + restore_host_regs + clrex @ Clear exclusive monitor + mov r0, r1 @ Return the return code + mov r1, #0 @ Clear upper bits in return value + bx lr @ return to IOCTL /******************************************************************** * Call function in Hyp mode @@ -77,12 +221,258 @@ ENTRY(kvm_call_hyp) /******************************************************************** * Hypervisor exception vector and handlers + * + * + * The KVM/ARM Hypervisor ABI is defined as follows: + * + * Entry to Hyp mode from the host kernel will happen _only_ when an HVC + * instruction is issued since all traps are disabled when running the host + * kernel as per the Hyp-mode initialization at boot time. + * + * HVC instructions cause a trap to the vector page + offset 0x18 (see hyp_hvc + * below) when the HVC instruction is called from SVC mode (i.e. a guest or the + * host kernel) and they cause a trap to the vector page + offset 0xc when HVC + * instructions are called from within Hyp-mode. + * + * Hyp-ABI: Calling HYP-mode functions from host (in SVC mode): + * Switching to Hyp mode is done through a simple HVC #0 instruction. The + * exception vector code will check that the HVC comes from VMID==0 and if + * so will push the necessary state (SPSR, lr_usr) on the Hyp stack. + * - r0 contains a pointer to a HYP function + * - r1, r2, and r3 contain arguments to the above function. + * - The HYP function will be called with its arguments in r0, r1 and r2. + * On HYP function return, we return directly to SVC. + * + * Note that the above is used to execute code in Hyp-mode from a host-kernel + * point of view, and is a different concept from performing a world-switch and + * executing guest code SVC mode (with a VMID != 0). */ +/* Handle undef, svc, pabt, or dabt by crashing with a user notice */ +.macro bad_exception exception_code, panic_str + push {r0-r2} + mrrc p15, 6, r0, r1, c2 @ Read VTTBR + lsr r1, r1, #16 + ands r1, r1, #0xff + beq 99f + + load_vcpu @ Load VCPU pointer + .if \exception_code == ARM_EXCEPTION_DATA_ABORT + mrc p15, 4, r2, c5, c2, 0 @ HSR + mrc p15, 4, r1, c6, c0, 0 @ HDFAR + str r2, [vcpu, #VCPU_HSR] + str r1, [vcpu, #VCPU_HxFAR] + .endif + .if \exception_code == ARM_EXCEPTION_PREF_ABORT + mrc p15, 4, r2, c5, c2, 0 @ HSR + mrc p15, 4, r1, c6, c0, 2 @ HIFAR + str r2, [vcpu, #VCPU_HSR] + str r1, [vcpu, #VCPU_HxFAR] + .endif + mov r1, #\exception_code + b __kvm_vcpu_return + + @ We were in the host already. Let's craft a panic-ing return to SVC. +99: mrs r2, cpsr + bic r2, r2, #MODE_MASK + orr r2, r2, #SVC_MODE +THUMB( orr r2, r2, #PSR_T_BIT ) + msr spsr_cxsf, r2 + mrs r1, ELR_hyp + ldr r2, =BSYM(panic) + msr ELR_hyp, r2 + ldr r0, =\panic_str + eret +.endm + + .text + .align 5 __kvm_hyp_vector: .globl __kvm_hyp_vector - nop + + @ Hyp-mode exception vector + W(b) hyp_reset + W(b) hyp_undef + W(b) hyp_svc + W(b) hyp_pabt + W(b) hyp_dabt + W(b) hyp_hvc + W(b) hyp_irq + W(b) hyp_fiq + + .align +hyp_reset: + b hyp_reset + + .align +hyp_undef: + bad_exception ARM_EXCEPTION_UNDEFINED, und_die_str + + .align +hyp_svc: + bad_exception ARM_EXCEPTION_HVC, svc_die_str + + .align +hyp_pabt: + bad_exception ARM_EXCEPTION_PREF_ABORT, pabt_die_str + + .align +hyp_dabt: + bad_exception ARM_EXCEPTION_DATA_ABORT, dabt_die_str + + .align +hyp_hvc: + /* + * Getting here is either becuase of a trap from a guest or from calling + * HVC from the host kernel, which means "switch to Hyp mode". + */ + push {r0, r1, r2} + + @ Check syndrome register + mrc p15, 4, r1, c5, c2, 0 @ HSR + lsr r0, r1, #HSR_EC_SHIFT +#ifdef CONFIG_VFPv3 + cmp r0, #HSR_EC_CP_0_13 + beq switch_to_guest_vfp +#endif + cmp r0, #HSR_EC_HVC + bne guest_trap @ Not HVC instr. + + /* + * Let's check if the HVC came from VMID 0 and allow simple + * switch to Hyp mode + */ + mrrc p15, 6, r0, r2, c2 + lsr r2, r2, #16 + and r2, r2, #0xff + cmp r2, #0 + bne guest_trap @ Guest called HVC + +host_switch_to_hyp: + pop {r0, r1, r2} + + push {lr} + mrs lr, SPSR + push {lr} + + mov lr, r0 + mov r0, r1 + mov r1, r2 + mov r2, r3 + +THUMB( orr lr, #1) + blx lr @ Call the HYP function + + pop {lr} + msr SPSR_csxf, lr + pop {lr} + eret + +guest_trap: + load_vcpu @ Load VCPU pointer to r0 + str r1, [vcpu, #VCPU_HSR] + + @ Check if we need the fault information + lsr r1, r1, #HSR_EC_SHIFT + cmp r1, #HSR_EC_IABT + mrceq p15, 4, r2, c6, c0, 2 @ HIFAR + beq 2f + cmp r1, #HSR_EC_DABT + bne 1f + mrc p15, 4, r2, c6, c0, 0 @ HDFAR + +2: str r2, [vcpu, #VCPU_HxFAR] + + /* + * B3.13.5 Reporting exceptions taken to the Non-secure PL2 mode: + * + * Abort on the stage 2 translation for a memory access from a + * Non-secure PL1 or PL0 mode: + * + * For any Access flag fault or Translation fault, and also for any + * Permission fault on the stage 2 translation of a memory access + * made as part of a translation table walk for a stage 1 translation, + * the HPFAR holds the IPA that caused the fault. Otherwise, the HPFAR + * is UNKNOWN. + */ + + /* Check for permission fault, and S1PTW */ + mrc p15, 4, r1, c5, c2, 0 @ HSR + and r0, r1, #HSR_FSC_TYPE + cmp r0, #FSC_PERM + tsteq r1, #(1 << 7) @ S1PTW + mrcne p15, 4, r2, c6, c0, 4 @ HPFAR + bne 3f + + /* Resolve IPA using the xFAR */ + mcr p15, 0, r2, c7, c8, 0 @ ATS1CPR + isb + mrrc p15, 0, r0, r1, c7 @ PAR + tst r0, #1 + bne 4f @ Failed translation + ubfx r2, r0, #12, #20 + lsl r2, r2, #4 + orr r2, r2, r1, lsl #24 + +3: load_vcpu @ Load VCPU pointer to r0 + str r2, [r0, #VCPU_HPFAR] + +1: mov r1, #ARM_EXCEPTION_HVC + b __kvm_vcpu_return + +4: pop {r0, r1, r2} @ Failed translation, return to guest + eret + +/* + * If VFPv3 support is not available, then we will not switch the VFP + * registers; however cp10 and cp11 accesses will still trap and fallback + * to the regular coprocessor emulation code, which currently will + * inject an undefined exception to the guest. + */ +#ifdef CONFIG_VFPv3 +switch_to_guest_vfp: + load_vcpu @ Load VCPU pointer to r0 + push {r3-r7} + + @ NEON/VFP used. Turn on VFP access. + set_hcptr vmexit, (HCPTR_TCP(10) | HCPTR_TCP(11)) + + @ Switch VFP/NEON hardware state to the guest's + add r7, r0, #VCPU_VFP_HOST + ldr r7, [r7] + store_vfp_state r7 + add r7, r0, #VCPU_VFP_GUEST + restore_vfp_state r7 + + pop {r3-r7} + pop {r0-r2} + eret +#endif + + .align +hyp_irq: + push {r0, r1, r2} + mov r1, #ARM_EXCEPTION_IRQ + load_vcpu @ Load VCPU pointer to r0 + b __kvm_vcpu_return + + .align +hyp_fiq: + b hyp_fiq + + .ltorg __kvm_hyp_code_end: .globl __kvm_hyp_code_end + + .section ".rodata" + +und_die_str: + .ascii "unexpected undefined exception in Hyp mode at: %#08x" +pabt_die_str: + .ascii "unexpected prefetch abort in Hyp mode at: %#08x" +dabt_die_str: + .ascii "unexpected data abort in Hyp mode at: %#08x" +svc_die_str: + .ascii "unexpected HVC/SVC trap in Hyp mode at: %#08x" diff --git a/arch/arm/kvm/interrupts_head.S b/arch/arm/kvm/interrupts_head.S new file mode 100644 index 000000000000..6a95d341e9c5 --- /dev/null +++ b/arch/arm/kvm/interrupts_head.S @@ -0,0 +1,441 @@ +#define VCPU_USR_REG(_reg_nr) (VCPU_USR_REGS + (_reg_nr * 4)) +#define VCPU_USR_SP (VCPU_USR_REG(13)) +#define VCPU_USR_LR (VCPU_USR_REG(14)) +#define CP15_OFFSET(_cp15_reg_idx) (VCPU_CP15 + (_cp15_reg_idx * 4)) + +/* + * Many of these macros need to access the VCPU structure, which is always + * held in r0. These macros should never clobber r1, as it is used to hold the + * exception code on the return path (except of course the macro that switches + * all the registers before the final jump to the VM). + */ +vcpu .req r0 @ vcpu pointer always in r0 + +/* Clobbers {r2-r6} */ +.macro store_vfp_state vfp_base + @ The VFPFMRX and VFPFMXR macros are the VMRS and VMSR instructions + VFPFMRX r2, FPEXC + @ Make sure VFP is enabled so we can touch the registers. + orr r6, r2, #FPEXC_EN + VFPFMXR FPEXC, r6 + + VFPFMRX r3, FPSCR + tst r2, #FPEXC_EX @ Check for VFP Subarchitecture + beq 1f + @ If FPEXC_EX is 0, then FPINST/FPINST2 reads are upredictable, so + @ we only need to save them if FPEXC_EX is set. + VFPFMRX r4, FPINST + tst r2, #FPEXC_FP2V + VFPFMRX r5, FPINST2, ne @ vmrsne + bic r6, r2, #FPEXC_EX @ FPEXC_EX disable + VFPFMXR FPEXC, r6 +1: + VFPFSTMIA \vfp_base, r6 @ Save VFP registers + stm \vfp_base, {r2-r5} @ Save FPEXC, FPSCR, FPINST, FPINST2 +.endm + +/* Assume FPEXC_EN is on and FPEXC_EX is off, clobbers {r2-r6} */ +.macro restore_vfp_state vfp_base + VFPFLDMIA \vfp_base, r6 @ Load VFP registers + ldm \vfp_base, {r2-r5} @ Load FPEXC, FPSCR, FPINST, FPINST2 + + VFPFMXR FPSCR, r3 + tst r2, #FPEXC_EX @ Check for VFP Subarchitecture + beq 1f + VFPFMXR FPINST, r4 + tst r2, #FPEXC_FP2V + VFPFMXR FPINST2, r5, ne +1: + VFPFMXR FPEXC, r2 @ FPEXC (last, in case !EN) +.endm + +/* These are simply for the macros to work - value don't have meaning */ +.equ usr, 0 +.equ svc, 1 +.equ abt, 2 +.equ und, 3 +.equ irq, 4 +.equ fiq, 5 + +.macro push_host_regs_mode mode + mrs r2, SP_\mode + mrs r3, LR_\mode + mrs r4, SPSR_\mode + push {r2, r3, r4} +.endm + +/* + * Store all host persistent registers on the stack. + * Clobbers all registers, in all modes, except r0 and r1. + */ +.macro save_host_regs + /* Hyp regs. Only ELR_hyp (SPSR_hyp already saved) */ + mrs r2, ELR_hyp + push {r2} + + /* usr regs */ + push {r4-r12} @ r0-r3 are always clobbered + mrs r2, SP_usr + mov r3, lr + push {r2, r3} + + push_host_regs_mode svc + push_host_regs_mode abt + push_host_regs_mode und + push_host_regs_mode irq + + /* fiq regs */ + mrs r2, r8_fiq + mrs r3, r9_fiq + mrs r4, r10_fiq + mrs r5, r11_fiq + mrs r6, r12_fiq + mrs r7, SP_fiq + mrs r8, LR_fiq + mrs r9, SPSR_fiq + push {r2-r9} +.endm + +.macro pop_host_regs_mode mode + pop {r2, r3, r4} + msr SP_\mode, r2 + msr LR_\mode, r3 + msr SPSR_\mode, r4 +.endm + +/* + * Restore all host registers from the stack. + * Clobbers all registers, in all modes, except r0 and r1. + */ +.macro restore_host_regs + pop {r2-r9} + msr r8_fiq, r2 + msr r9_fiq, r3 + msr r10_fiq, r4 + msr r11_fiq, r5 + msr r12_fiq, r6 + msr SP_fiq, r7 + msr LR_fiq, r8 + msr SPSR_fiq, r9 + + pop_host_regs_mode irq + pop_host_regs_mode und + pop_host_regs_mode abt + pop_host_regs_mode svc + + pop {r2, r3} + msr SP_usr, r2 + mov lr, r3 + pop {r4-r12} + + pop {r2} + msr ELR_hyp, r2 +.endm + +/* + * Restore SP, LR and SPSR for a given mode. offset is the offset of + * this mode's registers from the VCPU base. + * + * Assumes vcpu pointer in vcpu reg + * + * Clobbers r1, r2, r3, r4. + */ +.macro restore_guest_regs_mode mode, offset + add r1, vcpu, \offset + ldm r1, {r2, r3, r4} + msr SP_\mode, r2 + msr LR_\mode, r3 + msr SPSR_\mode, r4 +.endm + +/* + * Restore all guest registers from the vcpu struct. + * + * Assumes vcpu pointer in vcpu reg + * + * Clobbers *all* registers. + */ +.macro restore_guest_regs + restore_guest_regs_mode svc, #VCPU_SVC_REGS + restore_guest_regs_mode abt, #VCPU_ABT_REGS + restore_guest_regs_mode und, #VCPU_UND_REGS + restore_guest_regs_mode irq, #VCPU_IRQ_REGS + + add r1, vcpu, #VCPU_FIQ_REGS + ldm r1, {r2-r9} + msr r8_fiq, r2 + msr r9_fiq, r3 + msr r10_fiq, r4 + msr r11_fiq, r5 + msr r12_fiq, r6 + msr SP_fiq, r7 + msr LR_fiq, r8 + msr SPSR_fiq, r9 + + @ Load return state + ldr r2, [vcpu, #VCPU_PC] + ldr r3, [vcpu, #VCPU_CPSR] + msr ELR_hyp, r2 + msr SPSR_cxsf, r3 + + @ Load user registers + ldr r2, [vcpu, #VCPU_USR_SP] + ldr r3, [vcpu, #VCPU_USR_LR] + msr SP_usr, r2 + mov lr, r3 + add vcpu, vcpu, #(VCPU_USR_REGS) + ldm vcpu, {r0-r12} +.endm + +/* + * Save SP, LR and SPSR for a given mode. offset is the offset of + * this mode's registers from the VCPU base. + * + * Assumes vcpu pointer in vcpu reg + * + * Clobbers r2, r3, r4, r5. + */ +.macro save_guest_regs_mode mode, offset + add r2, vcpu, \offset + mrs r3, SP_\mode + mrs r4, LR_\mode + mrs r5, SPSR_\mode + stm r2, {r3, r4, r5} +.endm + +/* + * Save all guest registers to the vcpu struct + * Expects guest's r0, r1, r2 on the stack. + * + * Assumes vcpu pointer in vcpu reg + * + * Clobbers r2, r3, r4, r5. + */ +.macro save_guest_regs + @ Store usr registers + add r2, vcpu, #VCPU_USR_REG(3) + stm r2, {r3-r12} + add r2, vcpu, #VCPU_USR_REG(0) + pop {r3, r4, r5} @ r0, r1, r2 + stm r2, {r3, r4, r5} + mrs r2, SP_usr + mov r3, lr + str r2, [vcpu, #VCPU_USR_SP] + str r3, [vcpu, #VCPU_USR_LR] + + @ Store return state + mrs r2, ELR_hyp + mrs r3, spsr + str r2, [vcpu, #VCPU_PC] + str r3, [vcpu, #VCPU_CPSR] + + @ Store other guest registers + save_guest_regs_mode svc, #VCPU_SVC_REGS + save_guest_regs_mode abt, #VCPU_ABT_REGS + save_guest_regs_mode und, #VCPU_UND_REGS + save_guest_regs_mode irq, #VCPU_IRQ_REGS +.endm + +/* Reads cp15 registers from hardware and stores them in memory + * @store_to_vcpu: If 0, registers are written in-order to the stack, + * otherwise to the VCPU struct pointed to by vcpup + * + * Assumes vcpu pointer in vcpu reg + * + * Clobbers r2 - r12 + */ +.macro read_cp15_state store_to_vcpu + mrc p15, 0, r2, c1, c0, 0 @ SCTLR + mrc p15, 0, r3, c1, c0, 2 @ CPACR + mrc p15, 0, r4, c2, c0, 2 @ TTBCR + mrc p15, 0, r5, c3, c0, 0 @ DACR + mrrc p15, 0, r6, r7, c2 @ TTBR 0 + mrrc p15, 1, r8, r9, c2 @ TTBR 1 + mrc p15, 0, r10, c10, c2, 0 @ PRRR + mrc p15, 0, r11, c10, c2, 1 @ NMRR + mrc p15, 2, r12, c0, c0, 0 @ CSSELR + + .if \store_to_vcpu == 0 + push {r2-r12} @ Push CP15 registers + .else + str r2, [vcpu, #CP15_OFFSET(c1_SCTLR)] + str r3, [vcpu, #CP15_OFFSET(c1_CPACR)] + str r4, [vcpu, #CP15_OFFSET(c2_TTBCR)] + str r5, [vcpu, #CP15_OFFSET(c3_DACR)] + add r2, vcpu, #CP15_OFFSET(c2_TTBR0) + strd r6, r7, [r2] + add r2, vcpu, #CP15_OFFSET(c2_TTBR1) + strd r8, r9, [r2] + str r10, [vcpu, #CP15_OFFSET(c10_PRRR)] + str r11, [vcpu, #CP15_OFFSET(c10_NMRR)] + str r12, [vcpu, #CP15_OFFSET(c0_CSSELR)] + .endif + + mrc p15, 0, r2, c13, c0, 1 @ CID + mrc p15, 0, r3, c13, c0, 2 @ TID_URW + mrc p15, 0, r4, c13, c0, 3 @ TID_URO + mrc p15, 0, r5, c13, c0, 4 @ TID_PRIV + mrc p15, 0, r6, c5, c0, 0 @ DFSR + mrc p15, 0, r7, c5, c0, 1 @ IFSR + mrc p15, 0, r8, c5, c1, 0 @ ADFSR + mrc p15, 0, r9, c5, c1, 1 @ AIFSR + mrc p15, 0, r10, c6, c0, 0 @ DFAR + mrc p15, 0, r11, c6, c0, 2 @ IFAR + mrc p15, 0, r12, c12, c0, 0 @ VBAR + + .if \store_to_vcpu == 0 + push {r2-r12} @ Push CP15 registers + .else + str r2, [vcpu, #CP15_OFFSET(c13_CID)] + str r3, [vcpu, #CP15_OFFSET(c13_TID_URW)] + str r4, [vcpu, #CP15_OFFSET(c13_TID_URO)] + str r5, [vcpu, #CP15_OFFSET(c13_TID_PRIV)] + str r6, [vcpu, #CP15_OFFSET(c5_DFSR)] + str r7, [vcpu, #CP15_OFFSET(c5_IFSR)] + str r8, [vcpu, #CP15_OFFSET(c5_ADFSR)] + str r9, [vcpu, #CP15_OFFSET(c5_AIFSR)] + str r10, [vcpu, #CP15_OFFSET(c6_DFAR)] + str r11, [vcpu, #CP15_OFFSET(c6_IFAR)] + str r12, [vcpu, #CP15_OFFSET(c12_VBAR)] + .endif +.endm + +/* + * Reads cp15 registers from memory and writes them to hardware + * @read_from_vcpu: If 0, registers are read in-order from the stack, + * otherwise from the VCPU struct pointed to by vcpup + * + * Assumes vcpu pointer in vcpu reg + */ +.macro write_cp15_state read_from_vcpu + .if \read_from_vcpu == 0 + pop {r2-r12} + .else + ldr r2, [vcpu, #CP15_OFFSET(c13_CID)] + ldr r3, [vcpu, #CP15_OFFSET(c13_TID_URW)] + ldr r4, [vcpu, #CP15_OFFSET(c13_TID_URO)] + ldr r5, [vcpu, #CP15_OFFSET(c13_TID_PRIV)] + ldr r6, [vcpu, #CP15_OFFSET(c5_DFSR)] + ldr r7, [vcpu, #CP15_OFFSET(c5_IFSR)] + ldr r8, [vcpu, #CP15_OFFSET(c5_ADFSR)] + ldr r9, [vcpu, #CP15_OFFSET(c5_AIFSR)] + ldr r10, [vcpu, #CP15_OFFSET(c6_DFAR)] + ldr r11, [vcpu, #CP15_OFFSET(c6_IFAR)] + ldr r12, [vcpu, #CP15_OFFSET(c12_VBAR)] + .endif + + mcr p15, 0, r2, c13, c0, 1 @ CID + mcr p15, 0, r3, c13, c0, 2 @ TID_URW + mcr p15, 0, r4, c13, c0, 3 @ TID_URO + mcr p15, 0, r5, c13, c0, 4 @ TID_PRIV + mcr p15, 0, r6, c5, c0, 0 @ DFSR + mcr p15, 0, r7, c5, c0, 1 @ IFSR + mcr p15, 0, r8, c5, c1, 0 @ ADFSR + mcr p15, 0, r9, c5, c1, 1 @ AIFSR + mcr p15, 0, r10, c6, c0, 0 @ DFAR + mcr p15, 0, r11, c6, c0, 2 @ IFAR + mcr p15, 0, r12, c12, c0, 0 @ VBAR + + .if \read_from_vcpu == 0 + pop {r2-r12} + .else + ldr r2, [vcpu, #CP15_OFFSET(c1_SCTLR)] + ldr r3, [vcpu, #CP15_OFFSET(c1_CPACR)] + ldr r4, [vcpu, #CP15_OFFSET(c2_TTBCR)] + ldr r5, [vcpu, #CP15_OFFSET(c3_DACR)] + add r12, vcpu, #CP15_OFFSET(c2_TTBR0) + ldrd r6, r7, [r12] + add r12, vcpu, #CP15_OFFSET(c2_TTBR1) + ldrd r8, r9, [r12] + ldr r10, [vcpu, #CP15_OFFSET(c10_PRRR)] + ldr r11, [vcpu, #CP15_OFFSET(c10_NMRR)] + ldr r12, [vcpu, #CP15_OFFSET(c0_CSSELR)] + .endif + + mcr p15, 0, r2, c1, c0, 0 @ SCTLR + mcr p15, 0, r3, c1, c0, 2 @ CPACR + mcr p15, 0, r4, c2, c0, 2 @ TTBCR + mcr p15, 0, r5, c3, c0, 0 @ DACR + mcrr p15, 0, r6, r7, c2 @ TTBR 0 + mcrr p15, 1, r8, r9, c2 @ TTBR 1 + mcr p15, 0, r10, c10, c2, 0 @ PRRR + mcr p15, 0, r11, c10, c2, 1 @ NMRR + mcr p15, 2, r12, c0, c0, 0 @ CSSELR +.endm + +/* + * Save the VGIC CPU state into memory + * + * Assumes vcpu pointer in vcpu reg + */ +.macro save_vgic_state +.endm + +/* + * Restore the VGIC CPU state from memory + * + * Assumes vcpu pointer in vcpu reg + */ +.macro restore_vgic_state +.endm + +.equ vmentry, 0 +.equ vmexit, 1 + +/* Configures the HSTR (Hyp System Trap Register) on entry/return + * (hardware reset value is 0) */ +.macro set_hstr operation + mrc p15, 4, r2, c1, c1, 3 + ldr r3, =HSTR_T(15) + .if \operation == vmentry + orr r2, r2, r3 @ Trap CR{15} + .else + bic r2, r2, r3 @ Don't trap any CRx accesses + .endif + mcr p15, 4, r2, c1, c1, 3 +.endm + +/* Configures the HCPTR (Hyp Coprocessor Trap Register) on entry/return + * (hardware reset value is 0). Keep previous value in r2. */ +.macro set_hcptr operation, mask + mrc p15, 4, r2, c1, c1, 2 + ldr r3, =\mask + .if \operation == vmentry + orr r3, r2, r3 @ Trap coproc-accesses defined in mask + .else + bic r3, r2, r3 @ Don't trap defined coproc-accesses + .endif + mcr p15, 4, r3, c1, c1, 2 +.endm + +/* Configures the HDCR (Hyp Debug Configuration Register) on entry/return + * (hardware reset value is 0) */ +.macro set_hdcr operation + mrc p15, 4, r2, c1, c1, 1 + ldr r3, =(HDCR_TPM|HDCR_TPMCR) + .if \operation == vmentry + orr r2, r2, r3 @ Trap some perfmon accesses + .else + bic r2, r2, r3 @ Don't trap any perfmon accesses + .endif + mcr p15, 4, r2, c1, c1, 1 +.endm + +/* Enable/Disable: stage-2 trans., trap interrupts, trap wfi, trap smc */ +.macro configure_hyp_role operation + mrc p15, 4, r2, c1, c1, 0 @ HCR + bic r2, r2, #HCR_VIRT_EXCP_MASK + ldr r3, =HCR_GUEST_MASK + .if \operation == vmentry + orr r2, r2, r3 + ldr r3, [vcpu, #VCPU_IRQ_LINES] + orr r2, r2, r3 + .else + bic r2, r2, r3 + .endif + mcr p15, 4, r2, c1, c1, 0 +.endm + +.macro load_vcpu + mrc p15, 4, vcpu, c13, c0, 2 @ HTPIDR +.endm -- cgit v1.2.3