summaryrefslogtreecommitdiffstats
path: root/arch/arm64/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/arm64/kernel')
-rw-r--r--arch/arm64/kernel/Makefile1
-rw-r--r--arch/arm64/kernel/acpi.c33
-rw-r--r--arch/arm64/kernel/asm-offsets.c10
-rw-r--r--arch/arm64/kernel/cpu_errata.c24
-rw-r--r--arch/arm64/kernel/cpufeature.c343
-rw-r--r--arch/arm64/kernel/cpuidle.c9
-rw-r--r--arch/arm64/kernel/cpuinfo.c40
-rw-r--r--arch/arm64/kernel/debug-monitors.c3
-rw-r--r--arch/arm64/kernel/efi-entry.S2
-rw-r--r--arch/arm64/kernel/head.S165
-rw-r--r--arch/arm64/kernel/hibernate-asm.S176
-rw-r--r--arch/arm64/kernel/hibernate.c487
-rw-r--r--arch/arm64/kernel/hw_breakpoint.c8
-rw-r--r--arch/arm64/kernel/hyp-stub.S45
-rw-r--r--arch/arm64/kernel/image.h2
-rw-r--r--arch/arm64/kernel/insn.c2
-rw-r--r--arch/arm64/kernel/kaslr.c6
-rw-r--r--arch/arm64/kernel/pci.c10
-rw-r--r--arch/arm64/kernel/process.c18
-rw-r--r--arch/arm64/kernel/setup.c81
-rw-r--r--arch/arm64/kernel/sleep.S157
-rw-r--r--arch/arm64/kernel/smp.c78
-rw-r--r--arch/arm64/kernel/suspend.c102
-rw-r--r--arch/arm64/kernel/sys.c10
-rw-r--r--arch/arm64/kernel/vdso.c4
-rw-r--r--arch/arm64/kernel/vmlinux.lds.S48
26 files changed, 1304 insertions, 560 deletions
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 3793003e16a2..2173149d8954 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -45,6 +45,7 @@ arm64-obj-$(CONFIG_ACPI) += acpi.o
arm64-obj-$(CONFIG_ARM64_ACPI_PARKING_PROTOCOL) += acpi_parking_protocol.o
arm64-obj-$(CONFIG_PARAVIRT) += paravirt.o
arm64-obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
+arm64-obj-$(CONFIG_HIBERNATION) += hibernate.o hibernate-asm.o
obj-y += $(arm64-obj-y) vdso/
obj-m += $(arm64-obj-m)
diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c
index d1ce8e2f98b9..3e4f1a45b125 100644
--- a/arch/arm64/kernel/acpi.c
+++ b/arch/arm64/kernel/acpi.c
@@ -42,6 +42,7 @@ int acpi_pci_disabled = 1; /* skip ACPI PCI scan and IRQ initialization */
EXPORT_SYMBOL(acpi_pci_disabled);
static bool param_acpi_off __initdata;
+static bool param_acpi_on __initdata;
static bool param_acpi_force __initdata;
static int __init parse_acpi(char *arg)
@@ -52,6 +53,8 @@ static int __init parse_acpi(char *arg)
/* "acpi=off" disables both ACPI table parsing and interpreter */
if (strcmp(arg, "off") == 0)
param_acpi_off = true;
+ else if (strcmp(arg, "on") == 0) /* prefer ACPI over DT */
+ param_acpi_on = true;
else if (strcmp(arg, "force") == 0) /* force ACPI to be enabled */
param_acpi_force = true;
else
@@ -66,12 +69,24 @@ static int __init dt_scan_depth1_nodes(unsigned long node,
void *data)
{
/*
- * Return 1 as soon as we encounter a node at depth 1 that is
- * not the /chosen node.
+ * Ignore anything not directly under the root node; we'll
+ * catch its parent instead.
*/
- if (depth == 1 && (strcmp(uname, "chosen") != 0))
- return 1;
- return 0;
+ if (depth != 1)
+ return 0;
+
+ if (strcmp(uname, "chosen") == 0)
+ return 0;
+
+ if (strcmp(uname, "hypervisor") == 0 &&
+ of_flat_dt_is_compatible(node, "xen,xen"))
+ return 0;
+
+ /*
+ * This node at depth 1 is neither a chosen node nor a xen node,
+ * which we do not expect.
+ */
+ return 1;
}
/*
@@ -184,11 +199,13 @@ void __init acpi_boot_table_init(void)
/*
* Enable ACPI instead of device tree unless
* - ACPI has been disabled explicitly (acpi=off), or
- * - the device tree is not empty (it has more than just a /chosen node)
- * and ACPI has not been force enabled (acpi=force)
+ * - the device tree is not empty (it has more than just a /chosen node,
+ * and a /hypervisor node when running on Xen)
+ * and ACPI has not been [force] enabled (acpi=on|force)
*/
if (param_acpi_off ||
- (!param_acpi_force && of_scan_flat_dt(dt_scan_depth1_nodes, NULL)))
+ (!param_acpi_on && !param_acpi_force &&
+ of_scan_flat_dt(dt_scan_depth1_nodes, NULL)))
return;
/*
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 3ae6b310ac9b..f8e5d47f0880 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -22,6 +22,7 @@
#include <linux/mm.h>
#include <linux/dma-mapping.h>
#include <linux/kvm_host.h>
+#include <linux/suspend.h>
#include <asm/thread_info.h>
#include <asm/memory.h>
#include <asm/smp_plat.h>
@@ -119,11 +120,14 @@ int main(void)
DEFINE(CPU_CTX_SP, offsetof(struct cpu_suspend_ctx, sp));
DEFINE(MPIDR_HASH_MASK, offsetof(struct mpidr_hash, mask));
DEFINE(MPIDR_HASH_SHIFTS, offsetof(struct mpidr_hash, shift_aff));
- DEFINE(SLEEP_SAVE_SP_SZ, sizeof(struct sleep_save_sp));
- DEFINE(SLEEP_SAVE_SP_PHYS, offsetof(struct sleep_save_sp, save_ptr_stash_phys));
- DEFINE(SLEEP_SAVE_SP_VIRT, offsetof(struct sleep_save_sp, save_ptr_stash));
+ DEFINE(SLEEP_STACK_DATA_SYSTEM_REGS, offsetof(struct sleep_stack_data, system_regs));
+ DEFINE(SLEEP_STACK_DATA_CALLEE_REGS, offsetof(struct sleep_stack_data, callee_saved_regs));
#endif
DEFINE(ARM_SMCCC_RES_X0_OFFS, offsetof(struct arm_smccc_res, a0));
DEFINE(ARM_SMCCC_RES_X2_OFFS, offsetof(struct arm_smccc_res, a2));
+ BLANK();
+ DEFINE(HIBERN_PBE_ORIG, offsetof(struct pbe, orig_address));
+ DEFINE(HIBERN_PBE_ADDR, offsetof(struct pbe, address));
+ DEFINE(HIBERN_PBE_NEXT, offsetof(struct pbe, next));
return 0;
}
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index 06afd04e02c0..d42789499f17 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -22,14 +22,16 @@
#include <asm/cpufeature.h>
static bool __maybe_unused
-is_affected_midr_range(const struct arm64_cpu_capabilities *entry)
+is_affected_midr_range(const struct arm64_cpu_capabilities *entry, int scope)
{
+ WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible());
return MIDR_IS_CPU_MODEL_RANGE(read_cpuid_id(), entry->midr_model,
entry->midr_range_min,
entry->midr_range_max);
}
#define MIDR_RANGE(model, min, max) \
+ .def_scope = SCOPE_LOCAL_CPU, \
.matches = is_affected_midr_range, \
.midr_model = model, \
.midr_range_min = min, \
@@ -101,6 +103,26 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
}
};
+/*
+ * The CPU Errata work arounds are detected and applied at boot time
+ * and the related information is freed soon after. If the new CPU requires
+ * an errata not detected at boot, fail this CPU.
+ */
+void verify_local_cpu_errata(void)
+{
+ const struct arm64_cpu_capabilities *caps = arm64_errata;
+
+ for (; caps->matches; caps++)
+ if (!cpus_have_cap(caps->capability) &&
+ caps->matches(caps, SCOPE_LOCAL_CPU)) {
+ pr_crit("CPU%d: Requires work around for %s, not detected"
+ " at boot time\n",
+ smp_processor_id(),
+ caps->desc ? : "an erratum");
+ cpu_die_early();
+ }
+}
+
void check_local_cpu_errata(void)
{
update_cpu_capabilities(arm64_errata, "enabling workaround for");
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 943f5140e0f3..811773d1c1d0 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -71,7 +71,8 @@ DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS);
/* meta feature for alternatives */
static bool __maybe_unused
-cpufeature_pan_not_uao(const struct arm64_cpu_capabilities *entry);
+cpufeature_pan_not_uao(const struct arm64_cpu_capabilities *entry, int __unused);
+
static struct arm64_ftr_bits ftr_id_aa64isar0[] = {
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0),
@@ -130,7 +131,11 @@ static struct arm64_ftr_bits ftr_id_aa64mmfr1[] = {
};
static struct arm64_ftr_bits ftr_id_aa64mmfr2[] = {
+ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR2_LVA_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR2_IESB_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR2_LSM_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR2_UAO_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR2_CNP_SHIFT, 4, 0),
ARM64_FTR_END,
};
@@ -435,22 +440,26 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info)
init_cpu_ftr_reg(SYS_ID_AA64MMFR2_EL1, info->reg_id_aa64mmfr2);
init_cpu_ftr_reg(SYS_ID_AA64PFR0_EL1, info->reg_id_aa64pfr0);
init_cpu_ftr_reg(SYS_ID_AA64PFR1_EL1, info->reg_id_aa64pfr1);
- init_cpu_ftr_reg(SYS_ID_DFR0_EL1, info->reg_id_dfr0);
- init_cpu_ftr_reg(SYS_ID_ISAR0_EL1, info->reg_id_isar0);
- init_cpu_ftr_reg(SYS_ID_ISAR1_EL1, info->reg_id_isar1);
- init_cpu_ftr_reg(SYS_ID_ISAR2_EL1, info->reg_id_isar2);
- init_cpu_ftr_reg(SYS_ID_ISAR3_EL1, info->reg_id_isar3);
- init_cpu_ftr_reg(SYS_ID_ISAR4_EL1, info->reg_id_isar4);
- init_cpu_ftr_reg(SYS_ID_ISAR5_EL1, info->reg_id_isar5);
- init_cpu_ftr_reg(SYS_ID_MMFR0_EL1, info->reg_id_mmfr0);
- init_cpu_ftr_reg(SYS_ID_MMFR1_EL1, info->reg_id_mmfr1);
- init_cpu_ftr_reg(SYS_ID_MMFR2_EL1, info->reg_id_mmfr2);
- init_cpu_ftr_reg(SYS_ID_MMFR3_EL1, info->reg_id_mmfr3);
- init_cpu_ftr_reg(SYS_ID_PFR0_EL1, info->reg_id_pfr0);
- init_cpu_ftr_reg(SYS_ID_PFR1_EL1, info->reg_id_pfr1);
- init_cpu_ftr_reg(SYS_MVFR0_EL1, info->reg_mvfr0);
- init_cpu_ftr_reg(SYS_MVFR1_EL1, info->reg_mvfr1);
- init_cpu_ftr_reg(SYS_MVFR2_EL1, info->reg_mvfr2);
+
+ if (id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0)) {
+ init_cpu_ftr_reg(SYS_ID_DFR0_EL1, info->reg_id_dfr0);
+ init_cpu_ftr_reg(SYS_ID_ISAR0_EL1, info->reg_id_isar0);
+ init_cpu_ftr_reg(SYS_ID_ISAR1_EL1, info->reg_id_isar1);
+ init_cpu_ftr_reg(SYS_ID_ISAR2_EL1, info->reg_id_isar2);
+ init_cpu_ftr_reg(SYS_ID_ISAR3_EL1, info->reg_id_isar3);
+ init_cpu_ftr_reg(SYS_ID_ISAR4_EL1, info->reg_id_isar4);
+ init_cpu_ftr_reg(SYS_ID_ISAR5_EL1, info->reg_id_isar5);
+ init_cpu_ftr_reg(SYS_ID_MMFR0_EL1, info->reg_id_mmfr0);
+ init_cpu_ftr_reg(SYS_ID_MMFR1_EL1, info->reg_id_mmfr1);
+ init_cpu_ftr_reg(SYS_ID_MMFR2_EL1, info->reg_id_mmfr2);
+ init_cpu_ftr_reg(SYS_ID_MMFR3_EL1, info->reg_id_mmfr3);
+ init_cpu_ftr_reg(SYS_ID_PFR0_EL1, info->reg_id_pfr0);
+ init_cpu_ftr_reg(SYS_ID_PFR1_EL1, info->reg_id_pfr1);
+ init_cpu_ftr_reg(SYS_MVFR0_EL1, info->reg_mvfr0);
+ init_cpu_ftr_reg(SYS_MVFR1_EL1, info->reg_mvfr1);
+ init_cpu_ftr_reg(SYS_MVFR2_EL1, info->reg_mvfr2);
+ }
+
}
static void update_cpu_ftr_reg(struct arm64_ftr_reg *reg, u64 new)
@@ -555,47 +564,51 @@ void update_cpu_features(int cpu,
info->reg_id_aa64pfr1, boot->reg_id_aa64pfr1);
/*
- * If we have AArch32, we care about 32-bit features for compat. These
- * registers should be RES0 otherwise.
+ * If we have AArch32, we care about 32-bit features for compat.
+ * If the system doesn't support AArch32, don't update them.
*/
- taint |= check_update_ftr_reg(SYS_ID_DFR0_EL1, cpu,
+ if (id_aa64pfr0_32bit_el0(read_system_reg(SYS_ID_AA64PFR0_EL1)) &&
+ id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0)) {
+
+ taint |= check_update_ftr_reg(SYS_ID_DFR0_EL1, cpu,
info->reg_id_dfr0, boot->reg_id_dfr0);
- taint |= check_update_ftr_reg(SYS_ID_ISAR0_EL1, cpu,
+ taint |= check_update_ftr_reg(SYS_ID_ISAR0_EL1, cpu,
info->reg_id_isar0, boot->reg_id_isar0);
- taint |= check_update_ftr_reg(SYS_ID_ISAR1_EL1, cpu,
+ taint |= check_update_ftr_reg(SYS_ID_ISAR1_EL1, cpu,
info->reg_id_isar1, boot->reg_id_isar1);
- taint |= check_update_ftr_reg(SYS_ID_ISAR2_EL1, cpu,
+ taint |= check_update_ftr_reg(SYS_ID_ISAR2_EL1, cpu,
info->reg_id_isar2, boot->reg_id_isar2);
- taint |= check_update_ftr_reg(SYS_ID_ISAR3_EL1, cpu,
+ taint |= check_update_ftr_reg(SYS_ID_ISAR3_EL1, cpu,
info->reg_id_isar3, boot->reg_id_isar3);
- taint |= check_update_ftr_reg(SYS_ID_ISAR4_EL1, cpu,
+ taint |= check_update_ftr_reg(SYS_ID_ISAR4_EL1, cpu,
info->reg_id_isar4, boot->reg_id_isar4);
- taint |= check_update_ftr_reg(SYS_ID_ISAR5_EL1, cpu,
+ taint |= check_update_ftr_reg(SYS_ID_ISAR5_EL1, cpu,
info->reg_id_isar5, boot->reg_id_isar5);
- /*
- * Regardless of the value of the AuxReg field, the AIFSR, ADFSR, and
- * ACTLR formats could differ across CPUs and therefore would have to
- * be trapped for virtualization anyway.
- */
- taint |= check_update_ftr_reg(SYS_ID_MMFR0_EL1, cpu,
+ /*
+ * Regardless of the value of the AuxReg field, the AIFSR, ADFSR, and
+ * ACTLR formats could differ across CPUs and therefore would have to
+ * be trapped for virtualization anyway.
+ */
+ taint |= check_update_ftr_reg(SYS_ID_MMFR0_EL1, cpu,
info->reg_id_mmfr0, boot->reg_id_mmfr0);
- taint |= check_update_ftr_reg(SYS_ID_MMFR1_EL1, cpu,
+ taint |= check_update_ftr_reg(SYS_ID_MMFR1_EL1, cpu,
info->reg_id_mmfr1, boot->reg_id_mmfr1);
- taint |= check_update_ftr_reg(SYS_ID_MMFR2_EL1, cpu,
+ taint |= check_update_ftr_reg(SYS_ID_MMFR2_EL1, cpu,
info->reg_id_mmfr2, boot->reg_id_mmfr2);
- taint |= check_update_ftr_reg(SYS_ID_MMFR3_EL1, cpu,
+ taint |= check_update_ftr_reg(SYS_ID_MMFR3_EL1, cpu,
info->reg_id_mmfr3, boot->reg_id_mmfr3);
- taint |= check_update_ftr_reg(SYS_ID_PFR0_EL1, cpu,
+ taint |= check_update_ftr_reg(SYS_ID_PFR0_EL1, cpu,
info->reg_id_pfr0, boot->reg_id_pfr0);
- taint |= check_update_ftr_reg(SYS_ID_PFR1_EL1, cpu,
+ taint |= check_update_ftr_reg(SYS_ID_PFR1_EL1, cpu,
info->reg_id_pfr1, boot->reg_id_pfr1);
- taint |= check_update_ftr_reg(SYS_MVFR0_EL1, cpu,
+ taint |= check_update_ftr_reg(SYS_MVFR0_EL1, cpu,
info->reg_mvfr0, boot->reg_mvfr0);
- taint |= check_update_ftr_reg(SYS_MVFR1_EL1, cpu,
+ taint |= check_update_ftr_reg(SYS_MVFR1_EL1, cpu,
info->reg_mvfr1, boot->reg_mvfr1);
- taint |= check_update_ftr_reg(SYS_MVFR2_EL1, cpu,
+ taint |= check_update_ftr_reg(SYS_MVFR2_EL1, cpu,
info->reg_mvfr2, boot->reg_mvfr2);
+ }
/*
* Mismatched CPU features are a recipe for disaster. Don't even
@@ -614,6 +627,49 @@ u64 read_system_reg(u32 id)
return regp->sys_val;
}
+/*
+ * __raw_read_system_reg() - Used by a STARTING cpu before cpuinfo is populated.
+ * Read the system register on the current CPU
+ */
+static u64 __raw_read_system_reg(u32 sys_id)
+{
+ switch (sys_id) {
+ case SYS_ID_PFR0_EL1: return read_cpuid(ID_PFR0_EL1);
+ case SYS_ID_PFR1_EL1: return read_cpuid(ID_PFR1_EL1);
+ case SYS_ID_DFR0_EL1: return read_cpuid(ID_DFR0_EL1);
+ case SYS_ID_MMFR0_EL1: return read_cpuid(ID_MMFR0_EL1);
+ case SYS_ID_MMFR1_EL1: return read_cpuid(ID_MMFR1_EL1);
+ case SYS_ID_MMFR2_EL1: return read_cpuid(ID_MMFR2_EL1);
+ case SYS_ID_MMFR3_EL1: return read_cpuid(ID_MMFR3_EL1);
+ case SYS_ID_ISAR0_EL1: return read_cpuid(ID_ISAR0_EL1);
+ case SYS_ID_ISAR1_EL1: return read_cpuid(ID_ISAR1_EL1);
+ case SYS_ID_ISAR2_EL1: return read_cpuid(ID_ISAR2_EL1);
+ case SYS_ID_ISAR3_EL1: return read_cpuid(ID_ISAR3_EL1);
+ case SYS_ID_ISAR4_EL1: return read_cpuid(ID_ISAR4_EL1);
+ case SYS_ID_ISAR5_EL1: return read_cpuid(ID_ISAR4_EL1);
+ case SYS_MVFR0_EL1: return read_cpuid(MVFR0_EL1);
+ case SYS_MVFR1_EL1: return read_cpuid(MVFR1_EL1);
+ case SYS_MVFR2_EL1: return read_cpuid(MVFR2_EL1);
+
+ case SYS_ID_AA64PFR0_EL1: return read_cpuid(ID_AA64PFR0_EL1);
+ case SYS_ID_AA64PFR1_EL1: return read_cpuid(ID_AA64PFR0_EL1);
+ case SYS_ID_AA64DFR0_EL1: return read_cpuid(ID_AA64DFR0_EL1);
+ case SYS_ID_AA64DFR1_EL1: return read_cpuid(ID_AA64DFR0_EL1);
+ case SYS_ID_AA64MMFR0_EL1: return read_cpuid(ID_AA64MMFR0_EL1);
+ case SYS_ID_AA64MMFR1_EL1: return read_cpuid(ID_AA64MMFR1_EL1);
+ case SYS_ID_AA64MMFR2_EL1: return read_cpuid(ID_AA64MMFR2_EL1);
+ case SYS_ID_AA64ISAR0_EL1: return read_cpuid(ID_AA64ISAR0_EL1);
+ case SYS_ID_AA64ISAR1_EL1: return read_cpuid(ID_AA64ISAR1_EL1);
+
+ case SYS_CNTFRQ_EL0: return read_cpuid(CNTFRQ_EL0);
+ case SYS_CTR_EL0: return read_cpuid(CTR_EL0);
+ case SYS_DCZID_EL0: return read_cpuid(DCZID_EL0);
+ default:
+ BUG();
+ return 0;
+ }
+}
+
#include <linux/irqchip/arm-gic-v3.h>
static bool
@@ -625,19 +681,24 @@ feature_matches(u64 reg, const struct arm64_cpu_capabilities *entry)
}
static bool
-has_cpuid_feature(const struct arm64_cpu_capabilities *entry)
+has_cpuid_feature(const struct arm64_cpu_capabilities *entry, int scope)
{
u64 val;
- val = read_system_reg(entry->sys_reg);
+ WARN_ON(scope == SCOPE_LOCAL_CPU && preemptible());
+ if (scope == SCOPE_SYSTEM)
+ val = read_system_reg(entry->sys_reg);
+ else
+ val = __raw_read_system_reg(entry->sys_reg);
+
return feature_matches(val, entry);
}
-static bool has_useable_gicv3_cpuif(const struct arm64_cpu_capabilities *entry)
+static bool has_useable_gicv3_cpuif(const struct arm64_cpu_capabilities *entry, int scope)
{
bool has_sre;
- if (!has_cpuid_feature(entry))
+ if (!has_cpuid_feature(entry, scope))
return false;
has_sre = gic_enable_sre();
@@ -648,7 +709,7 @@ static bool has_useable_gicv3_cpuif(const struct arm64_cpu_capabilities *entry)
return has_sre;
}
-static bool has_no_hw_prefetch(const struct arm64_cpu_capabilities *entry)
+static bool has_no_hw_prefetch(const struct arm64_cpu_capabilities *entry, int __unused)
{
u32 midr = read_cpuid_id();
u32 rv_min, rv_max;
@@ -660,7 +721,7 @@ static bool has_no_hw_prefetch(const struct arm64_cpu_capabilities *entry)
return MIDR_IS_CPU_MODEL_RANGE(midr, MIDR_THUNDERX, rv_min, rv_max);
}
-static bool runs_at_el2(const struct arm64_cpu_capabilities *entry)
+static bool runs_at_el2(const struct arm64_cpu_capabilities *entry, int __unused)
{
return is_kernel_in_hyp_mode();
}
@@ -669,6 +730,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
{
.desc = "GIC system register CPU interface",
.capability = ARM64_HAS_SYSREG_GIC_CPUIF,
+ .def_scope = SCOPE_SYSTEM,
.matches = has_useable_gicv3_cpuif,
.sys_reg = SYS_ID_AA64PFR0_EL1,
.field_pos = ID_AA64PFR0_GIC_SHIFT,
@@ -679,6 +741,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
{
.desc = "Privileged Access Never",
.capability = ARM64_HAS_PAN,
+ .def_scope = SCOPE_SYSTEM,
.matches = has_cpuid_feature,
.sys_reg = SYS_ID_AA64MMFR1_EL1,
.field_pos = ID_AA64MMFR1_PAN_SHIFT,
@@ -691,6 +754,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
{
.desc = "LSE atomic instructions",
.capability = ARM64_HAS_LSE_ATOMICS,
+ .def_scope = SCOPE_SYSTEM,
.matches = has_cpuid_feature,
.sys_reg = SYS_ID_AA64ISAR0_EL1,
.field_pos = ID_AA64ISAR0_ATOMICS_SHIFT,
@@ -701,12 +765,14 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
{
.desc = "Software prefetching using PRFM",
.capability = ARM64_HAS_NO_HW_PREFETCH,
+ .def_scope = SCOPE_SYSTEM,
.matches = has_no_hw_prefetch,
},
#ifdef CONFIG_ARM64_UAO
{
.desc = "User Access Override",
.capability = ARM64_HAS_UAO,
+ .def_scope = SCOPE_SYSTEM,
.matches = has_cpuid_feature,
.sys_reg = SYS_ID_AA64MMFR2_EL1,
.field_pos = ID_AA64MMFR2_UAO_SHIFT,
@@ -717,20 +783,33 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
#ifdef CONFIG_ARM64_PAN
{
.capability = ARM64_ALT_PAN_NOT_UAO,
+ .def_scope = SCOPE_SYSTEM,
.matches = cpufeature_pan_not_uao,
},
#endif /* CONFIG_ARM64_PAN */
{
.desc = "Virtualization Host Extensions",
.capability = ARM64_HAS_VIRT_HOST_EXTN,
+ .def_scope = SCOPE_SYSTEM,
.matches = runs_at_el2,
},
+ {
+ .desc = "32-bit EL0 Support",
+ .capability = ARM64_HAS_32BIT_EL0,
+ .def_scope = SCOPE_SYSTEM,
+ .matches = has_cpuid_feature,
+ .sys_reg = SYS_ID_AA64PFR0_EL1,
+ .sign = FTR_UNSIGNED,
+ .field_pos = ID_AA64PFR0_EL0_SHIFT,
+ .min_field_value = ID_AA64PFR0_EL0_32BIT_64BIT,
+ },
{},
};
#define HWCAP_CAP(reg, field, s, min_value, type, cap) \
{ \
.desc = #cap, \
+ .def_scope = SCOPE_SYSTEM, \
.matches = has_cpuid_feature, \
.sys_reg = reg, \
.field_pos = field, \
@@ -740,7 +819,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.hwcap = cap, \
}
-static const struct arm64_cpu_capabilities arm64_hwcaps[] = {
+static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, HWCAP_PMULL),
HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_AES),
HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA1_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SHA1),
@@ -751,6 +830,10 @@ static const struct arm64_cpu_capabilities arm64_hwcaps[] = {
HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, HWCAP_FPHP),
HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, HWCAP_ASIMD),
HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, HWCAP_ASIMDHP),
+ {},
+};
+
+static const struct arm64_cpu_capabilities compat_elf_hwcaps[] = {
#ifdef CONFIG_COMPAT
HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, FTR_UNSIGNED, 2, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_PMULL),
HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_AES),
@@ -761,7 +844,7 @@ static const struct arm64_cpu_capabilities arm64_hwcaps[] = {
{},
};
-static void __init cap_set_hwcap(const struct arm64_cpu_capabilities *cap)
+static void __init cap_set_elf_hwcap(const struct arm64_cpu_capabilities *cap)
{
switch (cap->hwcap_type) {
case CAP_HWCAP:
@@ -782,7 +865,7 @@ static void __init cap_set_hwcap(const struct arm64_cpu_capabilities *cap)
}
/* Check if we have a particular HWCAP enabled */
-static bool __maybe_unused cpus_have_hwcap(const struct arm64_cpu_capabilities *cap)
+static bool cpus_have_elf_hwcap(const struct arm64_cpu_capabilities *cap)
{
bool rc;
@@ -806,28 +889,23 @@ static bool __maybe_unused cpus_have_hwcap(const struct arm64_cpu_capabilities *
return rc;
}
-static void __init setup_cpu_hwcaps(void)
+static void __init setup_elf_hwcaps(const struct arm64_cpu_capabilities *hwcaps)
{
- int i;
- const struct arm64_cpu_capabilities *hwcaps = arm64_hwcaps;
-
- for (i = 0; hwcaps[i].matches; i++)
- if (hwcaps[i].matches(&hwcaps[i]))
- cap_set_hwcap(&hwcaps[i]);
+ for (; hwcaps->matches; hwcaps++)
+ if (hwcaps->matches(hwcaps, hwcaps->def_scope))
+ cap_set_elf_hwcap(hwcaps);
}
void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps,
const char *info)
{
- int i;
-
- for (i = 0; caps[i].matches; i++) {
- if (!caps[i].matches(&caps[i]))
+ for (; caps->matches; caps++) {
+ if (!caps->matches(caps, caps->def_scope))
continue;
- if (!cpus_have_cap(caps[i].capability) && caps[i].desc)
- pr_info("%s %s\n", info, caps[i].desc);
- cpus_set_cap(caps[i].capability);
+ if (!cpus_have_cap(caps->capability) && caps->desc)
+ pr_info("%s %s\n", info, caps->desc);
+ cpus_set_cap(caps->capability);
}
}
@@ -838,11 +916,9 @@ void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps,
static void __init
enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps)
{
- int i;
-
- for (i = 0; caps[i].matches; i++)
- if (caps[i].enable && cpus_have_cap(caps[i].capability))
- on_each_cpu(caps[i].enable, NULL, true);
+ for (; caps->matches; caps++)
+ if (caps->enable && cpus_have_cap(caps->capability))
+ on_each_cpu(caps->enable, NULL, true);
}
/*
@@ -861,54 +937,45 @@ static inline void set_sys_caps_initialised(void)
}
/*
- * __raw_read_system_reg() - Used by a STARTING cpu before cpuinfo is populated.
+ * Check for CPU features that are used in early boot
+ * based on the Boot CPU value.
*/
-static u64 __raw_read_system_reg(u32 sys_id)
+static void check_early_cpu_features(void)
{
- switch (sys_id) {
- case SYS_ID_PFR0_EL1: return read_cpuid(ID_PFR0_EL1);
- case SYS_ID_PFR1_EL1: return read_cpuid(ID_PFR1_EL1);
- case SYS_ID_DFR0_EL1: return read_cpuid(ID_DFR0_EL1);
- case SYS_ID_MMFR0_EL1: return read_cpuid(ID_MMFR0_EL1);
- case SYS_ID_MMFR1_EL1: return read_cpuid(ID_MMFR1_EL1);
- case SYS_ID_MMFR2_EL1: return read_cpuid(ID_MMFR2_EL1);
- case SYS_ID_MMFR3_EL1: return read_cpuid(ID_MMFR3_EL1);
- case SYS_ID_ISAR0_EL1: return read_cpuid(ID_ISAR0_EL1);
- case SYS_ID_ISAR1_EL1: return read_cpuid(ID_ISAR1_EL1);
- case SYS_ID_ISAR2_EL1: return read_cpuid(ID_ISAR2_EL1);
- case SYS_ID_ISAR3_EL1: return read_cpuid(ID_ISAR3_EL1);
- case SYS_ID_ISAR4_EL1: return read_cpuid(ID_ISAR4_EL1);
- case SYS_ID_ISAR5_EL1: return read_cpuid(ID_ISAR4_EL1);
- case SYS_MVFR0_EL1: return read_cpuid(MVFR0_EL1);
- case SYS_MVFR1_EL1: return read_cpuid(MVFR1_EL1);
- case SYS_MVFR2_EL1: return read_cpuid(MVFR2_EL1);
+ verify_cpu_run_el();
+ verify_cpu_asid_bits();
+}
- case SYS_ID_AA64PFR0_EL1: return read_cpuid(ID_AA64PFR0_EL1);
- case SYS_ID_AA64PFR1_EL1: return read_cpuid(ID_AA64PFR0_EL1);
- case SYS_ID_AA64DFR0_EL1: return read_cpuid(ID_AA64DFR0_EL1);
- case SYS_ID_AA64DFR1_EL1: return read_cpuid(ID_AA64DFR0_EL1);
- case SYS_ID_AA64MMFR0_EL1: return read_cpuid(ID_AA64MMFR0_EL1);
- case SYS_ID_AA64MMFR1_EL1: return read_cpuid(ID_AA64MMFR1_EL1);
- case SYS_ID_AA64MMFR2_EL1: return read_cpuid(ID_AA64MMFR2_EL1);
- case SYS_ID_AA64ISAR0_EL1: return read_cpuid(ID_AA64ISAR0_EL1);
- case SYS_ID_AA64ISAR1_EL1: return read_cpuid(ID_AA64ISAR1_EL1);
+static void
+verify_local_elf_hwcaps(const struct arm64_cpu_capabilities *caps)
+{
- case SYS_CNTFRQ_EL0: return read_cpuid(CNTFRQ_EL0);
- case SYS_CTR_EL0: return read_cpuid(CTR_EL0);
- case SYS_DCZID_EL0: return read_cpuid(DCZID_EL0);
- default:
- BUG();
- return 0;
- }
+ for (; caps->matches; caps++)
+ if (cpus_have_elf_hwcap(caps) && !caps->matches(caps, SCOPE_LOCAL_CPU)) {
+ pr_crit("CPU%d: missing HWCAP: %s\n",
+ smp_processor_id(), caps->desc);
+ cpu_die_early();
+ }
}
-/*
- * Check for CPU features that are used in early boot
- * based on the Boot CPU value.
- */
-static void check_early_cpu_features(void)
+static void
+verify_local_cpu_features(const struct arm64_cpu_capabilities *caps)
{
- verify_cpu_asid_bits();
+ for (; caps->matches; caps++) {
+ if (!cpus_have_cap(caps->capability))
+ continue;
+ /*
+ * If the new CPU misses an advertised feature, we cannot proceed
+ * further, park the cpu.
+ */
+ if (!caps->matches(caps, SCOPE_LOCAL_CPU)) {
+ pr_crit("CPU%d: missing feature: %s\n",
+ smp_processor_id(), caps->desc);
+ cpu_die_early();
+ }
+ if (caps->enable)
+ caps->enable(NULL);
+ }
}
/*
@@ -921,8 +988,6 @@ static void check_early_cpu_features(void)
*/
void verify_local_cpu_capabilities(void)
{
- int i;
- const struct arm64_cpu_capabilities *caps;
check_early_cpu_features();
@@ -933,32 +998,11 @@ void verify_local_cpu_capabilities(void)
if (!sys_caps_initialised)
return;
- caps = arm64_features;
- for (i = 0; caps[i].matches; i++) {
- if (!cpus_have_cap(caps[i].capability) || !caps[i].sys_reg)
- continue;
- /*
- * If the new CPU misses an advertised feature, we cannot proceed
- * further, park the cpu.
- */
- if (!feature_matches(__raw_read_system_reg(caps[i].sys_reg), &caps[i])) {
- pr_crit("CPU%d: missing feature: %s\n",
- smp_processor_id(), caps[i].desc);
- cpu_die_early();
- }
- if (caps[i].enable)
- caps[i].enable(NULL);
- }
-
- for (i = 0, caps = arm64_hwcaps; caps[i].matches; i++) {
- if (!cpus_have_hwcap(&caps[i]))
- continue;
- if (!feature_matches(__raw_read_system_reg(caps[i].sys_reg), &caps[i])) {
- pr_crit("CPU%d: missing HWCAP: %s\n",
- smp_processor_id(), caps[i].desc);
- cpu_die_early();
- }
- }
+ verify_local_cpu_errata();
+ verify_local_cpu_features(arm64_features);
+ verify_local_elf_hwcaps(arm64_elf_hwcaps);
+ if (system_supports_32bit_el0())
+ verify_local_elf_hwcaps(compat_elf_hwcaps);
}
static void __init setup_feature_capabilities(void)
@@ -967,6 +1011,24 @@ static void __init setup_feature_capabilities(void)
enable_cpu_capabilities(arm64_features);
}
+/*
+ * Check if the current CPU has a given feature capability.
+ * Should be called from non-preemptible context.
+ */
+bool this_cpu_has_cap(unsigned int cap)
+{
+ const struct arm64_cpu_capabilities *caps;
+
+ if (WARN_ON(preemptible()))
+ return false;
+
+ for (caps = arm64_features; caps->desc; caps++)
+ if (caps->capability == cap && caps->matches)
+ return caps->matches(caps, SCOPE_LOCAL_CPU);
+
+ return false;
+}
+
void __init setup_cpu_features(void)
{
u32 cwg;
@@ -974,7 +1036,10 @@ void __init setup_cpu_features(void)
/* Set the CPU feature capabilies */
setup_feature_capabilities();
- setup_cpu_hwcaps();
+ setup_elf_hwcaps(arm64_elf_hwcaps);
+
+ if (system_supports_32bit_el0())
+ setup_elf_hwcaps(compat_elf_hwcaps);
/* Advertise that we have computed the system capabilities */
set_sys_caps_initialised();
@@ -993,7 +1058,7 @@ void __init setup_cpu_features(void)
}
static bool __maybe_unused
-cpufeature_pan_not_uao(const struct arm64_cpu_capabilities *entry)
+cpufeature_pan_not_uao(const struct arm64_cpu_capabilities *entry, int __unused)
{
return (cpus_have_cap(ARM64_HAS_PAN) && !cpus_have_cap(ARM64_HAS_UAO));
}
diff --git a/arch/arm64/kernel/cpuidle.c b/arch/arm64/kernel/cpuidle.c
index 9047cab68fd3..e11857fce05f 100644
--- a/arch/arm64/kernel/cpuidle.c
+++ b/arch/arm64/kernel/cpuidle.c
@@ -19,7 +19,8 @@ int __init arm_cpuidle_init(unsigned int cpu)
{
int ret = -EOPNOTSUPP;
- if (cpu_ops[cpu] && cpu_ops[cpu]->cpu_init_idle)
+ if (cpu_ops[cpu] && cpu_ops[cpu]->cpu_suspend &&
+ cpu_ops[cpu]->cpu_init_idle)
ret = cpu_ops[cpu]->cpu_init_idle(cpu);
return ret;
@@ -36,11 +37,5 @@ int arm_cpuidle_suspend(int index)
{
int cpu = smp_processor_id();
- /*
- * If cpu_ops have not been registered or suspend
- * has not been initialized, cpu_suspend call fails early.
- */
- if (!cpu_ops[cpu] || !cpu_ops[cpu]->cpu_suspend)
- return -EOPNOTSUPP;
return cpu_ops[cpu]->cpu_suspend(index);
}
diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
index 84c8684431c7..3808470486f3 100644
--- a/arch/arm64/kernel/cpuinfo.c
+++ b/arch/arm64/kernel/cpuinfo.c
@@ -87,7 +87,8 @@ static const char *const compat_hwcap_str[] = {
"idivt",
"vfpd32",
"lpae",
- "evtstrm"
+ "evtstrm",
+ NULL
};
static const char *const compat_hwcap2_str[] = {
@@ -216,23 +217,26 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info)
info->reg_id_aa64pfr0 = read_cpuid(ID_AA64PFR0_EL1);
info->reg_id_aa64pfr1 = read_cpuid(ID_AA64PFR1_EL1);
- info->reg_id_dfr0 = read_cpuid(ID_DFR0_EL1);
- info->reg_id_isar0 = read_cpuid(ID_ISAR0_EL1);
- info->reg_id_isar1 = read_cpuid(ID_ISAR1_EL1);
- info->reg_id_isar2 = read_cpuid(ID_ISAR2_EL1);
- info->reg_id_isar3 = read_cpuid(ID_ISAR3_EL1);
- info->reg_id_isar4 = read_cpuid(ID_ISAR4_EL1);
- info->reg_id_isar5 = read_cpuid(ID_ISAR5_EL1);
- info->reg_id_mmfr0 = read_cpuid(ID_MMFR0_EL1);
- info->reg_id_mmfr1 = read_cpuid(ID_MMFR1_EL1);
- info->reg_id_mmfr2 = read_cpuid(ID_MMFR2_EL1);
- info->reg_id_mmfr3 = read_cpuid(ID_MMFR3_EL1);
- info->reg_id_pfr0 = read_cpuid(ID_PFR0_EL1);
- info->reg_id_pfr1 = read_cpuid(ID_PFR1_EL1);
-
- info->reg_mvfr0 = read_cpuid(MVFR0_EL1);
- info->reg_mvfr1 = read_cpuid(MVFR1_EL1);
- info->reg_mvfr2 = read_cpuid(MVFR2_EL1);
+ /* Update the 32bit ID registers only if AArch32 is implemented */
+ if (id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0)) {
+ info->reg_id_dfr0 = read_cpuid(ID_DFR0_EL1);
+ info->reg_id_isar0 = read_cpuid(ID_ISAR0_EL1);
+ info->reg_id_isar1 = read_cpuid(ID_ISAR1_EL1);
+ info->reg_id_isar2 = read_cpuid(ID_ISAR2_EL1);
+ info->reg_id_isar3 = read_cpuid(ID_ISAR3_EL1);
+ info->reg_id_isar4 = read_cpuid(ID_ISAR4_EL1);
+ info->reg_id_isar5 = read_cpuid(ID_ISAR5_EL1);
+ info->reg_id_mmfr0 = read_cpuid(ID_MMFR0_EL1);
+ info->reg_id_mmfr1 = read_cpuid(ID_MMFR1_EL1);
+ info->reg_id_mmfr2 = read_cpuid(ID_MMFR2_EL1);
+ info->reg_id_mmfr3 = read_cpuid(ID_MMFR3_EL1);
+ info->reg_id_pfr0 = read_cpuid(ID_PFR0_EL1);
+ info->reg_id_pfr1 = read_cpuid(ID_PFR1_EL1);
+
+ info->reg_mvfr0 = read_cpuid(MVFR0_EL1);
+ info->reg_mvfr1 = read_cpuid(MVFR1_EL1);
+ info->reg_mvfr2 = read_cpuid(MVFR2_EL1);
+ }
cpuinfo_detect_icache_policy(info);
diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c
index c45f2968bc8c..4fbf3c54275c 100644
--- a/arch/arm64/kernel/debug-monitors.c
+++ b/arch/arm64/kernel/debug-monitors.c
@@ -135,9 +135,8 @@ static void clear_os_lock(void *unused)
static int os_lock_notify(struct notifier_block *self,
unsigned long action, void *data)
{
- int cpu = (unsigned long)data;
if ((action & ~CPU_TASKS_FROZEN) == CPU_ONLINE)
- smp_call_function_single(cpu, clear_os_lock, NULL, 1);
+ clear_os_lock(NULL);
return NOTIFY_OK;
}
diff --git a/arch/arm64/kernel/efi-entry.S b/arch/arm64/kernel/efi-entry.S
index cae3112f7791..e88c064b845c 100644
--- a/arch/arm64/kernel/efi-entry.S
+++ b/arch/arm64/kernel/efi-entry.S
@@ -62,7 +62,7 @@ ENTRY(entry)
*/
mov x20, x0 // DTB address
ldr x0, [sp, #16] // relocated _text address
- movz x21, #:abs_g0:stext_offset
+ ldr w21, =stext_offset
add x21, x0, x21
/*
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 85da0f599cd6..2c6e598a94dc 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -25,6 +25,7 @@
#include <linux/irqchip/arm-gic-v3.h>
#include <asm/assembler.h>
+#include <asm/boot.h>
#include <asm/ptrace.h>
#include <asm/asm-offsets.h>
#include <asm/cache.h>
@@ -51,9 +52,6 @@
#error TEXT_OFFSET must be less than 2MB
#endif
-#define KERNEL_START _text
-#define KERNEL_END _end
-
/*
* Kernel startup entry point.
* ---------------------------
@@ -102,8 +100,6 @@ _head:
#endif
#ifdef CONFIG_EFI
- .globl __efistub_stext_offset
- .set __efistub_stext_offset, stext - _head
.align 3
pe_header:
.ascii "PE"
@@ -123,11 +119,11 @@ optional_header:
.short 0x20b // PE32+ format
.byte 0x02 // MajorLinkerVersion
.byte 0x14 // MinorLinkerVersion
- .long _end - stext // SizeOfCode
+ .long _end - efi_header_end // SizeOfCode
.long 0 // SizeOfInitializedData
.long 0 // SizeOfUninitializedData
.long __efistub_entry - _head // AddressOfEntryPoint
- .long __efistub_stext_offset // BaseOfCode
+ .long efi_header_end - _head // BaseOfCode
extra_header_fields:
.quad 0 // ImageBase
@@ -144,7 +140,7 @@ extra_header_fields:
.long _end - _head // SizeOfImage
// Everything before the kernel image is considered part of the header
- .long __efistub_stext_offset // SizeOfHeaders
+ .long efi_header_end - _head // SizeOfHeaders
.long 0 // CheckSum
.short 0xa // Subsystem (EFI application)
.short 0 // DllCharacteristics
@@ -188,10 +184,10 @@ section_table:
.byte 0
.byte 0
.byte 0 // end of 0 padding of section name
- .long _end - stext // VirtualSize
- .long __efistub_stext_offset // VirtualAddress
- .long _edata - stext // SizeOfRawData
- .long __efistub_stext_offset // PointerToRawData
+ .long _end - efi_header_end // VirtualSize
+ .long efi_header_end - _head // VirtualAddress
+ .long _edata - efi_header_end // SizeOfRawData
+ .long efi_header_end - _head // PointerToRawData
.long 0 // PointerToRelocations (0 for executables)
.long 0 // PointerToLineNumbers (0 for executables)
@@ -200,20 +196,23 @@ section_table:
.long 0xe0500020 // Characteristics (section flags)
/*
- * EFI will load stext onwards at the 4k section alignment
+ * EFI will load .text onwards at the 4k section alignment
* described in the PE/COFF header. To ensure that instruction
* sequences using an adrp and a :lo12: immediate will function
- * correctly at this alignment, we must ensure that stext is
+ * correctly at this alignment, we must ensure that .text is
* placed at a 4k boundary in the Image to begin with.
*/
.align 12
+efi_header_end:
#endif
+ __INIT
+
ENTRY(stext)
bl preserve_boot_args
bl el2_setup // Drop to EL1, w20=cpu_boot_mode
- mov x23, xzr // KASLR offset, defaults to 0
adrp x24, __PHYS_OFFSET
+ and x23, x24, MIN_KIMG_ALIGN - 1 // KASLR offset, defaults to 0
bl set_cpu_boot_mode_flag
bl __create_page_tables // x25=TTBR0, x26=TTBR1
/*
@@ -222,13 +221,11 @@ ENTRY(stext)
* On return, the CPU will be ready for the MMU to be turned on and
* the TCR will have been set.
*/
- ldr x27, 0f // address to jump to after
+ bl __cpu_setup // initialise processor
+ adr_l x27, __primary_switch // address to jump to after
// MMU has been enabled
- adr_l lr, __enable_mmu // return (PIC) address
- b __cpu_setup // initialise processor
+ b __enable_mmu
ENDPROC(stext)
- .align 3
-0: .quad __mmap_switched - (_head - TEXT_OFFSET) + KIMAGE_VADDR
/*
* Preserve the arguments passed by the bootloader in x0 .. x3
@@ -338,7 +335,7 @@ __create_page_tables:
cmp x0, x6
b.lo 1b
- ldr x7, =SWAPPER_MM_MMUFLAGS
+ mov x7, SWAPPER_MM_MMUFLAGS
/*
* Create the identity mapping.
@@ -394,12 +391,13 @@ __create_page_tables:
* Map the kernel image (starting with PHYS_OFFSET).
*/
mov x0, x26 // swapper_pg_dir
- ldr x5, =KIMAGE_VADDR
+ mov_q x5, KIMAGE_VADDR + TEXT_OFFSET // compile time __va(_text)
add x5, x5, x23 // add KASLR displacement
create_pgd_entry x0, x5, x3, x6
- ldr w6, kernel_img_size
- add x6, x6, x5
- mov x3, x24 // phys offset
+ adrp x6, _end // runtime __pa(_end)
+ adrp x3, _text // runtime __pa(_text)
+ sub x6, x6, x3 // _end - _text
+ add x6, x6, x5 // runtime __va(_end)
create_block_map x0, x7, x3, x5, x6
/*
@@ -414,16 +412,13 @@ __create_page_tables:
ret x28
ENDPROC(__create_page_tables)
-
-kernel_img_size:
- .long _end - (_head - TEXT_OFFSET)
.ltorg
/*
* The following fragment of code is executed with the MMU enabled.
*/
.set initial_sp, init_thread_union + THREAD_START_SP
-__mmap_switched:
+__primary_switched:
mov x28, lr // preserve LR
adr_l x8, vectors // load VBAR_EL1 with virtual
msr vbar_el1, x8 // vector table address
@@ -437,44 +432,6 @@ __mmap_switched:
bl __pi_memset
dsb ishst // Make zero page visible to PTW
-#ifdef CONFIG_RELOCATABLE
-
- /*
- * Iterate over each entry in the relocation table, and apply the
- * relocations in place.
- */
- adr_l x8, __dynsym_start // start of symbol table
- adr_l x9, __reloc_start // start of reloc table
- adr_l x10, __reloc_end // end of reloc table
-
-0: cmp x9, x10
- b.hs 2f
- ldp x11, x12, [x9], #24
- ldr x13, [x9, #-8]
- cmp w12, #R_AARCH64_RELATIVE
- b.ne 1f
- add x13, x13, x23 // relocate
- str x13, [x11, x23]
- b 0b
-
-1: cmp w12, #R_AARCH64_ABS64
- b.ne 0b
- add x12, x12, x12, lsl #1 // symtab offset: 24x top word
- add x12, x8, x12, lsr #(32 - 3) // ... shifted into bottom word
- ldrsh w14, [x12, #6] // Elf64_Sym::st_shndx
- ldr x15, [x12, #8] // Elf64_Sym::st_value
- cmp w14, #-0xf // SHN_ABS (0xfff1) ?
- add x14, x15, x23 // relocate
- csel x15, x14, x15, ne
- add x15, x13, x15
- str x15, [x11, x23]
- b 0b
-
-2: adr_l x8, kimage_vaddr // make relocated kimage_vaddr
- dc cvac, x8 // value visible to secondaries
- dsb sy // with MMU off
-#endif
-
adr_l sp, initial_sp, x4
mov x4, sp
and x4, x4, #~(THREAD_SIZE - 1)
@@ -490,17 +447,19 @@ __mmap_switched:
bl kasan_early_init
#endif
#ifdef CONFIG_RANDOMIZE_BASE
- cbnz x23, 0f // already running randomized?
+ tst x23, ~(MIN_KIMG_ALIGN - 1) // already running randomized?
+ b.ne 0f
mov x0, x21 // pass FDT address in x0
+ mov x1, x23 // pass modulo offset in x1
bl kaslr_early_init // parse FDT for KASLR options
cbz x0, 0f // KASLR disabled? just proceed
- mov x23, x0 // record KASLR offset
+ orr x23, x23, x0 // record KASLR offset
ret x28 // we must enable KASLR, return
// to __enable_mmu()
0:
#endif
b start_kernel
-ENDPROC(__mmap_switched)
+ENDPROC(__primary_switched)
/*
* end early head section, begin head code that is also used for
@@ -650,7 +609,7 @@ ENDPROC(el2_setup)
* Sets the __boot_cpu_mode flag depending on the CPU boot mode passed
* in x20. See arch/arm64/include/asm/virt.h for more info.
*/
-ENTRY(set_cpu_boot_mode_flag)
+set_cpu_boot_mode_flag:
adr_l x1, __boot_cpu_mode
cmp w20, #BOOT_CPU_MODE_EL2
b.ne 1f
@@ -683,7 +642,7 @@ ENTRY(secondary_holding_pen)
bl el2_setup // Drop to EL1, w20=cpu_boot_mode
bl set_cpu_boot_mode_flag
mrs x0, mpidr_el1
- ldr x1, =MPIDR_HWID_BITMASK
+ mov_q x1, MPIDR_HWID_BITMASK
and x0, x0, x1
adr_l x3, secondary_holding_pen_release
pen: ldr x4, [x3]
@@ -703,7 +662,7 @@ ENTRY(secondary_entry)
b secondary_startup
ENDPROC(secondary_entry)
-ENTRY(secondary_startup)
+secondary_startup:
/*
* Common entry point for secondary CPUs.
*/
@@ -711,14 +670,11 @@ ENTRY(secondary_startup)
adrp x26, swapper_pg_dir
bl __cpu_setup // initialise processor
- ldr x8, kimage_vaddr
- ldr w9, 0f
- sub x27, x8, w9, sxtw // address to jump to after enabling the MMU
+ adr_l x27, __secondary_switch // address to jump to after enabling the MMU
b __enable_mmu
ENDPROC(secondary_startup)
-0: .long (_text - TEXT_OFFSET) - __secondary_switched
-ENTRY(__secondary_switched)
+__secondary_switched:
adr_l x5, vectors
msr vbar_el1, x5
isb
@@ -768,7 +724,7 @@ ENTRY(__early_cpu_boot_status)
* If it isn't, park the CPU
*/
.section ".idmap.text", "ax"
-__enable_mmu:
+ENTRY(__enable_mmu)
mrs x22, sctlr_el1 // preserve old SCTLR_EL1 value
mrs x1, ID_AA64MMFR0_EL1
ubfx x2, x1, #ID_AA64MMFR0_TGRAN_SHIFT, 4
@@ -806,7 +762,6 @@ __enable_mmu:
ic iallu // flush instructions fetched
dsb nsh // via old mapping
isb
- add x27, x27, x23 // relocated __mmap_switched
#endif
br x27
ENDPROC(__enable_mmu)
@@ -819,3 +774,53 @@ __no_granule_support:
wfi
b 1b
ENDPROC(__no_granule_support)
+
+__primary_switch:
+#ifdef CONFIG_RELOCATABLE
+ /*
+ * Iterate over each entry in the relocation table, and apply the
+ * relocations in place.
+ */
+ ldr w8, =__dynsym_offset // offset to symbol table
+ ldr w9, =__rela_offset // offset to reloc table
+ ldr w10, =__rela_size // size of reloc table
+
+ mov_q x11, KIMAGE_VADDR // default virtual offset
+ add x11, x11, x23 // actual virtual offset
+ add x8, x8, x11 // __va(.dynsym)
+ add x9, x9, x11 // __va(.rela)
+ add x10, x9, x10 // __va(.rela) + sizeof(.rela)
+
+0: cmp x9, x10
+ b.hs 2f
+ ldp x11, x12, [x9], #24
+ ldr x13, [x9, #-8]
+ cmp w12, #R_AARCH64_RELATIVE
+ b.ne 1f
+ add x13, x13, x23 // relocate
+ str x13, [x11, x23]
+ b 0b
+
+1: cmp w12, #R_AARCH64_ABS64
+ b.ne 0b
+ add x12, x12, x12, lsl #1 // symtab offset: 24x top word
+ add x12, x8, x12, lsr #(32 - 3) // ... shifted into bottom word
+ ldrsh w14, [x12, #6] // Elf64_Sym::st_shndx
+ ldr x15, [x12, #8] // Elf64_Sym::st_value
+ cmp w14, #-0xf // SHN_ABS (0xfff1) ?
+ add x14, x15, x23 // relocate
+ csel x15, x14, x15, ne
+ add x15, x13, x15
+ str x15, [x11, x23]
+ b 0b
+
+2:
+#endif
+ ldr x8, =__primary_switched
+ br x8
+ENDPROC(__primary_switch)
+
+__secondary_switch:
+ ldr x8, =__secondary_switched
+ br x8
+ENDPROC(__secondary_switch)
diff --git a/arch/arm64/kernel/hibernate-asm.S b/arch/arm64/kernel/hibernate-asm.S
new file mode 100644
index 000000000000..46f29b6560ec
--- /dev/null
+++ b/arch/arm64/kernel/hibernate-asm.S
@@ -0,0 +1,176 @@
+/*
+ * Hibernate low-level support
+ *
+ * Copyright (C) 2016 ARM Ltd.
+ * Author: James Morse <james.morse@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#include <linux/linkage.h>
+#include <linux/errno.h>
+
+#include <asm/asm-offsets.h>
+#include <asm/assembler.h>
+#include <asm/cputype.h>
+#include <asm/memory.h>
+#include <asm/page.h>
+#include <asm/virt.h>
+
+/*
+ * To prevent the possibility of old and new partial table walks being visible
+ * in the tlb, switch the ttbr to a zero page when we invalidate the old
+ * records. D4.7.1 'General TLB maintenance requirements' in ARM DDI 0487A.i
+ * Even switching to our copied tables will cause a changed output address at
+ * each stage of the walk.
+ */
+.macro break_before_make_ttbr_switch zero_page, page_table
+ msr ttbr1_el1, \zero_page
+ isb
+ tlbi vmalle1is
+ dsb ish
+ msr ttbr1_el1, \page_table
+ isb
+.endm
+
+
+/*
+ * Resume from hibernate
+ *
+ * Loads temporary page tables then restores the memory image.
+ * Finally branches to cpu_resume() to restore the state saved by
+ * swsusp_arch_suspend().
+ *
+ * Because this code has to be copied to a 'safe' page, it can't call out to
+ * other functions by PC-relative address. Also remember that it may be
+ * mid-way through over-writing other functions. For this reason it contains
+ * code from flush_icache_range() and uses the copy_page() macro.
+ *
+ * This 'safe' page is mapped via ttbr0, and executed from there. This function
+ * switches to a copy of the linear map in ttbr1, performs the restore, then
+ * switches ttbr1 to the original kernel's swapper_pg_dir.
+ *
+ * All of memory gets written to, including code. We need to clean the kernel
+ * text to the Point of Coherence (PoC) before secondary cores can be booted.
+ * Because the kernel modules and executable pages mapped to user space are
+ * also written as data, we clean all pages we touch to the Point of
+ * Unification (PoU).
+ *
+ * x0: physical address of temporary page tables
+ * x1: physical address of swapper page tables
+ * x2: address of cpu_resume
+ * x3: linear map address of restore_pblist in the current kernel
+ * x4: physical address of __hyp_stub_vectors, or 0
+ * x5: physical address of a zero page that remains zero after resume
+ */
+.pushsection ".hibernate_exit.text", "ax"
+ENTRY(swsusp_arch_suspend_exit)
+ /*
+ * We execute from ttbr0, change ttbr1 to our copied linear map tables
+ * with a break-before-make via the zero page
+ */
+ break_before_make_ttbr_switch x5, x0
+
+ mov x21, x1
+ mov x30, x2
+ mov x24, x4
+ mov x25, x5
+
+ /* walk the restore_pblist and use copy_page() to over-write memory */
+ mov x19, x3
+
+1: ldr x10, [x19, #HIBERN_PBE_ORIG]
+ mov x0, x10
+ ldr x1, [x19, #HIBERN_PBE_ADDR]
+
+ copy_page x0, x1, x2, x3, x4, x5, x6, x7, x8, x9
+
+ add x1, x10, #PAGE_SIZE
+ /* Clean the copied page to PoU - based on flush_icache_range() */
+ dcache_line_size x2, x3
+ sub x3, x2, #1
+ bic x4, x10, x3
+2: dc cvau, x4 /* clean D line / unified line */
+ add x4, x4, x2
+ cmp x4, x1
+ b.lo 2b
+
+ ldr x19, [x19, #HIBERN_PBE_NEXT]
+ cbnz x19, 1b
+ dsb ish /* wait for PoU cleaning to finish */
+
+ /* switch to the restored kernels page tables */
+ break_before_make_ttbr_switch x25, x21
+
+ ic ialluis
+ dsb ish
+ isb
+
+ cbz x24, 3f /* Do we need to re-initialise EL2? */
+ hvc #0
+3: ret
+
+ .ltorg
+ENDPROC(swsusp_arch_suspend_exit)
+
+/*
+ * Restore the hyp stub.
+ * This must be done before the hibernate page is unmapped by _cpu_resume(),
+ * but happens before any of the hyp-stub's code is cleaned to PoC.
+ *
+ * x24: The physical address of __hyp_stub_vectors
+ */
+el1_sync:
+ msr vbar_el2, x24
+ eret
+ENDPROC(el1_sync)
+
+.macro invalid_vector label
+\label:
+ b \label
+ENDPROC(\label)
+.endm
+
+ invalid_vector el2_sync_invalid
+ invalid_vector el2_irq_invalid
+ invalid_vector el2_fiq_invalid
+ invalid_vector el2_error_invalid
+ invalid_vector el1_sync_invalid
+ invalid_vector el1_irq_invalid
+ invalid_vector el1_fiq_invalid
+ invalid_vector el1_error_invalid
+
+/* el2 vectors - switch el2 here while we restore the memory image. */
+ .align 11
+ENTRY(hibernate_el2_vectors)
+ ventry el2_sync_invalid // Synchronous EL2t
+ ventry el2_irq_invalid // IRQ EL2t
+ ventry el2_fiq_invalid // FIQ EL2t
+ ventry el2_error_invalid // Error EL2t
+
+ ventry el2_sync_invalid // Synchronous EL2h
+ ventry el2_irq_invalid // IRQ EL2h
+ ventry el2_fiq_invalid // FIQ EL2h
+ ventry el2_error_invalid // Error EL2h
+
+ ventry el1_sync // Synchronous 64-bit EL1
+ ventry el1_irq_invalid // IRQ 64-bit EL1
+ ventry el1_fiq_invalid // FIQ 64-bit EL1
+ ventry el1_error_invalid // Error 64-bit EL1
+
+ ventry el1_sync_invalid // Synchronous 32-bit EL1
+ ventry el1_irq_invalid // IRQ 32-bit EL1
+ ventry el1_fiq_invalid // FIQ 32-bit EL1
+ ventry el1_error_invalid // Error 32-bit EL1
+END(hibernate_el2_vectors)
+
+.popsection
diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c
new file mode 100644
index 000000000000..f8df75d740f4
--- /dev/null
+++ b/arch/arm64/kernel/hibernate.c
@@ -0,0 +1,487 @@
+/*:
+ * Hibernate support specific for ARM64
+ *
+ * Derived from work on ARM hibernation support by:
+ *
+ * Ubuntu project, hibernation support for mach-dove
+ * Copyright (C) 2010 Nokia Corporation (Hiroshi Doyu)
+ * Copyright (C) 2010 Texas Instruments, Inc. (Teerth Reddy et al.)
+ * https://lkml.org/lkml/2010/6/18/4
+ * https://lists.linux-foundation.org/pipermail/linux-pm/2010-June/027422.html
+ * https://patchwork.kernel.org/patch/96442/
+ *
+ * Copyright (C) 2006 Rafael J. Wysocki <rjw@sisk.pl>
+ *
+ * License terms: GNU General Public License (GPL) version 2
+ */
+#define pr_fmt(x) "hibernate: " x
+#include <linux/kvm_host.h>
+#include <linux/mm.h>
+#include <linux/notifier.h>
+#include <linux/pm.h>
+#include <linux/sched.h>
+#include <linux/suspend.h>
+#include <linux/utsname.h>
+#include <linux/version.h>
+
+#include <asm/barrier.h>
+#include <asm/cacheflush.h>
+#include <asm/irqflags.h>
+#include <asm/memory.h>
+#include <asm/mmu_context.h>
+#include <asm/pgalloc.h>
+#include <asm/pgtable.h>
+#include <asm/pgtable-hwdef.h>
+#include <asm/sections.h>
+#include <asm/suspend.h>
+#include <asm/virt.h>
+
+/*
+ * Hibernate core relies on this value being 0 on resume, and marks it
+ * __nosavedata assuming it will keep the resume kernel's '0' value. This
+ * doesn't happen with either KASLR.
+ *
+ * defined as "__visible int in_suspend __nosavedata" in
+ * kernel/power/hibernate.c
+ */
+extern int in_suspend;
+
+/* Find a symbols alias in the linear map */
+#define LMADDR(x) phys_to_virt(virt_to_phys(x))
+
+/* Do we need to reset el2? */
+#define el2_reset_needed() (is_hyp_mode_available() && !is_kernel_in_hyp_mode())
+
+/*
+ * Start/end of the hibernate exit code, this must be copied to a 'safe'
+ * location in memory, and executed from there.
+ */
+extern char __hibernate_exit_text_start[], __hibernate_exit_text_end[];
+
+/* temporary el2 vectors in the __hibernate_exit_text section. */
+extern char hibernate_el2_vectors[];
+
+/* hyp-stub vectors, used to restore el2 during resume from hibernate. */
+extern char __hyp_stub_vectors[];
+
+/*
+ * Values that may not change over hibernate/resume. We put the build number
+ * and date in here so that we guarantee not to resume with a different
+ * kernel.
+ */
+struct arch_hibernate_hdr_invariants {
+ char uts_version[__NEW_UTS_LEN + 1];
+};
+
+/* These values need to be know across a hibernate/restore. */
+static struct arch_hibernate_hdr {
+ struct arch_hibernate_hdr_invariants invariants;
+
+ /* These are needed to find the relocated kernel if built with kaslr */
+ phys_addr_t ttbr1_el1;
+ void (*reenter_kernel)(void);
+
+ /*
+ * We need to know where the __hyp_stub_vectors are after restore to
+ * re-configure el2.
+ */
+ phys_addr_t __hyp_stub_vectors;
+} resume_hdr;
+
+static inline void arch_hdr_invariants(struct arch_hibernate_hdr_invariants *i)
+{
+ memset(i, 0, sizeof(*i));
+ memcpy(i->uts_version, init_utsname()->version, sizeof(i->uts_version));
+}
+
+int pfn_is_nosave(unsigned long pfn)
+{
+ unsigned long nosave_begin_pfn = virt_to_pfn(&__nosave_begin);
+ unsigned long nosave_end_pfn = virt_to_pfn(&__nosave_end - 1);
+
+ return (pfn >= nosave_begin_pfn) && (pfn <= nosave_end_pfn);
+}
+
+void notrace save_processor_state(void)
+{
+ WARN_ON(num_online_cpus() != 1);
+}
+
+void notrace restore_processor_state(void)
+{
+}
+
+int arch_hibernation_header_save(void *addr, unsigned int max_size)
+{
+ struct arch_hibernate_hdr *hdr = addr;
+
+ if (max_size < sizeof(*hdr))
+ return -EOVERFLOW;
+
+ arch_hdr_invariants(&hdr->invariants);
+ hdr->ttbr1_el1 = virt_to_phys(swapper_pg_dir);
+ hdr->reenter_kernel = _cpu_resume;
+
+ /* We can't use __hyp_get_vectors() because kvm may still be loaded */
+ if (el2_reset_needed())
+ hdr->__hyp_stub_vectors = virt_to_phys(__hyp_stub_vectors);
+ else
+ hdr->__hyp_stub_vectors = 0;
+
+ return 0;
+}
+EXPORT_SYMBOL(arch_hibernation_header_save);
+
+int arch_hibernation_header_restore(void *addr)
+{
+ struct arch_hibernate_hdr_invariants invariants;
+ struct arch_hibernate_hdr *hdr = addr;
+
+ arch_hdr_invariants(&invariants);
+ if (memcmp(&hdr->invariants, &invariants, sizeof(invariants))) {
+ pr_crit("Hibernate image not generated by this kernel!\n");
+ return -EINVAL;
+ }
+
+ resume_hdr = *hdr;
+
+ return 0;
+}
+EXPORT_SYMBOL(arch_hibernation_header_restore);
+
+/*
+ * Copies length bytes, starting at src_start into an new page,
+ * perform cache maintentance, then maps it at the specified address low
+ * address as executable.
+ *
+ * This is used by hibernate to copy the code it needs to execute when
+ * overwriting the kernel text. This function generates a new set of page
+ * tables, which it loads into ttbr0.
+ *
+ * Length is provided as we probably only want 4K of data, even on a 64K
+ * page system.
+ */
+static int create_safe_exec_page(void *src_start, size_t length,
+ unsigned long dst_addr,
+ phys_addr_t *phys_dst_addr,
+ void *(*allocator)(gfp_t mask),
+ gfp_t mask)
+{
+ int rc = 0;
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte;
+ unsigned long dst = (unsigned long)allocator(mask);
+
+ if (!dst) {
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ memcpy((void *)dst, src_start, length);
+ flush_icache_range(dst, dst + length);
+
+ pgd = pgd_offset_raw(allocator(mask), dst_addr);
+ if (pgd_none(*pgd)) {
+ pud = allocator(mask);
+ if (!pud) {
+ rc = -ENOMEM;
+ goto out;
+ }
+ pgd_populate(&init_mm, pgd, pud);
+ }
+
+ pud = pud_offset(pgd, dst_addr);
+ if (pud_none(*pud)) {
+ pmd = allocator(mask);
+ if (!pmd) {
+ rc = -ENOMEM;
+ goto out;
+ }
+ pud_populate(&init_mm, pud, pmd);
+ }
+
+ pmd = pmd_offset(pud, dst_addr);
+ if (pmd_none(*pmd)) {
+ pte = allocator(mask);
+ if (!pte) {
+ rc = -ENOMEM;
+ goto out;
+ }
+ pmd_populate_kernel(&init_mm, pmd, pte);
+ }
+
+ pte = pte_offset_kernel(pmd, dst_addr);
+ set_pte(pte, __pte(virt_to_phys((void *)dst) |
+ pgprot_val(PAGE_KERNEL_EXEC)));
+
+ /* Load our new page tables */
+ asm volatile("msr ttbr0_el1, %0;"
+ "isb;"
+ "tlbi vmalle1is;"
+ "dsb ish;"
+ "isb" : : "r"(virt_to_phys(pgd)));
+
+ *phys_dst_addr = virt_to_phys((void *)dst);
+
+out:
+ return rc;
+}
+
+
+int swsusp_arch_suspend(void)
+{
+ int ret = 0;
+ unsigned long flags;
+ struct sleep_stack_data state;
+
+ local_dbg_save(flags);
+
+ if (__cpu_suspend_enter(&state)) {
+ ret = swsusp_save();
+ } else {
+ /* Clean kernel to PoC for secondary core startup */
+ __flush_dcache_area(LMADDR(KERNEL_START), KERNEL_END - KERNEL_START);
+
+ /*
+ * Tell the hibernation core that we've just restored
+ * the memory
+ */
+ in_suspend = 0;
+
+ __cpu_suspend_exit();
+ }
+
+ local_dbg_restore(flags);
+
+ return ret;
+}
+
+static int copy_pte(pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long start,
+ unsigned long end)
+{
+ pte_t *src_pte;
+ pte_t *dst_pte;
+ unsigned long addr = start;
+
+ dst_pte = (pte_t *)get_safe_page(GFP_ATOMIC);
+ if (!dst_pte)
+ return -ENOMEM;
+ pmd_populate_kernel(&init_mm, dst_pmd, dst_pte);
+ dst_pte = pte_offset_kernel(dst_pmd, start);
+
+ src_pte = pte_offset_kernel(src_pmd, start);
+ do {
+ if (!pte_none(*src_pte))
+ /*
+ * Resume will overwrite areas that may be marked
+ * read only (code, rodata). Clear the RDONLY bit from
+ * the temporary mappings we use during restore.
+ */
+ set_pte(dst_pte, __pte(pte_val(*src_pte) & ~PTE_RDONLY));
+ } while (dst_pte++, src_pte++, addr += PAGE_SIZE, addr != end);
+
+ return 0;
+}
+
+static int copy_pmd(pud_t *dst_pud, pud_t *src_pud, unsigned long start,
+ unsigned long end)
+{
+ pmd_t *src_pmd;
+ pmd_t *dst_pmd;
+ unsigned long next;
+ unsigned long addr = start;
+
+ if (pud_none(*dst_pud)) {
+ dst_pmd = (pmd_t *)get_safe_page(GFP_ATOMIC);
+ if (!dst_pmd)
+ return -ENOMEM;
+ pud_populate(&init_mm, dst_pud, dst_pmd);
+ }
+ dst_pmd = pmd_offset(dst_pud, start);
+
+ src_pmd = pmd_offset(src_pud, start);
+ do {
+ next = pmd_addr_end(addr, end);
+ if (pmd_none(*src_pmd))
+ continue;
+ if (pmd_table(*src_pmd)) {
+ if (copy_pte(dst_pmd, src_pmd, addr, next))
+ return -ENOMEM;
+ } else {
+ set_pmd(dst_pmd,
+ __pmd(pmd_val(*src_pmd) & ~PMD_SECT_RDONLY));
+ }
+ } while (dst_pmd++, src_pmd++, addr = next, addr != end);
+
+ return 0;
+}
+
+static int copy_pud(pgd_t *dst_pgd, pgd_t *src_pgd, unsigned long start,
+ unsigned long end)
+{
+ pud_t *dst_pud;
+ pud_t *src_pud;
+ unsigned long next;
+ unsigned long addr = start;
+
+ if (pgd_none(*dst_pgd)) {
+ dst_pud = (pud_t *)get_safe_page(GFP_ATOMIC);
+ if (!dst_pud)
+ return -ENOMEM;
+ pgd_populate(&init_mm, dst_pgd, dst_pud);
+ }
+ dst_pud = pud_offset(dst_pgd, start);
+
+ src_pud = pud_offset(src_pgd, start);
+ do {
+ next = pud_addr_end(addr, end);
+ if (pud_none(*src_pud))
+ continue;
+ if (pud_table(*(src_pud))) {
+ if (copy_pmd(dst_pud, src_pud, addr, next))
+ return -ENOMEM;
+ } else {
+ set_pud(dst_pud,
+ __pud(pud_val(*src_pud) & ~PMD_SECT_RDONLY));
+ }
+ } while (dst_pud++, src_pud++, addr = next, addr != end);
+
+ return 0;
+}
+
+static int copy_page_tables(pgd_t *dst_pgd, unsigned long start,
+ unsigned long end)
+{
+ unsigned long next;
+ unsigned long addr = start;
+ pgd_t *src_pgd = pgd_offset_k(start);
+
+ dst_pgd = pgd_offset_raw(dst_pgd, start);
+ do {
+ next = pgd_addr_end(addr, end);
+ if (pgd_none(*src_pgd))
+ continue;
+ if (copy_pud(dst_pgd, src_pgd, addr, next))
+ return -ENOMEM;
+ } while (dst_pgd++, src_pgd++, addr = next, addr != end);
+
+ return 0;
+}
+
+/*
+ * Setup then Resume from the hibernate image using swsusp_arch_suspend_exit().
+ *
+ * Memory allocated by get_safe_page() will be dealt with by the hibernate code,
+ * we don't need to free it here.
+ */
+int swsusp_arch_resume(void)
+{
+ int rc = 0;
+ void *zero_page;
+ size_t exit_size;
+ pgd_t *tmp_pg_dir;
+ void *lm_restore_pblist;
+ phys_addr_t phys_hibernate_exit;
+ void __noreturn (*hibernate_exit)(phys_addr_t, phys_addr_t, void *,
+ void *, phys_addr_t, phys_addr_t);
+
+ /*
+ * Locate the exit code in the bottom-but-one page, so that *NULL
+ * still has disastrous affects.
+ */
+ hibernate_exit = (void *)PAGE_SIZE;
+ exit_size = __hibernate_exit_text_end - __hibernate_exit_text_start;
+ /*
+ * Copy swsusp_arch_suspend_exit() to a safe page. This will generate
+ * a new set of ttbr0 page tables and load them.
+ */
+ rc = create_safe_exec_page(__hibernate_exit_text_start, exit_size,
+ (unsigned long)hibernate_exit,
+ &phys_hibernate_exit,
+ (void *)get_safe_page, GFP_ATOMIC);
+ if (rc) {
+ pr_err("Failed to create safe executable page for hibernate_exit code.");
+ goto out;
+ }
+
+ /*
+ * The hibernate exit text contains a set of el2 vectors, that will
+ * be executed at el2 with the mmu off in order to reload hyp-stub.
+ */
+ __flush_dcache_area(hibernate_exit, exit_size);
+
+ /*
+ * Restoring the memory image will overwrite the ttbr1 page tables.
+ * Create a second copy of just the linear map, and use this when
+ * restoring.
+ */
+ tmp_pg_dir = (pgd_t *)get_safe_page(GFP_ATOMIC);
+ if (!tmp_pg_dir) {
+ pr_err("Failed to allocate memory for temporary page tables.");
+ rc = -ENOMEM;
+ goto out;
+ }
+ rc = copy_page_tables(tmp_pg_dir, PAGE_OFFSET, 0);
+ if (rc)
+ goto out;
+
+ /*
+ * Since we only copied the linear map, we need to find restore_pblist's
+ * linear map address.
+ */
+ lm_restore_pblist = LMADDR(restore_pblist);
+
+ /*
+ * KASLR will cause the el2 vectors to be in a different location in
+ * the resumed kernel. Load hibernate's temporary copy into el2.
+ *
+ * We can skip this step if we booted at EL1, or are running with VHE.
+ */
+ if (el2_reset_needed()) {
+ phys_addr_t el2_vectors = phys_hibernate_exit; /* base */
+ el2_vectors += hibernate_el2_vectors -
+ __hibernate_exit_text_start; /* offset */
+
+ __hyp_set_vectors(el2_vectors);
+ }
+
+ /*
+ * We need a zero page that is zero before & after resume in order to
+ * to break before make on the ttbr1 page tables.
+ */
+ zero_page = (void *)get_safe_page(GFP_ATOMIC);
+
+ hibernate_exit(virt_to_phys(tmp_pg_dir), resume_hdr.ttbr1_el1,
+ resume_hdr.reenter_kernel, lm_restore_pblist,
+ resume_hdr.__hyp_stub_vectors, virt_to_phys(zero_page));
+
+out:
+ return rc;
+}
+
+static int check_boot_cpu_online_pm_callback(struct notifier_block *nb,
+ unsigned long action, void *ptr)
+{
+ if (action == PM_HIBERNATION_PREPARE &&
+ cpumask_first(cpu_online_mask) != 0) {
+ pr_warn("CPU0 is offline.\n");
+ return notifier_from_errno(-ENODEV);
+ }
+
+ return NOTIFY_OK;
+}
+
+static int __init check_boot_cpu_online_init(void)
+{
+ /*
+ * Set this pm_notifier callback with a lower priority than
+ * cpu_hotplug_pm_callback, so that cpu_hotplug_pm_callback will be
+ * called earlier to disable cpu hotplug before the cpu online check.
+ */
+ pm_notifier(check_boot_cpu_online_pm_callback, -INT_MAX);
+
+ return 0;
+}
+core_initcall(check_boot_cpu_online_init);
diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c
index 4ef5373f9a76..ce21aa88263f 100644
--- a/arch/arm64/kernel/hw_breakpoint.c
+++ b/arch/arm64/kernel/hw_breakpoint.c
@@ -886,9 +886,11 @@ static int hw_breakpoint_reset_notify(struct notifier_block *self,
unsigned long action,
void *hcpu)
{
- int cpu = (long)hcpu;
- if ((action & ~CPU_TASKS_FROZEN) == CPU_ONLINE)
- smp_call_function_single(cpu, hw_breakpoint_reset, NULL, 1);
+ if ((action & ~CPU_TASKS_FROZEN) == CPU_ONLINE) {
+ local_irq_disable();
+ hw_breakpoint_reset(NULL);
+ local_irq_enable();
+ }
return NOTIFY_OK;
}
diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S
index a272f335c289..8727f4490772 100644
--- a/arch/arm64/kernel/hyp-stub.S
+++ b/arch/arm64/kernel/hyp-stub.S
@@ -22,6 +22,8 @@
#include <linux/irqchip/arm-gic-v3.h>
#include <asm/assembler.h>
+#include <asm/kvm_arm.h>
+#include <asm/kvm_asm.h>
#include <asm/ptrace.h>
#include <asm/virt.h>
@@ -53,15 +55,26 @@ ENDPROC(__hyp_stub_vectors)
.align 11
el1_sync:
- mrs x1, esr_el2
- lsr x1, x1, #26
- cmp x1, #0x16
- b.ne 2f // Not an HVC trap
- cbz x0, 1f
- msr vbar_el2, x0 // Set vbar_el2
- b 2f
-1: mrs x0, vbar_el2 // Return vbar_el2
-2: eret
+ mrs x30, esr_el2
+ lsr x30, x30, #ESR_ELx_EC_SHIFT
+
+ cmp x30, #ESR_ELx_EC_HVC64
+ b.ne 9f // Not an HVC trap
+
+ cmp x0, #HVC_GET_VECTORS
+ b.ne 1f
+ mrs x0, vbar_el2
+ b 9f
+
+1: cmp x0, #HVC_SET_VECTORS
+ b.ne 2f
+ msr vbar_el2, x1
+ b 9f
+
+ /* Someone called kvm_call_hyp() against the hyp-stub... */
+2: mov x0, #ARM_EXCEPTION_HYP_GONE
+
+9: eret
ENDPROC(el1_sync)
.macro invalid_vector label
@@ -101,10 +114,18 @@ ENDPROC(\label)
*/
ENTRY(__hyp_get_vectors)
- mov x0, xzr
- // fall through
-ENTRY(__hyp_set_vectors)
+ str lr, [sp, #-16]!
+ mov x0, #HVC_GET_VECTORS
hvc #0
+ ldr lr, [sp], #16
ret
ENDPROC(__hyp_get_vectors)
+
+ENTRY(__hyp_set_vectors)
+ str lr, [sp, #-16]!
+ mov x1, x0
+ mov x0, #HVC_SET_VECTORS
+ hvc #0
+ ldr lr, [sp], #16
+ ret
ENDPROC(__hyp_set_vectors)
diff --git a/arch/arm64/kernel/image.h b/arch/arm64/kernel/image.h
index 1428849aece8..c7fcb232fe47 100644
--- a/arch/arm64/kernel/image.h
+++ b/arch/arm64/kernel/image.h
@@ -73,6 +73,8 @@
#ifdef CONFIG_EFI
+__efistub_stext_offset = stext - _text;
+
/*
* Prevent the symbol aliases below from being emitted into the kallsyms
* table, by forcing them to be absolute symbols (which are conveniently
diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c
index 7371455160e5..368c08290dd8 100644
--- a/arch/arm64/kernel/insn.c
+++ b/arch/arm64/kernel/insn.c
@@ -96,7 +96,7 @@ static void __kprobes *patch_map(void *addr, int fixmap)
if (module && IS_ENABLED(CONFIG_DEBUG_SET_MODULE_RONX))
page = vmalloc_to_page(addr);
else if (!module && IS_ENABLED(CONFIG_DEBUG_RODATA))
- page = virt_to_page(addr);
+ page = pfn_to_page(PHYS_PFN(__pa(addr)));
else
return addr;
diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c
index 582983920054..b05469173ba5 100644
--- a/arch/arm64/kernel/kaslr.c
+++ b/arch/arm64/kernel/kaslr.c
@@ -74,7 +74,7 @@ extern void *__init __fixmap_remap_fdt(phys_addr_t dt_phys, int *size,
* containing function pointers) to be reinitialized, and zero-initialized
* .bss variables will be reset to 0.
*/
-u64 __init kaslr_early_init(u64 dt_phys)
+u64 __init kaslr_early_init(u64 dt_phys, u64 modulo_offset)
{
void *fdt;
u64 seed, offset, mask, module_range;
@@ -132,8 +132,8 @@ u64 __init kaslr_early_init(u64 dt_phys)
* boundary (for 4KB/16KB/64KB granule kernels, respectively). If this
* happens, increase the KASLR offset by the size of the kernel image.
*/
- if ((((u64)_text + offset) >> SWAPPER_TABLE_SHIFT) !=
- (((u64)_end + offset) >> SWAPPER_TABLE_SHIFT))
+ if ((((u64)_text + offset + modulo_offset) >> SWAPPER_TABLE_SHIFT) !=
+ (((u64)_end + offset + modulo_offset) >> SWAPPER_TABLE_SHIFT))
offset = (offset + (u64)(_end - _text)) & mask;
if (IS_ENABLED(CONFIG_KASAN))
diff --git a/arch/arm64/kernel/pci.c b/arch/arm64/kernel/pci.c
index c72de668e1d4..3c4e308b40a0 100644
--- a/arch/arm64/kernel/pci.c
+++ b/arch/arm64/kernel/pci.c
@@ -74,6 +74,16 @@ int raw_pci_write(unsigned int domain, unsigned int bus,
return -ENXIO;
}
+#ifdef CONFIG_NUMA
+
+int pcibus_to_node(struct pci_bus *bus)
+{
+ return dev_to_node(&bus->dev);
+}
+EXPORT_SYMBOL(pcibus_to_node);
+
+#endif
+
#ifdef CONFIG_ACPI
/* Root bridge scanning */
struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root)
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 80624829db61..48eea6866c67 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -265,9 +265,6 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start,
if (stack_start) {
if (is_compat_thread(task_thread_info(p)))
childregs->compat_sp = stack_start;
- /* 16-byte aligned stack mandatory on AArch64 */
- else if (stack_start & 15)
- return -EINVAL;
else
childregs->sp = stack_start;
}
@@ -382,13 +379,14 @@ unsigned long arch_align_stack(unsigned long sp)
return sp & ~0xf;
}
-static unsigned long randomize_base(unsigned long base)
-{
- unsigned long range_end = base + (STACK_RND_MASK << PAGE_SHIFT) + 1;
- return randomize_range(base, range_end, 0) ? : base;
-}
-
unsigned long arch_randomize_brk(struct mm_struct *mm)
{
- return randomize_base(mm->brk);
+ unsigned long range_end = mm->brk;
+
+ if (is_compat_task())
+ range_end += 0x02000000;
+ else
+ range_end += 0x40000000;
+
+ return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
}
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 9dc67769b6a4..3279defabaa2 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -53,6 +53,7 @@
#include <asm/cpufeature.h>
#include <asm/cpu_ops.h>
#include <asm/kasan.h>
+#include <asm/numa.h>
#include <asm/sections.h>
#include <asm/setup.h>
#include <asm/smp_plat.h>
@@ -175,7 +176,6 @@ static void __init smp_build_mpidr_hash(void)
*/
if (mpidr_hash_size() > 4 * num_possible_cpus())
pr_warn("Large number of MPIDR hash buckets detected\n");
- __flush_dcache_area(&mpidr_hash, sizeof(struct mpidr_hash));
}
static void __init setup_machine_fdt(phys_addr_t dt_phys)
@@ -224,69 +224,6 @@ static void __init request_standard_resources(void)
}
}
-#ifdef CONFIG_BLK_DEV_INITRD
-/*
- * Relocate initrd if it is not completely within the linear mapping.
- * This would be the case if mem= cuts out all or part of it.
- */
-static void __init relocate_initrd(void)
-{
- phys_addr_t orig_start = __virt_to_phys(initrd_start);
- phys_addr_t orig_end = __virt_to_phys(initrd_end);
- phys_addr_t ram_end = memblock_end_of_DRAM();
- phys_addr_t new_start;
- unsigned long size, to_free = 0;
- void *dest;
-
- if (orig_end <= ram_end)
- return;
-
- /*
- * Any of the original initrd which overlaps the linear map should
- * be freed after relocating.
- */
- if (orig_start < ram_end)
- to_free = ram_end - orig_start;
-
- size = orig_end - orig_start;
- if (!size)
- return;
-
- /* initrd needs to be relocated completely inside linear mapping */
- new_start = memblock_find_in_range(0, PFN_PHYS(max_pfn),
- size, PAGE_SIZE);
- if (!new_start)
- panic("Cannot relocate initrd of size %ld\n", size);
- memblock_reserve(new_start, size);
-
- initrd_start = __phys_to_virt(new_start);
- initrd_end = initrd_start + size;
-
- pr_info("Moving initrd from [%llx-%llx] to [%llx-%llx]\n",
- orig_start, orig_start + size - 1,
- new_start, new_start + size - 1);
-
- dest = (void *)initrd_start;
-
- if (to_free) {
- memcpy(dest, (void *)__phys_to_virt(orig_start), to_free);
- dest += to_free;
- }
-
- copy_from_early_mem(dest, orig_start + to_free, size - to_free);
-
- if (to_free) {
- pr_info("Freeing original RAMDISK from [%llx-%llx]\n",
- orig_start, orig_start + to_free - 1);
- memblock_free(orig_start, to_free);
- }
-}
-#else
-static inline void __init relocate_initrd(void)
-{
-}
-#endif
-
u64 __cpu_logical_map[NR_CPUS] = { [0 ... NR_CPUS-1] = INVALID_HWID };
void __init setup_arch(char **cmdline_p)
@@ -327,7 +264,11 @@ void __init setup_arch(char **cmdline_p)
acpi_boot_table_init();
paging_init();
- relocate_initrd();
+
+ if (acpi_disabled)
+ unflatten_device_tree();
+
+ bootmem_init();
kasan_init();
@@ -335,12 +276,11 @@ void __init setup_arch(char **cmdline_p)
early_ioremap_reset();
- if (acpi_disabled) {
- unflatten_device_tree();
+ if (acpi_disabled)
psci_dt_init();
- } else {
+ else
psci_acpi_init();
- }
+
xen_early_init();
cpu_read_bootcpu_ops();
@@ -379,6 +319,9 @@ static int __init topology_init(void)
{
int i;
+ for_each_online_node(i)
+ register_one_node(i);
+
for_each_possible_cpu(i) {
struct cpu *cpu = &per_cpu(cpu_data.cpu, i);
cpu->hotpluggable = 1;
diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S
index fd10eb663868..9a3aec97ac09 100644
--- a/arch/arm64/kernel/sleep.S
+++ b/arch/arm64/kernel/sleep.S
@@ -49,39 +49,32 @@
orr \dst, \dst, \mask // dst|=(aff3>>rs3)
.endm
/*
- * Save CPU state for a suspend and execute the suspend finisher.
- * On success it will return 0 through cpu_resume - ie through a CPU
- * soft/hard reboot from the reset vector.
- * On failure it returns the suspend finisher return value or force
- * -EOPNOTSUPP if the finisher erroneously returns 0 (the suspend finisher
- * is not allowed to return, if it does this must be considered failure).
- * It saves callee registers, and allocates space on the kernel stack
- * to save the CPU specific registers + some other data for resume.
+ * Save CPU state in the provided sleep_stack_data area, and publish its
+ * location for cpu_resume()'s use in sleep_save_stash.
*
- * x0 = suspend finisher argument
- * x1 = suspend finisher function pointer
+ * cpu_resume() will restore this saved state, and return. Because the
+ * link-register is saved and restored, it will appear to return from this
+ * function. So that the caller can tell the suspend/resume paths apart,
+ * __cpu_suspend_enter() will always return a non-zero value, whereas the
+ * path through cpu_resume() will return 0.
+ *
+ * x0 = struct sleep_stack_data area
*/
ENTRY(__cpu_suspend_enter)
- stp x29, lr, [sp, #-96]!
- stp x19, x20, [sp,#16]
- stp x21, x22, [sp,#32]
- stp x23, x24, [sp,#48]
- stp x25, x26, [sp,#64]
- stp x27, x28, [sp,#80]
- /*
- * Stash suspend finisher and its argument in x20 and x19
- */
- mov x19, x0
- mov x20, x1
+ stp x29, lr, [x0, #SLEEP_STACK_DATA_CALLEE_REGS]
+ stp x19, x20, [x0,#SLEEP_STACK_DATA_CALLEE_REGS+16]
+ stp x21, x22, [x0,#SLEEP_STACK_DATA_CALLEE_REGS+32]
+ stp x23, x24, [x0,#SLEEP_STACK_DATA_CALLEE_REGS+48]
+ stp x25, x26, [x0,#SLEEP_STACK_DATA_CALLEE_REGS+64]
+ stp x27, x28, [x0,#SLEEP_STACK_DATA_CALLEE_REGS+80]
+
+ /* save the sp in cpu_suspend_ctx */
mov x2, sp
- sub sp, sp, #CPU_SUSPEND_SZ // allocate cpu_suspend_ctx
- mov x0, sp
- /*
- * x0 now points to struct cpu_suspend_ctx allocated on the stack
- */
- str x2, [x0, #CPU_CTX_SP]
- ldr x1, =sleep_save_sp
- ldr x1, [x1, #SLEEP_SAVE_SP_VIRT]
+ str x2, [x0, #SLEEP_STACK_DATA_SYSTEM_REGS + CPU_CTX_SP]
+
+ /* find the mpidr_hash */
+ ldr x1, =sleep_save_stash
+ ldr x1, [x1]
mrs x7, mpidr_el1
ldr x9, =mpidr_hash
ldr x10, [x9, #MPIDR_HASH_MASK]
@@ -93,74 +86,28 @@ ENTRY(__cpu_suspend_enter)
ldp w5, w6, [x9, #(MPIDR_HASH_SHIFTS + 8)]
compute_mpidr_hash x8, x3, x4, x5, x6, x7, x10
add x1, x1, x8, lsl #3
- bl __cpu_suspend_save
- /*
- * Grab suspend finisher in x20 and its argument in x19
- */
- mov x0, x19
- mov x1, x20
- /*
- * We are ready for power down, fire off the suspend finisher
- * in x1, with argument in x0
- */
- blr x1
- /*
- * Never gets here, unless suspend finisher fails.
- * Successful cpu_suspend should return from cpu_resume, returning
- * through this code path is considered an error
- * If the return value is set to 0 force x0 = -EOPNOTSUPP
- * to make sure a proper error condition is propagated
- */
- cmp x0, #0
- mov x3, #-EOPNOTSUPP
- csel x0, x3, x0, eq
- add sp, sp, #CPU_SUSPEND_SZ // rewind stack pointer
- ldp x19, x20, [sp, #16]
- ldp x21, x22, [sp, #32]
- ldp x23, x24, [sp, #48]
- ldp x25, x26, [sp, #64]
- ldp x27, x28, [sp, #80]
- ldp x29, lr, [sp], #96
+
+ str x0, [x1]
+ add x0, x0, #SLEEP_STACK_DATA_SYSTEM_REGS
+ stp x29, lr, [sp, #-16]!
+ bl cpu_do_suspend
+ ldp x29, lr, [sp], #16
+ mov x0, #1
ret
ENDPROC(__cpu_suspend_enter)
.ltorg
-/*
- * x0 must contain the sctlr value retrieved from restored context
- */
- .pushsection ".idmap.text", "ax"
-ENTRY(cpu_resume_mmu)
- ldr x3, =cpu_resume_after_mmu
- msr sctlr_el1, x0 // restore sctlr_el1
- isb
- /*
- * Invalidate the local I-cache so that any instructions fetched
- * speculatively from the PoC are discarded, since they may have
- * been dynamically patched at the PoU.
- */
- ic iallu
- dsb nsh
- isb
- br x3 // global jump to virtual address
-ENDPROC(cpu_resume_mmu)
- .popsection
-cpu_resume_after_mmu:
-#ifdef CONFIG_KASAN
- mov x0, sp
- bl kasan_unpoison_remaining_stack
-#endif
- mov x0, #0 // return zero on success
- ldp x19, x20, [sp, #16]
- ldp x21, x22, [sp, #32]
- ldp x23, x24, [sp, #48]
- ldp x25, x26, [sp, #64]
- ldp x27, x28, [sp, #80]
- ldp x29, lr, [sp], #96
- ret
-ENDPROC(cpu_resume_after_mmu)
-
ENTRY(cpu_resume)
bl el2_setup // if in EL2 drop to EL1 cleanly
+ /* enable the MMU early - so we can access sleep_save_stash by va */
+ adr_l lr, __enable_mmu /* __cpu_setup will return here */
+ ldr x27, =_cpu_resume /* __enable_mmu will branch here */
+ adrp x25, idmap_pg_dir
+ adrp x26, swapper_pg_dir
+ b __cpu_setup
+ENDPROC(cpu_resume)
+
+ENTRY(_cpu_resume)
mrs x1, mpidr_el1
adrp x8, mpidr_hash
add x8, x8, #:lo12:mpidr_hash // x8 = struct mpidr_hash phys address
@@ -170,20 +117,32 @@ ENTRY(cpu_resume)
ldp w5, w6, [x8, #(MPIDR_HASH_SHIFTS + 8)]
compute_mpidr_hash x7, x3, x4, x5, x6, x1, x2
/* x7 contains hash index, let's use it to grab context pointer */
- ldr_l x0, sleep_save_sp + SLEEP_SAVE_SP_PHYS
+ ldr_l x0, sleep_save_stash
ldr x0, [x0, x7, lsl #3]
+ add x29, x0, #SLEEP_STACK_DATA_CALLEE_REGS
+ add x0, x0, #SLEEP_STACK_DATA_SYSTEM_REGS
/* load sp from context */
ldr x2, [x0, #CPU_CTX_SP]
- /* load physical address of identity map page table in x1 */
- adrp x1, idmap_pg_dir
mov sp, x2
/* save thread_info */
and x2, x2, #~(THREAD_SIZE - 1)
msr sp_el0, x2
/*
- * cpu_do_resume expects x0 to contain context physical address
- * pointer and x1 to contain physical address of 1:1 page tables
+ * cpu_do_resume expects x0 to contain context address pointer
*/
- bl cpu_do_resume // PC relative jump, MMU off
- b cpu_resume_mmu // Resume MMU, never returns
-ENDPROC(cpu_resume)
+ bl cpu_do_resume
+
+#ifdef CONFIG_KASAN
+ mov x0, sp
+ bl kasan_unpoison_remaining_stack
+#endif
+
+ ldp x19, x20, [x29, #16]
+ ldp x21, x22, [x29, #32]
+ ldp x23, x24, [x29, #48]
+ ldp x25, x26, [x29, #64]
+ ldp x27, x28, [x29, #80]
+ ldp x29, lr, [x29]
+ mov x0, #0
+ ret
+ENDPROC(_cpu_resume)
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index b2d5f4ee9a1c..678e0842cb3b 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -45,6 +45,7 @@
#include <asm/cputype.h>
#include <asm/cpu_ops.h>
#include <asm/mmu_context.h>
+#include <asm/numa.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/processor.h>
@@ -75,6 +76,43 @@ enum ipi_msg_type {
IPI_WAKEUP
};
+#ifdef CONFIG_ARM64_VHE
+
+/* Whether the boot CPU is running in HYP mode or not*/
+static bool boot_cpu_hyp_mode;
+
+static inline void save_boot_cpu_run_el(void)
+{
+ boot_cpu_hyp_mode = is_kernel_in_hyp_mode();
+}
+
+static inline bool is_boot_cpu_in_hyp_mode(void)
+{
+ return boot_cpu_hyp_mode;
+}
+
+/*
+ * Verify that a secondary CPU is running the kernel at the same
+ * EL as that of the boot CPU.
+ */
+void verify_cpu_run_el(void)
+{
+ bool in_el2 = is_kernel_in_hyp_mode();
+ bool boot_cpu_el2 = is_boot_cpu_in_hyp_mode();
+
+ if (in_el2 ^ boot_cpu_el2) {
+ pr_crit("CPU%d: mismatched Exception Level(EL%d) with boot CPU(EL%d)\n",
+ smp_processor_id(),
+ in_el2 ? 2 : 1,
+ boot_cpu_el2 ? 2 : 1);
+ cpu_panic_kernel();
+ }
+}
+
+#else
+static inline void save_boot_cpu_run_el(void) {}
+#endif
+
#ifdef CONFIG_HOTPLUG_CPU
static int op_cpu_kill(unsigned int cpu);
#else
@@ -166,6 +204,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
static void smp_store_cpu_info(unsigned int cpuid)
{
store_cpu_topology(cpuid);
+ numa_store_cpu_info(cpuid);
}
/*
@@ -225,8 +264,6 @@ asmlinkage void secondary_start_kernel(void)
pr_info("CPU%u: Booted secondary processor [%08x]\n",
cpu, read_cpuid_id());
update_cpu_boot_status(CPU_BOOT_SUCCESS);
- /* Make sure the status update is visible before we complete */
- smp_wmb();
set_cpu_online(cpu, true);
complete(&cpu_running);
@@ -401,6 +438,7 @@ void __init smp_cpus_done(unsigned int max_cpus)
void __init smp_prepare_boot_cpu(void)
{
cpuinfo_store_boot_cpu();
+ save_boot_cpu_run_el();
set_my_cpu_offset(per_cpu_offset(smp_processor_id()));
}
@@ -595,6 +633,8 @@ static void __init of_parse_and_init_cpus(void)
pr_debug("cpu logical map 0x%llx\n", hwid);
cpu_logical_map(cpu_count) = hwid;
+
+ early_map_cpu_to_node(cpu_count, of_node_to_nid(dn));
next:
cpu_count++;
}
@@ -647,33 +687,18 @@ void __init smp_init_cpus(void)
void __init smp_prepare_cpus(unsigned int max_cpus)
{
int err;
- unsigned int cpu, ncores = num_possible_cpus();
+ unsigned int cpu;
init_cpu_topology();
smp_store_cpu_info(smp_processor_id());
/*
- * are we trying to boot more cores than exist?
- */
- if (max_cpus > ncores)
- max_cpus = ncores;
-
- /* Don't bother if we're effectively UP */
- if (max_cpus <= 1)
- return;
-
- /*
* Initialise the present map (which describes the set of CPUs
* actually populated at the present time) and release the
* secondaries from the bootloader.
- *
- * Make sure we online at most (max_cpus - 1) additional CPUs.
*/
- max_cpus--;
for_each_possible_cpu(cpu) {
- if (max_cpus == 0)
- break;
if (cpu == smp_processor_id())
continue;
@@ -686,7 +711,6 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
continue;
set_cpu_present(cpu, true);
- max_cpus--;
}
}
@@ -763,21 +787,11 @@ void arch_irq_work_raise(void)
}
#endif
-static DEFINE_RAW_SPINLOCK(stop_lock);
-
/*
* ipi_cpu_stop - handle IPI from smp_send_stop()
*/
static void ipi_cpu_stop(unsigned int cpu)
{
- if (system_state == SYSTEM_BOOTING ||
- system_state == SYSTEM_RUNNING) {
- raw_spin_lock(&stop_lock);
- pr_crit("CPU%u: stopping\n", cpu);
- dump_stack();
- raw_spin_unlock(&stop_lock);
- }
-
set_cpu_online(cpu, false);
local_irq_disable();
@@ -872,6 +886,9 @@ void smp_send_stop(void)
cpumask_copy(&mask, cpu_online_mask);
cpumask_clear_cpu(smp_processor_id(), &mask);
+ if (system_state == SYSTEM_BOOTING ||
+ system_state == SYSTEM_RUNNING)
+ pr_crit("SMP: stopping secondary CPUs\n");
smp_cross_call(&mask, IPI_CPU_STOP);
}
@@ -881,7 +898,8 @@ void smp_send_stop(void)
udelay(1);
if (num_online_cpus() > 1)
- pr_warning("SMP: failed to stop secondary CPUs\n");
+ pr_warning("SMP: failed to stop secondary CPUs %*pbl\n",
+ cpumask_pr_args(cpu_online_mask));
}
/*
diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c
index 66055392f445..b616e365cee3 100644
--- a/arch/arm64/kernel/suspend.c
+++ b/arch/arm64/kernel/suspend.c
@@ -10,30 +10,11 @@
#include <asm/suspend.h>
#include <asm/tlbflush.h>
-extern int __cpu_suspend_enter(unsigned long arg, int (*fn)(unsigned long));
/*
- * This is called by __cpu_suspend_enter() to save the state, and do whatever
- * flushing is required to ensure that when the CPU goes to sleep we have
- * the necessary data available when the caches are not searched.
- *
- * ptr: CPU context virtual address
- * save_ptr: address of the location where the context physical address
- * must be saved
+ * This is allocated by cpu_suspend_init(), and used to store a pointer to
+ * the 'struct sleep_stack_data' the contains a particular CPUs state.
*/
-void notrace __cpu_suspend_save(struct cpu_suspend_ctx *ptr,
- phys_addr_t *save_ptr)
-{
- *save_ptr = virt_to_phys(ptr);
-
- cpu_do_suspend(ptr);
- /*
- * Only flush the context that must be retrieved with the MMU
- * off. VA primitives ensure the flush is applied to all
- * cache levels so context is pushed to DRAM.
- */
- __flush_dcache_area(ptr, sizeof(*ptr));
- __flush_dcache_area(save_ptr, sizeof(*save_ptr));
-}
+unsigned long *sleep_save_stash;
/*
* This hook is provided so that cpu_suspend code can restore HW
@@ -51,6 +32,30 @@ void __init cpu_suspend_set_dbg_restorer(void (*hw_bp_restore)(void *))
hw_breakpoint_restore = hw_bp_restore;
}
+void notrace __cpu_suspend_exit(void)
+{
+ /*
+ * We are resuming from reset with the idmap active in TTBR0_EL1.
+ * We must uninstall the idmap and restore the expected MMU
+ * state before we can possibly return to userspace.
+ */
+ cpu_uninstall_idmap();
+
+ /*
+ * Restore per-cpu offset before any kernel
+ * subsystem relying on it has a chance to run.
+ */
+ set_my_cpu_offset(per_cpu_offset(smp_processor_id()));
+
+ /*
+ * Restore HW breakpoint registers to sane values
+ * before debug exceptions are possibly reenabled
+ * through local_dbg_restore.
+ */
+ if (hw_breakpoint_restore)
+ hw_breakpoint_restore(NULL);
+}
+
/*
* cpu_suspend
*
@@ -60,8 +65,9 @@ void __init cpu_suspend_set_dbg_restorer(void (*hw_bp_restore)(void *))
*/
int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
{
- int ret;
+ int ret = 0;
unsigned long flags;
+ struct sleep_stack_data state;
/*
* From this point debug exceptions are disabled to prevent
@@ -77,34 +83,21 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
*/
pause_graph_tracing();
- /*
- * mm context saved on the stack, it will be restored when
- * the cpu comes out of reset through the identity mapped
- * page tables, so that the thread address space is properly
- * set-up on function return.
- */
- ret = __cpu_suspend_enter(arg, fn);
- if (ret == 0) {
- /*
- * We are resuming from reset with the idmap active in TTBR0_EL1.
- * We must uninstall the idmap and restore the expected MMU
- * state before we can possibly return to userspace.
- */
- cpu_uninstall_idmap();
+ if (__cpu_suspend_enter(&state)) {
+ /* Call the suspend finisher */
+ ret = fn(arg);
/*
- * Restore per-cpu offset before any kernel
- * subsystem relying on it has a chance to run.
+ * Never gets here, unless the suspend finisher fails.
+ * Successful cpu_suspend() should return from cpu_resume(),
+ * returning through this code path is considered an error
+ * If the return value is set to 0 force ret = -EOPNOTSUPP
+ * to make sure a proper error condition is propagated
*/
- set_my_cpu_offset(per_cpu_offset(smp_processor_id()));
-
- /*
- * Restore HW breakpoint registers to sane values
- * before debug exceptions are possibly reenabled
- * through local_dbg_restore.
- */
- if (hw_breakpoint_restore)
- hw_breakpoint_restore(NULL);
+ if (!ret)
+ ret = -EOPNOTSUPP;
+ } else {
+ __cpu_suspend_exit();
}
unpause_graph_tracing();
@@ -119,22 +112,15 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
return ret;
}
-struct sleep_save_sp sleep_save_sp;
-
static int __init cpu_suspend_init(void)
{
- void *ctx_ptr;
-
/* ctx_ptr is an array of physical addresses */
- ctx_ptr = kcalloc(mpidr_hash_size(), sizeof(phys_addr_t), GFP_KERNEL);
+ sleep_save_stash = kcalloc(mpidr_hash_size(), sizeof(*sleep_save_stash),
+ GFP_KERNEL);
- if (WARN_ON(!ctx_ptr))
+ if (WARN_ON(!sleep_save_stash))
return -ENOMEM;
- sleep_save_sp.save_ptr_stash = ctx_ptr;
- sleep_save_sp.save_ptr_stash_phys = virt_to_phys(ctx_ptr);
- __flush_dcache_area(&sleep_save_sp, sizeof(struct sleep_save_sp));
-
return 0;
}
early_initcall(cpu_suspend_init);
diff --git a/arch/arm64/kernel/sys.c b/arch/arm64/kernel/sys.c
index 75151aaf1a52..26fe8ea93ea2 100644
--- a/arch/arm64/kernel/sys.c
+++ b/arch/arm64/kernel/sys.c
@@ -25,6 +25,7 @@
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/syscalls.h>
+#include <asm/cpufeature.h>
asmlinkage long sys_mmap(unsigned long addr, unsigned long len,
unsigned long prot, unsigned long flags,
@@ -36,11 +37,20 @@ asmlinkage long sys_mmap(unsigned long addr, unsigned long len,
return sys_mmap_pgoff(addr, len, prot, flags, fd, off >> PAGE_SHIFT);
}
+SYSCALL_DEFINE1(arm64_personality, unsigned int, personality)
+{
+ if (personality(personality) == PER_LINUX32 &&
+ !system_supports_32bit_el0())
+ return -EINVAL;
+ return sys_personality(personality);
+}
+
/*
* Wrappers to pass the pt_regs argument.
*/
asmlinkage long sys_rt_sigreturn_wrapper(void);
#define sys_rt_sigreturn sys_rt_sigreturn_wrapper
+#define sys_personality sys_arm64_personality
#undef __SYSCALL
#define __SYSCALL(nr, sym) [nr] = sym,
diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c
index 97bc68f4c689..64fc030be0f2 100644
--- a/arch/arm64/kernel/vdso.c
+++ b/arch/arm64/kernel/vdso.c
@@ -131,11 +131,11 @@ static int __init vdso_init(void)
return -ENOMEM;
/* Grab the vDSO data page. */
- vdso_pagelist[0] = virt_to_page(vdso_data);
+ vdso_pagelist[0] = pfn_to_page(PHYS_PFN(__pa(vdso_data)));
/* Grab the vDSO code pages. */
for (i = 0; i < vdso_pages; i++)
- vdso_pagelist[i + 1] = virt_to_page(&vdso_start + i * PAGE_SIZE);
+ vdso_pagelist[i + 1] = pfn_to_page(PHYS_PFN(__pa(&vdso_start)) + i);
/* Populate the special mapping structures */
vdso_spec[0] = (struct vm_special_mapping) {
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index 5a1939a74ff3..435e820e898d 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -46,6 +46,16 @@ jiffies = jiffies_64;
*(.idmap.text) \
VMLINUX_SYMBOL(__idmap_text_end) = .;
+#ifdef CONFIG_HIBERNATION
+#define HIBERNATE_TEXT \
+ . = ALIGN(SZ_4K); \
+ VMLINUX_SYMBOL(__hibernate_exit_text_start) = .;\
+ *(.hibernate_exit.text) \
+ VMLINUX_SYMBOL(__hibernate_exit_text_end) = .;
+#else
+#define HIBERNATE_TEXT
+#endif
+
/*
* The size of the PE/COFF section that covers the kernel image, which
* runs from stext to _edata, must be a round multiple of the PE/COFF
@@ -63,14 +73,19 @@ PECOFF_FILE_ALIGNMENT = 0x200;
#endif
#if defined(CONFIG_DEBUG_ALIGN_RODATA)
-#define ALIGN_DEBUG_RO . = ALIGN(1<<SECTION_SHIFT);
-#define ALIGN_DEBUG_RO_MIN(min) ALIGN_DEBUG_RO
-#elif defined(CONFIG_DEBUG_RODATA)
-#define ALIGN_DEBUG_RO . = ALIGN(1<<PAGE_SHIFT);
-#define ALIGN_DEBUG_RO_MIN(min) ALIGN_DEBUG_RO
+/*
+ * 4 KB granule: 1 level 2 entry
+ * 16 KB granule: 128 level 3 entries, with contiguous bit
+ * 64 KB granule: 32 level 3 entries, with contiguous bit
+ */
+#define SEGMENT_ALIGN SZ_2M
#else
-#define ALIGN_DEBUG_RO
-#define ALIGN_DEBUG_RO_MIN(min) . = ALIGN(min);
+/*
+ * 4 KB granule: 16 level 3 entries, with contiguous bit
+ * 16 KB granule: 4 level 3 entries, without contiguous bit
+ * 64 KB granule: 1 level 3 entry
+ */
+#define SEGMENT_ALIGN SZ_64K
#endif
SECTIONS
@@ -96,7 +111,6 @@ SECTIONS
_text = .;
HEAD_TEXT
}
- ALIGN_DEBUG_RO_MIN(PAGE_SIZE)
.text : { /* Real text segment */
_stext = .; /* Text and read-only data */
__exception_text_start = .;
@@ -109,18 +123,19 @@ SECTIONS
LOCK_TEXT
HYPERVISOR_TEXT
IDMAP_TEXT
+ HIBERNATE_TEXT
*(.fixup)
*(.gnu.warning)
. = ALIGN(16);
*(.got) /* Global offset table */
}
- ALIGN_DEBUG_RO_MIN(PAGE_SIZE)
+ . = ALIGN(SEGMENT_ALIGN);
RO_DATA(PAGE_SIZE) /* everything from this point to */
EXCEPTION_TABLE(8) /* _etext will be marked RO NX */
NOTES
- ALIGN_DEBUG_RO_MIN(PAGE_SIZE)
+ . = ALIGN(SEGMENT_ALIGN);
_etext = .; /* End of text and rodata section */
__init_begin = .;
@@ -154,12 +169,9 @@ SECTIONS
*(.altinstr_replacement)
}
.rela : ALIGN(8) {
- __reloc_start = .;
*(.rela .rela*)
- __reloc_end = .;
}
.dynsym : ALIGN(8) {
- __dynsym_start = .;
*(.dynsym)
}
.dynstr : {
@@ -169,7 +181,11 @@ SECTIONS
*(.hash)
}
- . = ALIGN(PAGE_SIZE);
+ __rela_offset = ADDR(.rela) - KIMAGE_VADDR;
+ __rela_size = SIZEOF(.rela);
+ __dynsym_offset = ADDR(.dynsym) - KIMAGE_VADDR;
+
+ . = ALIGN(SEGMENT_ALIGN);
__init_end = .;
_data = .;
@@ -201,6 +217,10 @@ ASSERT(__hyp_idmap_text_end - (__hyp_idmap_text_start & ~(SZ_4K - 1)) <= SZ_4K,
"HYP init code too big or misaligned")
ASSERT(__idmap_text_end - (__idmap_text_start & ~(SZ_4K - 1)) <= SZ_4K,
"ID map text too big or misaligned")
+#ifdef CONFIG_HIBERNATION
+ASSERT(__hibernate_exit_text_end - (__hibernate_exit_text_start & ~(SZ_4K - 1))
+ <= SZ_4K, "Hibernate exit text too big or misaligned")
+#endif
/*
* If padding is applied before .head.text, virt<->phys conversions will fail.