summaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig.debug2
-rw-r--r--arch/x86/boot/boot.h2
-rw-r--r--arch/x86/boot/compressed/head_32.S4
-rw-r--r--arch/x86/boot/compressed/head_64.S18
-rw-r--r--arch/x86/boot/main.c1
-rw-r--r--arch/x86/entry/entry_32.S6
-rw-r--r--arch/x86/entry/entry_64.S8
-rw-r--r--arch/x86/entry/thunk_32.S2
-rw-r--r--arch/x86/entry/thunk_64.S4
-rw-r--r--arch/x86/events/core.c34
-rw-r--r--arch/x86/events/intel/core.c18
-rw-r--r--arch/x86/events/intel/cstate.c4
-rw-r--r--arch/x86/events/intel/ds.c51
-rw-r--r--arch/x86/events/intel/lbr.c2
-rw-r--r--arch/x86/events/intel/pt.c330
-rw-r--r--arch/x86/events/intel/pt.h12
-rw-r--r--arch/x86/events/intel/rapl.c2
-rw-r--r--arch/x86/events/msr.c2
-rw-r--r--arch/x86/events/perf_event.h17
-rw-r--r--arch/x86/hyperv/mmu.c8
-rw-r--r--arch/x86/include/asm/bootparam_utils.h1
-rw-r--r--arch/x86/include/asm/div64.h13
-rw-r--r--arch/x86/include/asm/efi.h5
-rw-r--r--arch/x86/include/asm/error-injection.h13
-rw-r--r--arch/x86/include/asm/intel_pt.h2
-rw-r--r--arch/x86/include/asm/iommu.h1
-rw-r--r--arch/x86/include/asm/kvm_host.h2
-rw-r--r--arch/x86/include/asm/msr-index.h4
-rw-r--r--arch/x86/include/asm/preempt.h2
-rw-r--r--arch/x86/include/asm/qspinlock.h15
-rw-r--r--arch/x86/include/asm/uaccess.h4
-rw-r--r--arch/x86/include/asm/uv/uv.h4
-rw-r--r--arch/x86/include/uapi/asm/errno.h1
-rw-r--r--arch/x86/include/uapi/asm/fcntl.h1
-rw-r--r--arch/x86/include/uapi/asm/ioctl.h1
-rw-r--r--arch/x86/include/uapi/asm/ioctls.h1
-rw-r--r--arch/x86/include/uapi/asm/ipcbuf.h1
-rw-r--r--arch/x86/include/uapi/asm/param.h1
-rw-r--r--arch/x86/include/uapi/asm/resource.h1
-rw-r--r--arch/x86/include/uapi/asm/termbits.h1
-rw-r--r--arch/x86/include/uapi/asm/termios.h1
-rw-r--r--arch/x86/include/uapi/asm/types.h7
-rw-r--r--arch/x86/kernel/acpi/wakeup_64.S10
-rw-r--r--arch/x86/kernel/amd_nb.c3
-rw-r--r--arch/x86/kernel/apic/apic.c8
-rw-r--r--arch/x86/kernel/cpu/amd.c5
-rw-r--r--arch/x86/kernel/cpu/mce/severity.c4
-rw-r--r--arch/x86/kernel/dumpstack.c7
-rw-r--r--arch/x86/kernel/kprobes/core.c2
-rw-r--r--arch/x86/kernel/kvm.c2
-rw-r--r--arch/x86/kernel/pci-dma.c20
-rw-r--r--arch/x86/kernel/umip.c65
-rw-r--r--arch/x86/kvm/mmu.c101
-rw-r--r--arch/x86/kvm/vmx/nested.c4
-rw-r--r--arch/x86/kvm/x86.c7
-rw-r--r--arch/x86/lib/copy_user_64.S14
-rw-r--r--arch/x86/lib/getuser.S16
-rw-r--r--arch/x86/lib/putuser.S22
-rw-r--r--arch/x86/mm/ioremap.c1
-rw-r--r--arch/x86/pci/mmconfig-shared.c5
-rw-r--r--arch/x86/platform/efi/efi.c39
-rw-r--r--arch/x86/platform/uv/bios_uv.c10
-rw-r--r--arch/x86/purgatory/Makefile35
63 files changed, 687 insertions, 302 deletions
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 71c92db47c41..bf9cd83de777 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -171,7 +171,7 @@ config HAVE_MMIOTRACE_SUPPORT
config X86_DECODER_SELFTEST
bool "x86 instruction decoder selftest"
- depends on DEBUG_KERNEL && KPROBES
+ depends on DEBUG_KERNEL && INSTRUCTION_DECODER
depends on !COMPILE_TEST
---help---
Perform x86 instruction decoder selftests at build time.
diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h
index 19eca14b49a0..ca866f1cca2e 100644
--- a/arch/x86/boot/boot.h
+++ b/arch/x86/boot/boot.h
@@ -28,8 +28,6 @@
#include "cpuflags.h"
/* Useful macros */
-#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))
-
#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x)))
extern struct setup_header hdr;
diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S
index 37380c0d5999..5e30eaaf8576 100644
--- a/arch/x86/boot/compressed/head_32.S
+++ b/arch/x86/boot/compressed/head_32.S
@@ -140,7 +140,7 @@ ENTRY(startup_32)
/*
* Jump to the relocated address.
*/
- leal relocated(%ebx), %eax
+ leal .Lrelocated(%ebx), %eax
jmp *%eax
ENDPROC(startup_32)
@@ -209,7 +209,7 @@ ENDPROC(efi32_stub_entry)
#endif
.text
-relocated:
+.Lrelocated:
/*
* Clear BSS (stack is currently empty)
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index 6233ae35d0d9..d98cd483377e 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -87,7 +87,7 @@ ENTRY(startup_32)
call verify_cpu
testl %eax, %eax
- jnz no_longmode
+ jnz .Lno_longmode
/*
* Compute the delta between where we were compiled to run at
@@ -322,7 +322,7 @@ ENTRY(startup_64)
1: popq %rdi
subq $1b, %rdi
- call adjust_got
+ call .Ladjust_got
/*
* At this point we are in long mode with 4-level paging enabled,
@@ -421,7 +421,7 @@ trampoline_return:
/* The new adjustment is the relocation address */
movq %rbx, %rdi
- call adjust_got
+ call .Ladjust_got
/*
* Copy the compressed kernel to the end of our buffer
@@ -440,7 +440,7 @@ trampoline_return:
/*
* Jump to the relocated address.
*/
- leaq relocated(%rbx), %rax
+ leaq .Lrelocated(%rbx), %rax
jmp *%rax
#ifdef CONFIG_EFI_STUB
@@ -511,7 +511,7 @@ ENDPROC(efi64_stub_entry)
#endif
.text
-relocated:
+.Lrelocated:
/*
* Clear BSS (stack is currently empty)
@@ -548,7 +548,7 @@ relocated:
* first time we touch GOT).
* RDI is the new adjustment to apply.
*/
-adjust_got:
+.Ladjust_got:
/* Walk through the GOT adding the address to the entries */
leaq _got(%rip), %rdx
leaq _egot(%rip), %rcx
@@ -622,7 +622,7 @@ ENTRY(trampoline_32bit_src)
movl %eax, %cr4
/* Calculate address of paging_enabled() once we are executing in the trampoline */
- leal paging_enabled - trampoline_32bit_src + TRAMPOLINE_32BIT_CODE_OFFSET(%ecx), %eax
+ leal .Lpaging_enabled - trampoline_32bit_src + TRAMPOLINE_32BIT_CODE_OFFSET(%ecx), %eax
/* Prepare the stack for far return to Long Mode */
pushl $__KERNEL_CS
@@ -635,7 +635,7 @@ ENTRY(trampoline_32bit_src)
lret
.code64
-paging_enabled:
+.Lpaging_enabled:
/* Return from the trampoline */
jmp *%rdi
@@ -647,7 +647,7 @@ paging_enabled:
.org trampoline_32bit_src + TRAMPOLINE_32BIT_CODE_SIZE
.code32
-no_longmode:
+.Lno_longmode:
/* This isn't an x86-64 CPU, so hang intentionally, we cannot continue */
1:
hlt
diff --git a/arch/x86/boot/main.c b/arch/x86/boot/main.c
index 996df3d586f0..e3add857c2c9 100644
--- a/arch/x86/boot/main.c
+++ b/arch/x86/boot/main.c
@@ -10,6 +10,7 @@
/*
* Main module for the real-mode kernel code
*/
+#include <linux/build_bug.h>
#include "boot.h"
#include "string.h"
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 4f86928246e7..f83ca5aa8b77 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -63,7 +63,7 @@
* enough to patch inline, increasing performance.
*/
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
# define preempt_stop(clobbers) DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF
#else
# define preempt_stop(clobbers)
@@ -1084,7 +1084,7 @@ restore_all:
INTERRUPT_RETURN
restore_all_kernel:
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
DISABLE_INTERRUPTS(CLBR_ANY)
cmpl $0, PER_CPU_VAR(__preempt_count)
jnz .Lno_preempt
@@ -1364,7 +1364,7 @@ ENTRY(xen_hypervisor_callback)
ENTRY(xen_do_upcall)
1: mov %esp, %eax
call xen_evtchn_do_upcall
-#ifndef CONFIG_PREEMPT
+#ifndef CONFIG_PREEMPTION
call xen_maybe_preempt_hcall
#endif
jmp ret_from_intr
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index be9ca198c581..b7c3ea4cb19d 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -664,7 +664,7 @@ GLOBAL(swapgs_restore_regs_and_return_to_usermode)
/* Returning to kernel space */
retint_kernel:
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
/* Interrupts are off */
/* Check if we need preemption */
btl $9, EFLAGS(%rsp) /* were interrupts off? */
@@ -1058,10 +1058,10 @@ ENTRY(native_load_gs_index)
ENDPROC(native_load_gs_index)
EXPORT_SYMBOL(native_load_gs_index)
- _ASM_EXTABLE(.Lgs_change, bad_gs)
+ _ASM_EXTABLE(.Lgs_change, .Lbad_gs)
.section .fixup, "ax"
/* running with kernelgs */
-bad_gs:
+.Lbad_gs:
SWAPGS /* switch back to user gs */
.macro ZAP_GS
/* This can't be a string because the preprocessor needs to see it. */
@@ -1115,7 +1115,7 @@ ENTRY(xen_do_hypervisor_callback) /* do_hypervisor_callback(struct *pt_regs) */
call xen_evtchn_do_upcall
LEAVE_IRQ_STACK
-#ifndef CONFIG_PREEMPT
+#ifndef CONFIG_PREEMPTION
call xen_maybe_preempt_hcall
#endif
jmp error_exit
diff --git a/arch/x86/entry/thunk_32.S b/arch/x86/entry/thunk_32.S
index cb3464525b37..2713490611a3 100644
--- a/arch/x86/entry/thunk_32.S
+++ b/arch/x86/entry/thunk_32.S
@@ -34,7 +34,7 @@
THUNK trace_hardirqs_off_thunk,trace_hardirqs_off_caller,1
#endif
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
THUNK ___preempt_schedule, preempt_schedule
THUNK ___preempt_schedule_notrace, preempt_schedule_notrace
EXPORT_SYMBOL(___preempt_schedule)
diff --git a/arch/x86/entry/thunk_64.S b/arch/x86/entry/thunk_64.S
index cc20465b2867..ea5c4167086c 100644
--- a/arch/x86/entry/thunk_64.S
+++ b/arch/x86/entry/thunk_64.S
@@ -46,7 +46,7 @@
THUNK lockdep_sys_exit_thunk,lockdep_sys_exit
#endif
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
THUNK ___preempt_schedule, preempt_schedule
THUNK ___preempt_schedule_notrace, preempt_schedule_notrace
EXPORT_SYMBOL(___preempt_schedule)
@@ -55,7 +55,7 @@
#if defined(CONFIG_TRACE_IRQFLAGS) \
|| defined(CONFIG_DEBUG_LOCK_ALLOC) \
- || defined(CONFIG_PREEMPT)
+ || defined(CONFIG_PREEMPTION)
.L_restore:
popq %r11
popq %r10
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 325959d19d9a..15b90b1a8fb1 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -1005,6 +1005,27 @@ static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader,
/* current number of events already accepted */
n = cpuc->n_events;
+ if (!cpuc->n_events)
+ cpuc->pebs_output = 0;
+
+ if (!cpuc->is_fake && leader->attr.precise_ip) {
+ /*
+ * For PEBS->PT, if !aux_event, the group leader (PT) went
+ * away, the group was broken down and this singleton event
+ * can't schedule any more.
+ */
+ if (is_pebs_pt(leader) && !leader->aux_event)
+ return -EINVAL;
+
+ /*
+ * pebs_output: 0: no PEBS so far, 1: PT, 2: DS
+ */
+ if (cpuc->pebs_output &&
+ cpuc->pebs_output != is_pebs_pt(leader) + 1)
+ return -EINVAL;
+
+ cpuc->pebs_output = is_pebs_pt(leader) + 1;
+ }
if (is_x86_event(leader)) {
if (n >= max_count)
@@ -2241,6 +2262,17 @@ static int x86_pmu_check_period(struct perf_event *event, u64 value)
return 0;
}
+static int x86_pmu_aux_output_match(struct perf_event *event)
+{
+ if (!(pmu.capabilities & PERF_PMU_CAP_AUX_OUTPUT))
+ return 0;
+
+ if (x86_pmu.aux_output_match)
+ return x86_pmu.aux_output_match(event);
+
+ return 0;
+}
+
static struct pmu pmu = {
.pmu_enable = x86_pmu_enable,
.pmu_disable = x86_pmu_disable,
@@ -2266,6 +2298,8 @@ static struct pmu pmu = {
.sched_task = x86_pmu_sched_task,
.task_ctx_size = sizeof(struct x86_perf_task_context),
.check_period = x86_pmu_check_period,
+
+ .aux_output_match = x86_pmu_aux_output_match,
};
void arch_perf_update_userpage(struct perf_event *event,
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 1fc01d1f0a9b..27ee47a7be66 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -18,6 +18,7 @@
#include <asm/cpufeature.h>
#include <asm/hardirq.h>
#include <asm/intel-family.h>
+#include <asm/intel_pt.h>
#include <asm/apic.h>
#include <asm/cpu_device_id.h>
@@ -3298,6 +3299,13 @@ static int intel_pmu_hw_config(struct perf_event *event)
}
}
+ if (event->attr.aux_output) {
+ if (!event->attr.precise_ip)
+ return -EINVAL;
+
+ event->hw.flags |= PERF_X86_EVENT_PEBS_VIA_PT;
+ }
+
if (event->attr.type != PERF_TYPE_RAW)
return 0;
@@ -3816,6 +3824,14 @@ static int intel_pmu_check_period(struct perf_event *event, u64 value)
return intel_pmu_has_bts_period(event, value) ? -EINVAL : 0;
}
+static int intel_pmu_aux_output_match(struct perf_event *event)
+{
+ if (!x86_pmu.intel_cap.pebs_output_pt_available)
+ return 0;
+
+ return is_intel_pt_event(event);
+}
+
PMU_FORMAT_ATTR(offcore_rsp, "config1:0-63");
PMU_FORMAT_ATTR(ldlat, "config1:0-15");
@@ -3940,6 +3956,8 @@ static __initconst const struct x86_pmu intel_pmu = {
.sched_task = intel_pmu_sched_task,
.check_period = intel_pmu_check_period,
+
+ .aux_output_match = intel_pmu_aux_output_match,
};
static __init void intel_clovertown_quirk(void)
diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c
index 104c093b282b..9f2f39003d96 100644
--- a/arch/x86/events/intel/cstate.c
+++ b/arch/x86/events/intel/cstate.c
@@ -446,7 +446,7 @@ static int cstate_cpu_init(unsigned int cpu)
return 0;
}
-const struct attribute_group *core_attr_update[] = {
+static const struct attribute_group *core_attr_update[] = {
&group_cstate_core_c1,
&group_cstate_core_c3,
&group_cstate_core_c6,
@@ -454,7 +454,7 @@ const struct attribute_group *core_attr_update[] = {
NULL,
};
-const struct attribute_group *pkg_attr_update[] = {
+static const struct attribute_group *pkg_attr_update[] = {
&group_cstate_pkg_c2,
&group_cstate_pkg_c3,
&group_cstate_pkg_c6,
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index f1269e804e9b..ce83950036c5 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -902,6 +902,9 @@ struct event_constraint *intel_pebs_constraints(struct perf_event *event)
*/
static inline bool pebs_needs_sched_cb(struct cpu_hw_events *cpuc)
{
+ if (cpuc->n_pebs == cpuc->n_pebs_via_pt)
+ return false;
+
return cpuc->n_pebs && (cpuc->n_pebs == cpuc->n_large_pebs);
}
@@ -919,6 +922,9 @@ static inline void pebs_update_threshold(struct cpu_hw_events *cpuc)
u64 threshold;
int reserved;
+ if (cpuc->n_pebs_via_pt)
+ return;
+
if (x86_pmu.flags & PMU_FL_PEBS_ALL)
reserved = x86_pmu.max_pebs_events + x86_pmu.num_counters_fixed;
else
@@ -1059,10 +1065,40 @@ void intel_pmu_pebs_add(struct perf_event *event)
cpuc->n_pebs++;
if (hwc->flags & PERF_X86_EVENT_LARGE_PEBS)
cpuc->n_large_pebs++;
+ if (hwc->flags & PERF_X86_EVENT_PEBS_VIA_PT)
+ cpuc->n_pebs_via_pt++;
pebs_update_state(needed_cb, cpuc, event, true);
}
+static void intel_pmu_pebs_via_pt_disable(struct perf_event *event)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+ if (!is_pebs_pt(event))
+ return;
+
+ if (!(cpuc->pebs_enabled & ~PEBS_VIA_PT_MASK))
+ cpuc->pebs_enabled &= ~PEBS_VIA_PT_MASK;
+}
+
+static void intel_pmu_pebs_via_pt_enable(struct perf_event *event)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ struct hw_perf_event *hwc = &event->hw;
+ struct debug_store *ds = cpuc->ds;
+
+ if (!is_pebs_pt(event))
+ return;
+
+ if (!(event->hw.flags & PERF_X86_EVENT_LARGE_PEBS))
+ cpuc->pebs_enabled |= PEBS_PMI_AFTER_EACH_RECORD;
+
+ cpuc->pebs_enabled |= PEBS_OUTPUT_PT;
+
+ wrmsrl(MSR_RELOAD_PMC0 + hwc->idx, ds->pebs_event_reset[hwc->idx]);
+}
+
void intel_pmu_pebs_enable(struct perf_event *event)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
@@ -1100,6 +1136,8 @@ void intel_pmu_pebs_enable(struct perf_event *event)
} else {
ds->pebs_event_reset[hwc->idx] = 0;
}
+
+ intel_pmu_pebs_via_pt_enable(event);
}
void intel_pmu_pebs_del(struct perf_event *event)
@@ -1111,6 +1149,8 @@ void intel_pmu_pebs_del(struct perf_event *event)
cpuc->n_pebs--;
if (hwc->flags & PERF_X86_EVENT_LARGE_PEBS)
cpuc->n_large_pebs--;
+ if (hwc->flags & PERF_X86_EVENT_PEBS_VIA_PT)
+ cpuc->n_pebs_via_pt--;
pebs_update_state(needed_cb, cpuc, event, false);
}
@@ -1120,7 +1160,8 @@ void intel_pmu_pebs_disable(struct perf_event *event)
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct hw_perf_event *hwc = &event->hw;
- if (cpuc->n_pebs == cpuc->n_large_pebs)
+ if (cpuc->n_pebs == cpuc->n_large_pebs &&
+ cpuc->n_pebs != cpuc->n_pebs_via_pt)
intel_pmu_drain_pebs_buffer();
cpuc->pebs_enabled &= ~(1ULL << hwc->idx);
@@ -1131,6 +1172,8 @@ void intel_pmu_pebs_disable(struct perf_event *event)
else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
cpuc->pebs_enabled &= ~(1ULL << 63);
+ intel_pmu_pebs_via_pt_disable(event);
+
if (cpuc->enabled)
wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
@@ -2031,6 +2074,12 @@ void __init intel_ds_init(void)
PERF_SAMPLE_REGS_INTR);
}
pr_cont("PEBS fmt4%c%s, ", pebs_type, pebs_qual);
+
+ if (x86_pmu.intel_cap.pebs_output_pt_available) {
+ pr_cont("PEBS-via-PT, ");
+ x86_get_pmu()->capabilities |= PERF_PMU_CAP_AUX_OUTPUT;
+ }
+
break;
default:
diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index 6f814a27416b..ea54634eabf3 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -273,7 +273,7 @@ static inline bool lbr_from_signext_quirk_needed(void)
return !tsx_support && (lbr_desc[lbr_format] & LBR_TSX);
}
-DEFINE_STATIC_KEY_FALSE(lbr_from_quirk_key);
+static DEFINE_STATIC_KEY_FALSE(lbr_from_quirk_key);
/* If quirk is enabled, ensure sign extension is 63 bits: */
inline u64 lbr_from_signext_quirk_wr(u64 val)
diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c
index 26af1f19f153..74e80ed9c6c4 100644
--- a/arch/x86/events/intel/pt.c
+++ b/arch/x86/events/intel/pt.c
@@ -545,33 +545,62 @@ static void pt_config_buffer(void *buf, unsigned int topa_idx,
wrmsrl(MSR_IA32_RTIT_OUTPUT_MASK, reg);
}
-/*
- * Keep ToPA table-related metadata on the same page as the actual table,
- * taking up a few words from the top
- */
-
-#define TENTS_PER_PAGE (((PAGE_SIZE - 40) / sizeof(struct topa_entry)) - 1)
-
/**
- * struct topa - page-sized ToPA table with metadata at the top
- * @table: actual ToPA table entries, as understood by PT hardware
+ * struct topa - ToPA metadata
* @list: linkage to struct pt_buffer's list of tables
- * @phys: physical address of this page
* @offset: offset of the first entry in this table in the buffer
* @size: total size of all entries in this table
* @last: index of the last initialized entry in this table
+ * @z_count: how many times the first entry repeats
*/
struct topa {
- struct topa_entry table[TENTS_PER_PAGE];
struct list_head list;
- u64 phys;
u64 offset;
size_t size;
int last;
+ unsigned int z_count;
};
+/*
+ * Keep ToPA table-related metadata on the same page as the actual table,
+ * taking up a few words from the top
+ */
+
+#define TENTS_PER_PAGE \
+ ((PAGE_SIZE - sizeof(struct topa)) / sizeof(struct topa_entry))
+
+/**
+ * struct topa_page - page-sized ToPA table with metadata at the top
+ * @table: actual ToPA table entries, as understood by PT hardware
+ * @topa: metadata
+ */
+struct topa_page {
+ struct topa_entry table[TENTS_PER_PAGE];
+ struct topa topa;
+};
+
+static inline struct topa_page *topa_to_page(struct topa *topa)
+{
+ return container_of(topa, struct topa_page, topa);
+}
+
+static inline struct topa_page *topa_entry_to_page(struct topa_entry *te)
+{
+ return (struct topa_page *)((unsigned long)te & PAGE_MASK);
+}
+
+static inline phys_addr_t topa_pfn(struct topa *topa)
+{
+ return PFN_DOWN(virt_to_phys(topa_to_page(topa)));
+}
+
/* make -1 stand for the last table entry */
-#define TOPA_ENTRY(t, i) ((i) == -1 ? &(t)->table[(t)->last] : &(t)->table[(i)])
+#define TOPA_ENTRY(t, i) \
+ ((i) == -1 \
+ ? &topa_to_page(t)->table[(t)->last] \
+ : &topa_to_page(t)->table[(i)])
+#define TOPA_ENTRY_SIZE(t, i) (sizes(TOPA_ENTRY((t), (i))->size))
+#define TOPA_ENTRY_PAGES(t, i) (1 << TOPA_ENTRY((t), (i))->size)
/**
* topa_alloc() - allocate page-sized ToPA table
@@ -583,27 +612,26 @@ struct topa {
static struct topa *topa_alloc(int cpu, gfp_t gfp)
{
int node = cpu_to_node(cpu);
- struct topa *topa;
+ struct topa_page *tp;
struct page *p;
p = alloc_pages_node(node, gfp | __GFP_ZERO, 0);
if (!p)
return NULL;
- topa = page_address(p);
- topa->last = 0;
- topa->phys = page_to_phys(p);
+ tp = page_address(p);
+ tp->topa.last = 0;
/*
* In case of singe-entry ToPA, always put the self-referencing END
* link as the 2nd entry in the table
*/
if (!intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries)) {
- TOPA_ENTRY(topa, 1)->base = topa->phys >> TOPA_SHIFT;
- TOPA_ENTRY(topa, 1)->end = 1;
+ TOPA_ENTRY(&tp->topa, 1)->base = page_to_phys(p);
+ TOPA_ENTRY(&tp->topa, 1)->end = 1;
}
- return topa;
+ return &tp->topa;
}
/**
@@ -643,7 +671,7 @@ static void topa_insert_table(struct pt_buffer *buf, struct topa *topa)
BUG_ON(last->last != TENTS_PER_PAGE - 1);
- TOPA_ENTRY(last, -1)->base = topa->phys >> TOPA_SHIFT;
+ TOPA_ENTRY(last, -1)->base = topa_pfn(topa);
TOPA_ENTRY(last, -1)->end = 1;
}
@@ -670,7 +698,7 @@ static bool topa_table_full(struct topa *topa)
*
* Return: 0 on success or error code.
*/
-static int topa_insert_pages(struct pt_buffer *buf, gfp_t gfp)
+static int topa_insert_pages(struct pt_buffer *buf, int cpu, gfp_t gfp)
{
struct topa *topa = buf->last;
int order = 0;
@@ -681,13 +709,18 @@ static int topa_insert_pages(struct pt_buffer *buf, gfp_t gfp)
order = page_private(p);
if (topa_table_full(topa)) {
- topa = topa_alloc(buf->cpu, gfp);
+ topa = topa_alloc(cpu, gfp);
if (!topa)
return -ENOMEM;
topa_insert_table(buf, topa);
}
+ if (topa->z_count == topa->last - 1) {
+ if (order == TOPA_ENTRY(topa, topa->last - 1)->size)
+ topa->z_count++;
+ }
+
TOPA_ENTRY(topa, -1)->base = page_to_phys(p) >> TOPA_SHIFT;
TOPA_ENTRY(topa, -1)->size = order;
if (!buf->snapshot &&
@@ -713,23 +746,26 @@ static void pt_topa_dump(struct pt_buffer *buf)
struct topa *topa;
list_for_each_entry(topa, &buf->tables, list) {
+ struct topa_page *tp = topa_to_page(topa);
int i;
- pr_debug("# table @%p (%016Lx), off %llx size %zx\n", topa->table,
- topa->phys, topa->offset, topa->size);
+ pr_debug("# table @%p, off %llx size %zx\n", tp->table,
+ topa->offset, topa->size);
for (i = 0; i < TENTS_PER_PAGE; i++) {
pr_debug("# entry @%p (%lx sz %u %c%c%c) raw=%16llx\n",
- &topa->table[i],
- (unsigned long)topa->table[i].base << TOPA_SHIFT,
- sizes(topa->table[i].size),
- topa->table[i].end ? 'E' : ' ',
- topa->table[i].intr ? 'I' : ' ',
- topa->table[i].stop ? 'S' : ' ',
- *(u64 *)&topa->table[i]);
+ &tp->table[i],
+ (unsigned long)tp->table[i].base << TOPA_SHIFT,
+ sizes(tp->table[i].size),
+ tp->table[i].end ? 'E' : ' ',
+ tp->table[i].intr ? 'I' : ' ',
+ tp->table[i].stop ? 'S' : ' ',
+ *(u64 *)&tp->table[i]);
if ((intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries) &&
- topa->table[i].stop) ||
- topa->table[i].end)
+ tp->table[i].stop) ||
+ tp->table[i].end)
break;
+ if (!i && topa->z_count)
+ i += topa->z_count;
}
}
}
@@ -771,7 +807,7 @@ static void pt_update_head(struct pt *pt)
/* offset of the current output region within this table */
for (topa_idx = 0; topa_idx < buf->cur_idx; topa_idx++)
- base += sizes(buf->cur->table[topa_idx].size);
+ base += TOPA_ENTRY_SIZE(buf->cur, topa_idx);
if (buf->snapshot) {
local_set(&buf->data_size, base);
@@ -791,7 +827,7 @@ static void pt_update_head(struct pt *pt)
*/
static void *pt_buffer_region(struct pt_buffer *buf)
{
- return phys_to_virt(buf->cur->table[buf->cur_idx].base << TOPA_SHIFT);
+ return phys_to_virt(TOPA_ENTRY(buf->cur, buf->cur_idx)->base << TOPA_SHIFT);
}
/**
@@ -800,7 +836,7 @@ static void *pt_buffer_region(struct pt_buffer *buf)
*/
static size_t pt_buffer_region_size(struct pt_buffer *buf)
{
- return sizes(buf->cur->table[buf->cur_idx].size);
+ return TOPA_ENTRY_SIZE(buf->cur, buf->cur_idx);
}
/**
@@ -830,7 +866,7 @@ static void pt_handle_status(struct pt *pt)
* know.
*/
if (!intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries) ||
- buf->output_off == sizes(TOPA_ENTRY(buf->cur, buf->cur_idx)->size)) {
+ buf->output_off == pt_buffer_region_size(buf)) {
perf_aux_output_flag(&pt->handle,
PERF_AUX_FLAG_TRUNCATED);
advance++;
@@ -868,9 +904,11 @@ static void pt_handle_status(struct pt *pt)
static void pt_read_offset(struct pt_buffer *buf)
{
u64 offset, base_topa;
+ struct topa_page *tp;
rdmsrl(MSR_IA32_RTIT_OUTPUT_BASE, base_topa);
- buf->cur = phys_to_virt(base_topa);
+ tp = phys_to_virt(base_topa);
+ buf->cur = &tp->topa;
rdmsrl(MSR_IA32_RTIT_OUTPUT_MASK, offset);
/* offset within current output region */
@@ -879,29 +917,97 @@ static void pt_read_offset(struct pt_buffer *buf)
buf->cur_idx = (offset & 0xffffff80) >> 7;
}
-/**
- * pt_topa_next_entry() - obtain index of the first page in the next ToPA entry
- * @buf: PT buffer.
- * @pg: Page offset in the buffer.
- *
- * When advancing to the next output region (ToPA entry), given a page offset
- * into the buffer, we need to find the offset of the first page in the next
- * region.
- */
-static unsigned int pt_topa_next_entry(struct pt_buffer *buf, unsigned int pg)
+static struct topa_entry *
+pt_topa_entry_for_page(struct pt_buffer *buf, unsigned int pg)
{
- struct topa_entry *te = buf->topa_index[pg];
+ struct topa_page *tp;
+ struct topa *topa;
+ unsigned int idx, cur_pg = 0, z_pg = 0, start_idx = 0;
- /* one region */
- if (buf->first == buf->last && buf->first->last == 1)
- return pg;
+ /*
+ * Indicates a bug in the caller.
+ */
+ if (WARN_ON_ONCE(pg >= buf->nr_pages))
+ return NULL;
+
+ /*
+ * First, find the ToPA table where @pg fits. With high
+ * order allocations, there shouldn't be many of these.
+ */
+ list_for_each_entry(topa, &buf->tables, list) {
+ if (topa->offset + topa->size > pg << PAGE_SHIFT)
+ goto found;
+ }
+
+ /*
+ * Hitting this means we have a problem in the ToPA
+ * allocation code.
+ */
+ WARN_ON_ONCE(1);
- do {
- pg++;
- pg &= buf->nr_pages - 1;
- } while (buf->topa_index[pg] == te);
+ return NULL;
- return pg;
+found:
+ /*
+ * Indicates a problem in the ToPA allocation code.
+ */
+ if (WARN_ON_ONCE(topa->last == -1))
+ return NULL;
+
+ tp = topa_to_page(topa);
+ cur_pg = PFN_DOWN(topa->offset);
+ if (topa->z_count) {
+ z_pg = TOPA_ENTRY_PAGES(topa, 0) * (topa->z_count + 1);
+ start_idx = topa->z_count + 1;
+ }
+
+ /*
+ * Multiple entries at the beginning of the table have the same size,
+ * ideally all of them; if @pg falls there, the search is done.
+ */
+ if (pg >= cur_pg && pg < cur_pg + z_pg) {
+ idx = (pg - cur_pg) / TOPA_ENTRY_PAGES(topa, 0);
+ return &tp->table[idx];
+ }
+
+ /*
+ * Otherwise, slow path: iterate through the remaining entries.
+ */
+ for (idx = start_idx, cur_pg += z_pg; idx < topa->last; idx++) {
+ if (cur_pg + TOPA_ENTRY_PAGES(topa, idx) > pg)
+ return &tp->table[idx];
+
+ cur_pg += TOPA_ENTRY_PAGES(topa, idx);
+ }
+
+ /*
+ * Means we couldn't find a ToPA entry in the table that does match.
+ */
+ WARN_ON_ONCE(1);
+
+ return NULL;
+}
+
+static struct topa_entry *
+pt_topa_prev_entry(struct pt_buffer *buf, struct topa_entry *te)
+{
+ unsigned long table = (unsigned long)te & ~(PAGE_SIZE - 1);
+ struct topa_page *tp;
+ struct topa *topa;
+
+ tp = (struct topa_page *)table;
+ if (tp->table != te)
+ return --te;
+
+ topa = &tp->topa;
+ if (topa == buf->first)
+ topa = buf->last;
+ else
+ topa = list_prev_entry(topa, list);
+
+ tp = topa_to_page(topa);
+
+ return &tp->table[topa->last - 1];
}
/**
@@ -925,8 +1031,7 @@ static int pt_buffer_reset_markers(struct pt_buffer *buf,
unsigned long idx, npages, wakeup;
/* can't stop in the middle of an output region */
- if (buf->output_off + handle->size + 1 <
- sizes(TOPA_ENTRY(buf->cur, buf->cur_idx)->size)) {
+ if (buf->output_off + handle->size + 1 < pt_buffer_region_size(buf)) {
perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED);
return -EINVAL;
}
@@ -937,9 +1042,13 @@ static int pt_buffer_reset_markers(struct pt_buffer *buf,
return 0;
/* clear STOP and INT from current entry */
- buf->topa_index[buf->stop_pos]->stop = 0;
- buf->topa_index[buf->stop_pos]->intr = 0;
- buf->topa_index[buf->intr_pos]->intr = 0;
+ if (buf->stop_te) {
+ buf->stop_te->stop = 0;
+ buf->stop_te->intr = 0;
+ }
+
+ if (buf->intr_te)
+ buf->intr_te->intr = 0;
/* how many pages till the STOP marker */
npages = handle->size >> PAGE_SHIFT;
@@ -950,7 +1059,12 @@ static int pt_buffer_reset_markers(struct pt_buffer *buf,
idx = (head >> PAGE_SHIFT) + npages;
idx &= buf->nr_pages - 1;
- buf->stop_pos = idx;
+
+ if (idx != buf->stop_pos) {
+ buf->stop_pos = idx;
+ buf->stop_te = pt_topa_entry_for_page(buf, idx);
+ buf->stop_te = pt_topa_prev_entry(buf, buf->stop_te);
+ }
wakeup = handle->wakeup >> PAGE_SHIFT;
@@ -960,51 +1074,20 @@ static int pt_buffer_reset_markers(struct pt_buffer *buf,
idx = wakeup;
idx &= buf->nr_pages - 1;
- buf->intr_pos = idx;
+ if (idx != buf->intr_pos) {
+ buf->intr_pos = idx;
+ buf->intr_te = pt_topa_entry_for_page(buf, idx);
+ buf->intr_te = pt_topa_prev_entry(buf, buf->intr_te);
+ }
- buf->topa_index[buf->stop_pos]->stop = 1;
- buf->topa_index[buf->stop_pos]->intr = 1;
- buf->topa_index[buf->intr_pos]->intr = 1;
+ buf->stop_te->stop = 1;
+ buf->stop_te->intr = 1;
+ buf->intr_te->intr = 1;
return 0;
}
/**
- * pt_buffer_setup_topa_index() - build topa_index[] table of regions
- * @buf: PT buffer.
- *
- * topa_index[] references output regions indexed by offset into the
- * buffer for purposes of quick reverse lookup.
- */
-static void pt_buffer_setup_topa_index(struct pt_buffer *buf)
-{
- struct topa *cur = buf->first, *prev = buf->last;
- struct topa_entry *te_cur = TOPA_ENTRY(cur, 0),
- *te_prev = TOPA_ENTRY(prev, prev->last - 1);
- int pg = 0, idx = 0;
-
- while (pg < buf->nr_pages) {
- int tidx;
-
- /* pages within one topa entry */
- for (tidx = 0; tidx < 1 << te_cur->size; tidx++, pg++)
- buf->topa_index[pg] = te_prev;
-
- te_prev = te_cur;
-
- if (idx == cur->last - 1) {
- /* advance to next topa table */
- idx = 0;
- cur = list_entry(cur->list.next, struct topa, list);
- } else {
- idx++;
- }
- te_cur = TOPA_ENTRY(cur, idx);
- }
-
-}
-
-/**
* pt_buffer_reset_offsets() - adjust buffer's write pointers from aux_head
* @buf: PT buffer.
* @head: Write pointer (aux_head) from AUX buffer.
@@ -1021,18 +1104,20 @@ static void pt_buffer_setup_topa_index(struct pt_buffer *buf)
*/
static void pt_buffer_reset_offsets(struct pt_buffer *buf, unsigned long head)
{
+ struct topa_page *cur_tp;
+ struct topa_entry *te;
int pg;
if (buf->snapshot)
head &= (buf->nr_pages << PAGE_SHIFT) - 1;
pg = (head >> PAGE_SHIFT) & (buf->nr_pages - 1);
- pg = pt_topa_next_entry(buf, pg);
+ te = pt_topa_entry_for_page(buf, pg);
- buf->cur = (struct topa *)((unsigned long)buf->topa_index[pg] & PAGE_MASK);
- buf->cur_idx = ((unsigned long)buf->topa_index[pg] -
- (unsigned long)buf->cur) / sizeof(struct topa_entry);
- buf->output_off = head & (sizes(buf->cur->table[buf->cur_idx].size) - 1);
+ cur_tp = topa_entry_to_page(te);
+ buf->cur = &cur_tp->topa;
+ buf->cur_idx = te - TOPA_ENTRY(buf->cur, 0);
+ buf->output_off = head & (pt_buffer_region_size(buf) - 1);
local64_set(&buf->head, head);
local_set(&buf->data_size, 0);
@@ -1061,31 +1146,29 @@ static void pt_buffer_fini_topa(struct pt_buffer *buf)
* @size: Total size of all regions within this ToPA.
* @gfp: Allocation flags.
*/
-static int pt_buffer_init_topa(struct pt_buffer *buf, unsigned long nr_pages,
- gfp_t gfp)
+static int pt_buffer_init_topa(struct pt_buffer *buf, int cpu,
+ unsigned long nr_pages, gfp_t gfp)
{
struct topa *topa;
int err;
- topa = topa_alloc(buf->cpu, gfp);
+ topa = topa_alloc(cpu, gfp);
if (!topa)
return -ENOMEM;
topa_insert_table(buf, topa);
while (buf->nr_pages < nr_pages) {
- err = topa_insert_pages(buf, gfp);
+ err = topa_insert_pages(buf, cpu, gfp);
if (err) {
pt_buffer_fini_topa(buf);
return -ENOMEM;
}
}
- pt_buffer_setup_topa_index(buf);
-
/* link last table to the first one, unless we're double buffering */
if (intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries)) {
- TOPA_ENTRY(buf->last, -1)->base = buf->first->phys >> TOPA_SHIFT;
+ TOPA_ENTRY(buf->last, -1)->base = topa_pfn(buf->first);
TOPA_ENTRY(buf->last, -1)->end = 1;
}
@@ -1119,18 +1202,18 @@ pt_buffer_setup_aux(struct perf_event *event, void **pages,
cpu = raw_smp_processor_id();
node = cpu_to_node(cpu);
- buf = kzalloc_node(offsetof(struct pt_buffer, topa_index[nr_pages]),
- GFP_KERNEL, node);
+ buf = kzalloc_node(sizeof(struct pt_buffer), GFP_KERNEL, node);
if (!buf)
return NULL;
- buf->cpu = cpu;
buf->snapshot = snapshot;
buf->data_pages = pages;
+ buf->stop_pos = -1;
+ buf->intr_pos = -1;
INIT_LIST_HEAD(&buf->tables);
- ret = pt_buffer_init_topa(buf, nr_pages, GFP_KERNEL);
+ ret = pt_buffer_init_topa(buf, cpu, nr_pages, GFP_KERNEL);
if (ret) {
kfree(buf);
return NULL;
@@ -1296,7 +1379,7 @@ void intel_pt_interrupt(void)
return;
}
- pt_config_buffer(buf->cur->table, buf->cur_idx,
+ pt_config_buffer(topa_to_page(buf->cur)->table, buf->cur_idx,
buf->output_off);
pt_config(event);
}
@@ -1361,7 +1444,7 @@ static void pt_event_start(struct perf_event *event, int mode)
WRITE_ONCE(pt->handle_nmi, 1);
hwc->state = 0;
- pt_config_buffer(buf->cur->table, buf->cur_idx,
+ pt_config_buffer(topa_to_page(buf->cur)->table, buf->cur_idx,
buf->output_off);
pt_config(event);
@@ -1481,6 +1564,11 @@ void cpu_emergency_stop_pt(void)
pt_event_stop(pt->handle.event, PERF_EF_UPDATE);
}
+int is_intel_pt_event(struct perf_event *event)
+{
+ return event->pmu == &pt_pmu.pmu;
+}
+
static __init int pt_init(void)
{
int ret, cpu, prior_warn = 0;
diff --git a/arch/x86/events/intel/pt.h b/arch/x86/events/intel/pt.h
index 63fe4063fbd6..1d2bb7572374 100644
--- a/arch/x86/events/intel/pt.h
+++ b/arch/x86/events/intel/pt.h
@@ -53,7 +53,6 @@ struct pt_pmu {
/**
* struct pt_buffer - buffer configuration; one buffer per task_struct or
* cpu, depending on perf event configuration
- * @cpu: cpu for per-cpu allocation
* @tables: list of ToPA tables in this buffer
* @first: shorthand for first topa table
* @last: shorthand for last topa table
@@ -65,13 +64,14 @@ struct pt_pmu {
* @lost: if data was lost/truncated
* @head: logical write offset inside the buffer
* @snapshot: if this is for a snapshot/overwrite counter
- * @stop_pos: STOP topa entry in the buffer
- * @intr_pos: INT topa entry in the buffer
+ * @stop_pos: STOP topa entry index
+ * @intr_pos: INT topa entry index
+ * @stop_te: STOP topa entry pointer
+ * @intr_te: INT topa entry pointer
* @data_pages: array of pages from perf
* @topa_index: table of topa entries indexed by page offset
*/
struct pt_buffer {
- int cpu;
struct list_head tables;
struct topa *first, *last, *cur;
unsigned int cur_idx;
@@ -80,9 +80,9 @@ struct pt_buffer {
local_t data_size;
local64_t head;
bool snapshot;
- unsigned long stop_pos, intr_pos;
+ long stop_pos, intr_pos;
+ struct topa_entry *stop_te, *intr_te;
void **data_pages;
- struct topa_entry *topa_index[0];
};
#define PT_FILTERS_NUM 4
diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c
index 22f5843ef406..5053a403e4ae 100644
--- a/arch/x86/events/intel/rapl.c
+++ b/arch/x86/events/intel/rapl.c
@@ -634,7 +634,7 @@ static void cleanup_rapl_pmus(void)
kfree(rapl_pmus);
}
-const struct attribute_group *rapl_attr_update[] = {
+static const struct attribute_group *rapl_attr_update[] = {
&rapl_events_cores_group,
&rapl_events_pkg_group,
&rapl_events_ram_group,
diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c
index ab79d3100241..b1afc77f0704 100644
--- a/arch/x86/events/msr.c
+++ b/arch/x86/events/msr.c
@@ -167,7 +167,7 @@ static const struct attribute_group *attr_groups[] = {
NULL,
};
-const struct attribute_group *attr_update[] = {
+static const struct attribute_group *attr_update[] = {
&group_aperf,
&group_mperf,
&group_pperf,
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 8751008fc170..ecacfbf4ebc1 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -76,6 +76,7 @@ static inline bool constraint_match(struct event_constraint *c, u64 ecode)
#define PERF_X86_EVENT_EXCL_ACCT 0x0100 /* accounted EXCL event */
#define PERF_X86_EVENT_AUTO_RELOAD 0x0200 /* use PEBS auto-reload */
#define PERF_X86_EVENT_LARGE_PEBS 0x0400 /* use large PEBS */
+#define PERF_X86_EVENT_PEBS_VIA_PT 0x0800 /* use PT buffer for PEBS */
struct amd_nb {
int nb_id; /* NorthBridge id */
@@ -85,6 +86,11 @@ struct amd_nb {
};
#define PEBS_COUNTER_MASK ((1ULL << MAX_PEBS_EVENTS) - 1)
+#define PEBS_PMI_AFTER_EACH_RECORD BIT_ULL(60)
+#define PEBS_OUTPUT_OFFSET 61
+#define PEBS_OUTPUT_MASK (3ull << PEBS_OUTPUT_OFFSET)
+#define PEBS_OUTPUT_PT (1ull << PEBS_OUTPUT_OFFSET)
+#define PEBS_VIA_PT_MASK (PEBS_OUTPUT_PT | PEBS_PMI_AFTER_EACH_RECORD)
/*
* Flags PEBS can handle without an PMI.
@@ -211,6 +217,8 @@ struct cpu_hw_events {
u64 pebs_enabled;
int n_pebs;
int n_large_pebs;
+ int n_pebs_via_pt;
+ int pebs_output;
/* Current super set of events hardware configuration */
u64 pebs_data_cfg;
@@ -510,6 +518,8 @@ union perf_capabilities {
*/
u64 full_width_write:1;
u64 pebs_baseline:1;
+ u64 pebs_metrics_available:1;
+ u64 pebs_output_pt_available:1;
};
u64 capabilities;
};
@@ -692,6 +702,8 @@ struct x86_pmu {
* Check period value for PERF_EVENT_IOC_PERIOD ioctl.
*/
int (*check_period) (struct perf_event *event, u64 period);
+
+ int (*aux_output_match) (struct perf_event *event);
};
struct x86_perf_task_context {
@@ -901,6 +913,11 @@ static inline int amd_pmu_init(void)
#endif /* CONFIG_CPU_SUP_AMD */
+static inline int is_pebs_pt(struct perf_event *event)
+{
+ return !!(event->hw.flags & PERF_X86_EVENT_PEBS_VIA_PT);
+}
+
#ifdef CONFIG_CPU_SUP_INTEL
static inline bool intel_pmu_has_bts_period(struct perf_event *event, u64 period)
diff --git a/arch/x86/hyperv/mmu.c b/arch/x86/hyperv/mmu.c
index e65d7fe6489f..5208ba49c89a 100644
--- a/arch/x86/hyperv/mmu.c
+++ b/arch/x86/hyperv/mmu.c
@@ -37,12 +37,14 @@ static inline int fill_gva_list(u64 gva_list[], int offset,
* Lower 12 bits encode the number of additional
* pages to flush (in addition to the 'cur' page).
*/
- if (diff >= HV_TLB_FLUSH_UNIT)
+ if (diff >= HV_TLB_FLUSH_UNIT) {
gva_list[gva_n] |= ~PAGE_MASK;
- else if (diff)
+ cur += HV_TLB_FLUSH_UNIT;
+ } else if (diff) {
gva_list[gva_n] |= (diff - 1) >> PAGE_SHIFT;
+ cur = end;
+ }
- cur += HV_TLB_FLUSH_UNIT;
gva_n++;
} while (cur < end);
diff --git a/arch/x86/include/asm/bootparam_utils.h b/arch/x86/include/asm/bootparam_utils.h
index 9e5f3c722c33..981fe923a59f 100644
--- a/arch/x86/include/asm/bootparam_utils.h
+++ b/arch/x86/include/asm/bootparam_utils.h
@@ -70,6 +70,7 @@ static void sanitize_boot_params(struct boot_params *boot_params)
BOOT_PARAM_PRESERVE(eddbuf_entries),
BOOT_PARAM_PRESERVE(edd_mbr_sig_buf_entries),
BOOT_PARAM_PRESERVE(edd_mbr_sig_buffer),
+ BOOT_PARAM_PRESERVE(secure_boot),
BOOT_PARAM_PRESERVE(hdr),
BOOT_PARAM_PRESERVE(e820_table),
BOOT_PARAM_PRESERVE(eddbuf),
diff --git a/arch/x86/include/asm/div64.h b/arch/x86/include/asm/div64.h
index 20a46150e0a8..9b8cb50768c2 100644
--- a/arch/x86/include/asm/div64.h
+++ b/arch/x86/include/asm/div64.h
@@ -73,6 +73,19 @@ static inline u64 mul_u32_u32(u32 a, u32 b)
#else
# include <asm-generic/div64.h>
+
+static inline u64 mul_u64_u32_div(u64 a, u32 mul, u32 div)
+{
+ u64 q;
+
+ asm ("mulq %2; divq %3" : "=a" (q)
+ : "a" (a), "rm" ((u64)mul), "rm" ((u64)div)
+ : "rdx");
+
+ return q;
+}
+#define mul_u64_u32_div mul_u64_u32_div
+
#endif /* CONFIG_X86_32 */
#endif /* _ASM_X86_DIV64_H */
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 606a4b6a9812..43a82e59c59d 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -242,6 +242,7 @@ static inline bool efi_is_64bit(void)
__efi_early()->runtime_services), __VA_ARGS__)
extern bool efi_reboot_required(void);
+extern bool efi_is_table_address(unsigned long phys_addr);
#else
static inline void parse_efi_setup(u64 phys_addr, u32 data_len) {}
@@ -249,6 +250,10 @@ static inline bool efi_reboot_required(void)
{
return false;
}
+static inline bool efi_is_table_address(unsigned long phys_addr)
+{
+ return false;
+}
#endif /* CONFIG_EFI */
#endif /* _ASM_X86_EFI_H */
diff --git a/arch/x86/include/asm/error-injection.h b/arch/x86/include/asm/error-injection.h
deleted file mode 100644
index 47b7a1296245..000000000000
--- a/arch/x86/include/asm/error-injection.h
+++ /dev/null
@@ -1,13 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_ERROR_INJECTION_H
-#define _ASM_ERROR_INJECTION_H
-
-#include <linux/compiler.h>
-#include <linux/linkage.h>
-#include <asm/ptrace.h>
-#include <asm-generic/error-injection.h>
-
-asmlinkage void just_return_func(void);
-void override_function_with_return(struct pt_regs *regs);
-
-#endif /* _ASM_ERROR_INJECTION_H */
diff --git a/arch/x86/include/asm/intel_pt.h b/arch/x86/include/asm/intel_pt.h
index 634f99b1dc22..423b788f495e 100644
--- a/arch/x86/include/asm/intel_pt.h
+++ b/arch/x86/include/asm/intel_pt.h
@@ -28,10 +28,12 @@ enum pt_capabilities {
void cpu_emergency_stop_pt(void);
extern u32 intel_pt_validate_hw_cap(enum pt_capabilities cap);
extern u32 intel_pt_validate_cap(u32 *caps, enum pt_capabilities cap);
+extern int is_intel_pt_event(struct perf_event *event);
#else
static inline void cpu_emergency_stop_pt(void) {}
static inline u32 intel_pt_validate_hw_cap(enum pt_capabilities cap) { return 0; }
static inline u32 intel_pt_validate_cap(u32 *caps, enum pt_capabilities capability) { return 0; }
+static inline int is_intel_pt_event(struct perf_event *event) { return 0; }
#endif
#endif /* _ASM_X86_INTEL_PT_H */
diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h
index baedab8ac538..b91623d521d9 100644
--- a/arch/x86/include/asm/iommu.h
+++ b/arch/x86/include/asm/iommu.h
@@ -4,7 +4,6 @@
extern int force_iommu, no_iommu;
extern int iommu_detected;
-extern int iommu_pass_through;
/* 10 seconds */
#define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 74e88e5edd9c..bdc16b0aa7c6 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -335,6 +335,7 @@ struct kvm_mmu_page {
int root_count; /* Currently serving as active root */
unsigned int unsync_children;
struct kvm_rmap_head parent_ptes; /* rmap pointers to parent sptes */
+ unsigned long mmu_valid_gen;
DECLARE_BITMAP(unsync_child_bitmap, 512);
#ifdef CONFIG_X86_32
@@ -856,6 +857,7 @@ struct kvm_arch {
unsigned long n_requested_mmu_pages;
unsigned long n_max_mmu_pages;
unsigned int indirect_shadow_pages;
+ unsigned long mmu_valid_gen;
struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
/*
* Hash table of struct kvm_mmu_page.
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 695881ef682c..20ce682a2540 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -375,6 +375,10 @@
/* Alternative perfctr range with full access. */
#define MSR_IA32_PMC0 0x000004c1
+/* Auto-reload via MSR instead of DS area */
+#define MSR_RELOAD_PMC0 0x000014c1
+#define MSR_RELOAD_FIXED_CTR0 0x00001309
+
/*
* AMD64 MSRs. Not complete. See the architecture manual for a more
* complete list.
diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h
index 99a7fa9ab0a3..3d4cb83a8828 100644
--- a/arch/x86/include/asm/preempt.h
+++ b/arch/x86/include/asm/preempt.h
@@ -102,7 +102,7 @@ static __always_inline bool should_resched(int preempt_offset)
return unlikely(raw_cpu_read_4(__preempt_count) == preempt_offset);
}
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
extern asmlinkage void ___preempt_schedule(void);
# define __preempt_schedule() \
asm volatile ("call ___preempt_schedule" : ASM_CALL_CONSTRAINT)
diff --git a/arch/x86/include/asm/qspinlock.h b/arch/x86/include/asm/qspinlock.h
index bd5ac6cc37db..444d6fd9a6d8 100644
--- a/arch/x86/include/asm/qspinlock.h
+++ b/arch/x86/include/asm/qspinlock.h
@@ -63,10 +63,25 @@ static inline bool vcpu_is_preempted(long cpu)
#endif
#ifdef CONFIG_PARAVIRT
+/*
+ * virt_spin_lock_key - enables (by default) the virt_spin_lock() hijack.
+ *
+ * Native (and PV wanting native due to vCPU pinning) should disable this key.
+ * It is done in this backwards fashion to only have a single direction change,
+ * which removes ordering between native_pv_spin_init() and HV setup.
+ */
DECLARE_STATIC_KEY_TRUE(virt_spin_lock_key);
void native_pv_lock_init(void) __init;
+/*
+ * Shortcut for the queued_spin_lock_slowpath() function that allows
+ * virt to hijack it.
+ *
+ * Returns:
+ * true - lock has been negotiated, all done;
+ * false - queued_spin_lock_slowpath() will do its thing.
+ */
#define virt_spin_lock virt_spin_lock
static inline bool virt_spin_lock(struct qspinlock *lock)
{
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 9c4435307ff8..35c225ede0e4 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -444,8 +444,10 @@ __pu_label: \
({ \
int __gu_err; \
__inttype(*(ptr)) __gu_val; \
+ __typeof__(ptr) __gu_ptr = (ptr); \
+ __typeof__(size) __gu_size = (size); \
__uaccess_begin_nospec(); \
- __get_user_size(__gu_val, (ptr), (size), __gu_err, -EFAULT); \
+ __get_user_size(__gu_val, __gu_ptr, __gu_size, __gu_err, -EFAULT); \
__uaccess_end(); \
(x) = (__force __typeof__(*(ptr)))__gu_val; \
__builtin_expect(__gu_err, 0); \
diff --git a/arch/x86/include/asm/uv/uv.h b/arch/x86/include/asm/uv/uv.h
index e60c45fd3679..6bc6d89d8e2a 100644
--- a/arch/x86/include/asm/uv/uv.h
+++ b/arch/x86/include/asm/uv/uv.h
@@ -12,10 +12,12 @@ struct mm_struct;
#ifdef CONFIG_X86_UV
#include <linux/efi.h>
+extern unsigned long uv_systab_phys;
+
extern enum uv_system_type get_uv_system_type(void);
static inline bool is_early_uv_system(void)
{
- return !((efi.uv_systab == EFI_INVALID_TABLE_ADDR) || !efi.uv_systab);
+ return uv_systab_phys && uv_systab_phys != EFI_INVALID_TABLE_ADDR;
}
extern int is_uv_system(void);
extern int is_uv_hubless(void);
diff --git a/arch/x86/include/uapi/asm/errno.h b/arch/x86/include/uapi/asm/errno.h
deleted file mode 100644
index 4c82b503d92f..000000000000
--- a/arch/x86/include/uapi/asm/errno.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/errno.h>
diff --git a/arch/x86/include/uapi/asm/fcntl.h b/arch/x86/include/uapi/asm/fcntl.h
deleted file mode 100644
index 46ab12db5739..000000000000
--- a/arch/x86/include/uapi/asm/fcntl.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/fcntl.h>
diff --git a/arch/x86/include/uapi/asm/ioctl.h b/arch/x86/include/uapi/asm/ioctl.h
deleted file mode 100644
index b279fe06dfe5..000000000000
--- a/arch/x86/include/uapi/asm/ioctl.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/ioctl.h>
diff --git a/arch/x86/include/uapi/asm/ioctls.h b/arch/x86/include/uapi/asm/ioctls.h
deleted file mode 100644
index ec34c760665e..000000000000
--- a/arch/x86/include/uapi/asm/ioctls.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/ioctls.h>
diff --git a/arch/x86/include/uapi/asm/ipcbuf.h b/arch/x86/include/uapi/asm/ipcbuf.h
deleted file mode 100644
index 84c7e51cb6d0..000000000000
--- a/arch/x86/include/uapi/asm/ipcbuf.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/ipcbuf.h>
diff --git a/arch/x86/include/uapi/asm/param.h b/arch/x86/include/uapi/asm/param.h
deleted file mode 100644
index 965d45427975..000000000000
--- a/arch/x86/include/uapi/asm/param.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/param.h>
diff --git a/arch/x86/include/uapi/asm/resource.h b/arch/x86/include/uapi/asm/resource.h
deleted file mode 100644
index 04bc4db8921b..000000000000
--- a/arch/x86/include/uapi/asm/resource.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/resource.h>
diff --git a/arch/x86/include/uapi/asm/termbits.h b/arch/x86/include/uapi/asm/termbits.h
deleted file mode 100644
index 3935b106de79..000000000000
--- a/arch/x86/include/uapi/asm/termbits.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/termbits.h>
diff --git a/arch/x86/include/uapi/asm/termios.h b/arch/x86/include/uapi/asm/termios.h
deleted file mode 100644
index 280d78a9d966..000000000000
--- a/arch/x86/include/uapi/asm/termios.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/termios.h>
diff --git a/arch/x86/include/uapi/asm/types.h b/arch/x86/include/uapi/asm/types.h
deleted file mode 100644
index 9d5c11a24279..000000000000
--- a/arch/x86/include/uapi/asm/types.h
+++ /dev/null
@@ -1,7 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-#ifndef _ASM_X86_TYPES_H
-#define _ASM_X86_TYPES_H
-
-#include <asm-generic/types.h>
-
-#endif /* _ASM_X86_TYPES_H */
diff --git a/arch/x86/kernel/acpi/wakeup_64.S b/arch/x86/kernel/acpi/wakeup_64.S
index b0715c3ac18d..7f9ade13bbcf 100644
--- a/arch/x86/kernel/acpi/wakeup_64.S
+++ b/arch/x86/kernel/acpi/wakeup_64.S
@@ -18,8 +18,13 @@ ENTRY(wakeup_long64)
movq saved_magic, %rax
movq $0x123456789abcdef0, %rdx
cmpq %rdx, %rax
- jne bogus_64_magic
+ je 2f
+ /* stop here on a saved_magic mismatch */
+ movq $0xbad6d61676963, %rcx
+1:
+ jmp 1b
+2:
movw $__KERNEL_DS, %ax
movw %ax, %ss
movw %ax, %ds
@@ -37,9 +42,6 @@ ENTRY(wakeup_long64)
jmp *%rax
ENDPROC(wakeup_long64)
-bogus_64_magic:
- jmp bogus_64_magic
-
ENTRY(do_suspend_lowlevel)
FRAME_BEGIN
subq $8, %rsp
diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
index d63e63b7d1d9..251c795b4eb3 100644
--- a/arch/x86/kernel/amd_nb.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -21,6 +21,7 @@
#define PCI_DEVICE_ID_AMD_17H_DF_F4 0x1464
#define PCI_DEVICE_ID_AMD_17H_M10H_DF_F4 0x15ec
#define PCI_DEVICE_ID_AMD_17H_M30H_DF_F4 0x1494
+#define PCI_DEVICE_ID_AMD_17H_M70H_DF_F4 0x1444
/* Protect the PCI config register pairs used for SMN and DF indirect access. */
static DEFINE_MUTEX(smn_mutex);
@@ -50,6 +51,7 @@ const struct pci_device_id amd_nb_misc_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M10H_DF_F3) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M30H_DF_F3) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F3) },
+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M70H_DF_F3) },
{}
};
EXPORT_SYMBOL_GPL(amd_nb_misc_ids);
@@ -63,6 +65,7 @@ static const struct pci_device_id amd_nb_link_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_DF_F4) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M10H_DF_F4) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M30H_DF_F4) },
+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M70H_DF_F4) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F4) },
{}
};
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 78f7e763f2cd..909abb2d59eb 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -834,6 +834,10 @@ bool __init apic_needs_pit(void)
if (!boot_cpu_has(X86_FEATURE_APIC))
return true;
+ /* Virt guests may lack ARAT, but still have DEADLINE */
+ if (!boot_cpu_has(X86_FEATURE_ARAT))
+ return true;
+
/* Deadline timer is based on TSC so no further PIT action required */
if (boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
return false;
@@ -1179,10 +1183,6 @@ void clear_local_APIC(void)
apic_write(APIC_LVT0, v | APIC_LVT_MASKED);
v = apic_read(APIC_LVT1);
apic_write(APIC_LVT1, v | APIC_LVT_MASKED);
- if (!x2apic_enabled()) {
- v = apic_read(APIC_LDR) & ~APIC_LDR_MASK;
- apic_write(APIC_LDR, v);
- }
if (maxlvt >= 4) {
v = apic_read(APIC_LVTPC);
apic_write(APIC_LVTPC, v | APIC_LVT_MASKED);
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 29f0cdfbdca5..90f75e515876 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -8,6 +8,7 @@
#include <linux/sched.h>
#include <linux/sched/clock.h>
#include <linux/random.h>
+#include <linux/topology.h>
#include <asm/processor.h>
#include <asm/apic.h>
#include <asm/cacheinfo.h>
@@ -889,6 +890,10 @@ static void init_amd_zn(struct cpuinfo_x86 *c)
{
set_cpu_cap(c, X86_FEATURE_ZEN);
+#ifdef CONFIG_NUMA
+ node_reclaim_distance = 32;
+#endif
+
/*
* Fix erratum 1076: CPB feature bit not being set in CPUID.
* Always set it, except when running under a hypervisor.
diff --git a/arch/x86/kernel/cpu/mce/severity.c b/arch/x86/kernel/cpu/mce/severity.c
index 210f1f5db5f7..87bcdc6dc2f0 100644
--- a/arch/x86/kernel/cpu/mce/severity.c
+++ b/arch/x86/kernel/cpu/mce/severity.c
@@ -107,11 +107,11 @@ static struct severity {
*/
MCESEV(
AO, "Action optional: memory scrubbing error",
- SER, MASK(MCI_STATUS_OVER|MCI_UC_AR|MCACOD_SCRUBMSK, MCI_STATUS_UC|MCACOD_SCRUB)
+ SER, MASK(MCI_UC_AR|MCACOD_SCRUBMSK, MCI_STATUS_UC|MCACOD_SCRUB)
),
MCESEV(
AO, "Action optional: last level cache writeback error",
- SER, MASK(MCI_STATUS_OVER|MCI_UC_AR|MCACOD, MCI_STATUS_UC|MCACOD_L3WB)
+ SER, MASK(MCI_UC_AR|MCACOD, MCI_STATUS_UC|MCACOD_L3WB)
),
/* ignore OVER for UCNA */
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index 2b5886401e5f..e07424e19274 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -367,13 +367,18 @@ NOKPROBE_SYMBOL(oops_end);
int __die(const char *str, struct pt_regs *regs, long err)
{
+ const char *pr = "";
+
/* Save the regs of the first oops for the executive summary later. */
if (!die_counter)
exec_summary_regs = *regs;
+ if (IS_ENABLED(CONFIG_PREEMPTION))
+ pr = IS_ENABLED(CONFIG_PREEMPT_RT) ? " PREEMPT_RT" : " PREEMPT";
+
printk(KERN_DEFAULT
"%s: %04lx [#%d]%s%s%s%s%s\n", str, err & 0xffff, ++die_counter,
- IS_ENABLED(CONFIG_PREEMPT) ? " PREEMPT" : "",
+ pr,
IS_ENABLED(CONFIG_SMP) ? " SMP" : "",
debug_pagealloc_enabled() ? " DEBUG_PAGEALLOC" : "",
IS_ENABLED(CONFIG_KASAN) ? " KASAN" : "",
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index 0e0b08008b5a..43fc13c831af 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -580,7 +580,7 @@ static void setup_singlestep(struct kprobe *p, struct pt_regs *regs,
if (setup_detour_execution(p, regs, reenter))
return;
-#if !defined(CONFIG_PREEMPT)
+#if !defined(CONFIG_PREEMPTION)
if (p->ainsn.boostable && !p->post_handler) {
/* Boost up -- we can execute copied instructions directly */
if (!reenter)
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 4ab377c9fffe..4cc967178bf9 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -311,7 +311,7 @@ static void kvm_guest_cpu_init(void)
if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF) && kvmapf) {
u64 pa = slow_virt_to_phys(this_cpu_ptr(&apf_reason));
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
pa |= KVM_ASYNC_PF_SEND_ALWAYS;
#endif
pa |= KVM_ASYNC_PF_ENABLED;
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index f62b498b18fb..fa4352dce491 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/dma-direct.h>
#include <linux/dma-debug.h>
+#include <linux/iommu.h>
#include <linux/dmar.h>
#include <linux/export.h>
#include <linux/memblock.h>
@@ -34,21 +35,6 @@ int no_iommu __read_mostly;
/* Set this to 1 if there is a HW IOMMU in the system */
int iommu_detected __read_mostly = 0;
-/*
- * This variable becomes 1 if iommu=pt is passed on the kernel command line.
- * If this variable is 1, IOMMU implementations do no DMA translation for
- * devices and allow every device to access to whole physical memory. This is
- * useful if a user wants to use an IOMMU only for KVM device assignment to
- * guests and not for driver dma translation.
- * It is also possible to disable by default in kernel config, and enable with
- * iommu=nopt at boot time.
- */
-#ifdef CONFIG_IOMMU_DEFAULT_PASSTHROUGH
-int iommu_pass_through __read_mostly = 1;
-#else
-int iommu_pass_through __read_mostly;
-#endif
-
extern struct iommu_table_entry __iommu_table[], __iommu_table_end[];
void __init pci_iommu_alloc(void)
@@ -120,9 +106,9 @@ static __init int iommu_setup(char *p)
swiotlb = 1;
#endif
if (!strncmp(p, "pt", 2))
- iommu_pass_through = 1;
+ iommu_set_default_passthrough(true);
if (!strncmp(p, "nopt", 4))
- iommu_pass_through = 0;
+ iommu_set_default_translated(true);
gart_parse_options(p);
diff --git a/arch/x86/kernel/umip.c b/arch/x86/kernel/umip.c
index 5b345add550f..548fefed71ee 100644
--- a/arch/x86/kernel/umip.c
+++ b/arch/x86/kernel/umip.c
@@ -19,7 +19,7 @@
/** DOC: Emulation for User-Mode Instruction Prevention (UMIP)
*
* The feature User-Mode Instruction Prevention present in recent Intel
- * processor prevents a group of instructions (sgdt, sidt, sldt, smsw, and str)
+ * processor prevents a group of instructions (SGDT, SIDT, SLDT, SMSW and STR)
* from being executed with CPL > 0. Otherwise, a general protection fault is
* issued.
*
@@ -36,8 +36,8 @@
* DOSEMU2) rely on this subset of instructions to function.
*
* The instructions protected by UMIP can be split in two groups. Those which
- * return a kernel memory address (sgdt and sidt) and those which return a
- * value (sldt, str and smsw).
+ * return a kernel memory address (SGDT and SIDT) and those which return a
+ * value (SLDT, STR and SMSW).
*
* For the instructions that return a kernel memory address, applications
* such as WineHQ rely on the result being located in the kernel memory space,
@@ -45,15 +45,13 @@
* value that, lies close to the top of the kernel memory. The limit for the GDT
* and the IDT are set to zero.
*
- * Given that sldt and str are not commonly used in programs that run on WineHQ
+ * Given that SLDT and STR are not commonly used in programs that run on WineHQ
* or DOSEMU2, they are not emulated.
*
* The instruction smsw is emulated to return the value that the register CR0
* has at boot time as set in the head_32.
*
- * Also, emulation is provided only for 32-bit processes; 64-bit processes
- * that attempt to use the instructions that UMIP protects will receive the
- * SIGSEGV signal issued as a consequence of the general protection fault.
+ * Emulation is provided for both 32-bit and 64-bit processes.
*
* Care is taken to appropriately emulate the results when segmentation is
* used. That is, rather than relying on USER_DS and USER_CS, the function
@@ -63,17 +61,18 @@
* application uses a local descriptor table.
*/
-#define UMIP_DUMMY_GDT_BASE 0xfffe0000
-#define UMIP_DUMMY_IDT_BASE 0xffff0000
+#define UMIP_DUMMY_GDT_BASE 0xfffffffffffe0000ULL
+#define UMIP_DUMMY_IDT_BASE 0xffffffffffff0000ULL
/*
* The SGDT and SIDT instructions store the contents of the global descriptor
* table and interrupt table registers, respectively. The destination is a
* memory operand of X+2 bytes. X bytes are used to store the base address of
- * the table and 2 bytes are used to store the limit. In 32-bit processes, the
- * only processes for which emulation is provided, X has a value of 4.
+ * the table and 2 bytes are used to store the limit. In 32-bit processes X
+ * has a value of 4, in 64-bit processes X has a value of 8.
*/
-#define UMIP_GDT_IDT_BASE_SIZE 4
+#define UMIP_GDT_IDT_BASE_SIZE_64BIT 8
+#define UMIP_GDT_IDT_BASE_SIZE_32BIT 4
#define UMIP_GDT_IDT_LIMIT_SIZE 2
#define UMIP_INST_SGDT 0 /* 0F 01 /0 */
@@ -189,6 +188,7 @@ static int identify_insn(struct insn *insn)
* @umip_inst: A constant indicating the instruction to emulate
* @data: Buffer into which the dummy result is stored
* @data_size: Size of the emulated result
+ * @x86_64: true if process is 64-bit, false otherwise
*
* Emulate an instruction protected by UMIP and provide a dummy result. The
* result of the emulation is saved in @data. The size of the results depends
@@ -202,11 +202,8 @@ static int identify_insn(struct insn *insn)
* 0 on success, -EINVAL on error while emulating.
*/
static int emulate_umip_insn(struct insn *insn, int umip_inst,
- unsigned char *data, int *data_size)
+ unsigned char *data, int *data_size, bool x86_64)
{
- unsigned long dummy_base_addr, dummy_value;
- unsigned short dummy_limit = 0;
-
if (!data || !data_size || !insn)
return -EINVAL;
/*
@@ -219,6 +216,9 @@ static int emulate_umip_insn(struct insn *insn, int umip_inst,
* is always returned irrespective of the operand size.
*/
if (umip_inst == UMIP_INST_SGDT || umip_inst == UMIP_INST_SIDT) {
+ u64 dummy_base_addr;
+ u16 dummy_limit = 0;
+
/* SGDT and SIDT do not use registers operands. */
if (X86_MODRM_MOD(insn->modrm.value) == 3)
return -EINVAL;
@@ -228,13 +228,24 @@ static int emulate_umip_insn(struct insn *insn, int umip_inst,
else
dummy_base_addr = UMIP_DUMMY_IDT_BASE;
- *data_size = UMIP_GDT_IDT_LIMIT_SIZE + UMIP_GDT_IDT_BASE_SIZE;
+ /*
+ * 64-bit processes use the entire dummy base address.
+ * 32-bit processes use the lower 32 bits of the base address.
+ * dummy_base_addr is always 64 bits, but we memcpy the correct
+ * number of bytes from it to the destination.
+ */
+ if (x86_64)
+ *data_size = UMIP_GDT_IDT_BASE_SIZE_64BIT;
+ else
+ *data_size = UMIP_GDT_IDT_BASE_SIZE_32BIT;
+
+ memcpy(data + 2, &dummy_base_addr, *data_size);
- memcpy(data + 2, &dummy_base_addr, UMIP_GDT_IDT_BASE_SIZE);
+ *data_size += UMIP_GDT_IDT_LIMIT_SIZE;
memcpy(data, &dummy_limit, UMIP_GDT_IDT_LIMIT_SIZE);
} else if (umip_inst == UMIP_INST_SMSW) {
- dummy_value = CR0_STATE;
+ unsigned long dummy_value = CR0_STATE;
/*
* Even though the CR0 register has 4 bytes, the number
@@ -290,11 +301,10 @@ static void force_sig_info_umip_fault(void __user *addr, struct pt_regs *regs)
* fixup_umip_exception() - Fixup a general protection fault caused by UMIP
* @regs: Registers as saved when entering the #GP handler
*
- * The instructions sgdt, sidt, str, smsw, sldt cause a general protection
- * fault if executed with CPL > 0 (i.e., from user space). If the offending
- * user-space process is not in long mode, this function fixes the exception
- * up and provides dummy results for sgdt, sidt and smsw; str and sldt are not
- * fixed up. Also long mode user-space processes are not fixed up.
+ * The instructions SGDT, SIDT, STR, SMSW and SLDT cause a general protection
+ * fault if executed with CPL > 0 (i.e., from user space). This function fixes
+ * the exception up and provides dummy results for SGDT, SIDT and SMSW; STR
+ * and SLDT are not fixed up.
*
* If operands are memory addresses, results are copied to user-space memory as
* indicated by the instruction pointed by eIP using the registers indicated in
@@ -373,13 +383,14 @@ bool fixup_umip_exception(struct pt_regs *regs)
umip_pr_warning(regs, "%s instruction cannot be used by applications.\n",
umip_insns[umip_inst]);
- /* Do not emulate SLDT, STR or user long mode processes. */
- if (umip_inst == UMIP_INST_STR || umip_inst == UMIP_INST_SLDT || user_64bit_mode(regs))
+ /* Do not emulate (spoof) SLDT or STR. */
+ if (umip_inst == UMIP_INST_STR || umip_inst == UMIP_INST_SLDT)
return false;
umip_pr_warning(regs, "For now, expensive software emulation returns the result.\n");
- if (emulate_umip_insn(&insn, umip_inst, dummy_data, &dummy_data_size))
+ if (emulate_umip_insn(&insn, umip_inst, dummy_data, &dummy_data_size,
+ user_64bit_mode(regs)))
return false;
/*
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 218b277bfda3..a63964e7cec7 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2095,6 +2095,12 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, int direct
if (!direct)
sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache);
set_page_private(virt_to_page(sp->spt), (unsigned long)sp);
+
+ /*
+ * active_mmu_pages must be a FIFO list, as kvm_zap_obsolete_pages()
+ * depends on valid pages being added to the head of the list. See
+ * comments in kvm_zap_obsolete_pages().
+ */
list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages);
kvm_mod_used_mmu_pages(vcpu->kvm, +1);
return sp;
@@ -2244,7 +2250,7 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm,
#define for_each_valid_sp(_kvm, _sp, _gfn) \
hlist_for_each_entry(_sp, \
&(_kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(_gfn)], hash_link) \
- if ((_sp)->role.invalid) { \
+ if (is_obsolete_sp((_kvm), (_sp)) || (_sp)->role.invalid) { \
} else
#define for_each_gfn_indirect_valid_sp(_kvm, _sp, _gfn) \
@@ -2301,6 +2307,11 @@ static void kvm_mmu_audit(struct kvm_vcpu *vcpu, int point) { }
static void mmu_audit_disable(void) { }
#endif
+static bool is_obsolete_sp(struct kvm *kvm, struct kvm_mmu_page *sp)
+{
+ return unlikely(sp->mmu_valid_gen != kvm->arch.mmu_valid_gen);
+}
+
static bool kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
struct list_head *invalid_list)
{
@@ -2525,6 +2536,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
if (level > PT_PAGE_TABLE_LEVEL && need_sync)
flush |= kvm_sync_pages(vcpu, gfn, &invalid_list);
}
+ sp->mmu_valid_gen = vcpu->kvm->arch.mmu_valid_gen;
clear_page(sp->spt);
trace_kvm_mmu_get_page(sp, true);
@@ -4233,6 +4245,13 @@ static bool fast_cr3_switch(struct kvm_vcpu *vcpu, gpa_t new_cr3,
return false;
if (cached_root_available(vcpu, new_cr3, new_role)) {
+ /*
+ * It is possible that the cached previous root page is
+ * obsolete because of a change in the MMU generation
+ * number. However, changing the generation number is
+ * accompanied by KVM_REQ_MMU_RELOAD, which will free
+ * the root set here and allocate a new one.
+ */
kvm_make_request(KVM_REQ_LOAD_CR3, vcpu);
if (!skip_tlb_flush) {
kvm_make_request(KVM_REQ_MMU_SYNC, vcpu);
@@ -5649,11 +5668,89 @@ int kvm_mmu_create(struct kvm_vcpu *vcpu)
return alloc_mmu_pages(vcpu);
}
+
+static void kvm_zap_obsolete_pages(struct kvm *kvm)
+{
+ struct kvm_mmu_page *sp, *node;
+ LIST_HEAD(invalid_list);
+ int ign;
+
+restart:
+ list_for_each_entry_safe_reverse(sp, node,
+ &kvm->arch.active_mmu_pages, link) {
+ /*
+ * No obsolete valid page exists before a newly created page
+ * since active_mmu_pages is a FIFO list.
+ */
+ if (!is_obsolete_sp(kvm, sp))
+ break;
+
+ /*
+ * Do not repeatedly zap a root page to avoid unnecessary
+ * KVM_REQ_MMU_RELOAD, otherwise we may not be able to
+ * progress:
+ * vcpu 0 vcpu 1
+ * call vcpu_enter_guest():
+ * 1): handle KVM_REQ_MMU_RELOAD
+ * and require mmu-lock to
+ * load mmu
+ * repeat:
+ * 1): zap root page and
+ * send KVM_REQ_MMU_RELOAD
+ *
+ * 2): if (cond_resched_lock(mmu-lock))
+ *
+ * 2): hold mmu-lock and load mmu
+ *
+ * 3): see KVM_REQ_MMU_RELOAD bit
+ * on vcpu->requests is set
+ * then return 1 to call
+ * vcpu_enter_guest() again.
+ * goto repeat;
+ *
+ * Since we are reversely walking the list and the invalid
+ * list will be moved to the head, skip the invalid page
+ * can help us to avoid the infinity list walking.
+ */
+ if (sp->role.invalid)
+ continue;
+
+ if (need_resched() || spin_needbreak(&kvm->mmu_lock)) {
+ kvm_mmu_commit_zap_page(kvm, &invalid_list);
+ cond_resched_lock(&kvm->mmu_lock);
+ goto restart;
+ }
+
+ if (__kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list, &ign))
+ goto restart;
+ }
+
+ kvm_mmu_commit_zap_page(kvm, &invalid_list);
+}
+
+/*
+ * Fast invalidate all shadow pages and use lock-break technique
+ * to zap obsolete pages.
+ *
+ * It's required when memslot is being deleted or VM is being
+ * destroyed, in these cases, we should ensure that KVM MMU does
+ * not use any resource of the being-deleted slot or all slots
+ * after calling the function.
+ */
+static void kvm_mmu_zap_all_fast(struct kvm *kvm)
+{
+ spin_lock(&kvm->mmu_lock);
+ kvm->arch.mmu_valid_gen++;
+
+ kvm_zap_obsolete_pages(kvm);
+ spin_unlock(&kvm->mmu_lock);
+}
+
static void kvm_mmu_invalidate_zap_pages_in_memslot(struct kvm *kvm,
struct kvm_memory_slot *slot,
struct kvm_page_track_notifier_node *node)
{
- kvm_mmu_zap_all(kvm);
+ kvm_mmu_zap_all_fast(kvm);
}
void kvm_mmu_init_vm(struct kvm *kvm)
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index ced9fba32598..a3cba321b5c5 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -4540,6 +4540,7 @@ static int handle_vmread(struct kvm_vcpu *vcpu)
int len;
gva_t gva = 0;
struct vmcs12 *vmcs12;
+ struct x86_exception e;
short offset;
if (!nested_vmx_check_permission(vcpu))
@@ -4588,7 +4589,8 @@ static int handle_vmread(struct kvm_vcpu *vcpu)
vmx_instruction_info, true, len, &gva))
return 1;
/* _system ok, nested_vmx_check_permission has verified cpl=0 */
- kvm_write_guest_virt_system(vcpu, gva, &field_value, len, NULL);
+ if (kvm_write_guest_virt_system(vcpu, gva, &field_value, len, &e))
+ kvm_inject_page_fault(vcpu, &e);
}
return nested_vmx_succeed(vcpu);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 290c3c3efb87..91602d310a3f 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5312,6 +5312,13 @@ int kvm_write_guest_virt_system(struct kvm_vcpu *vcpu, gva_t addr, void *val,
/* kvm_write_guest_virt_system can pull in tons of pages. */
vcpu->arch.l1tf_flush_l1d = true;
+ /*
+ * FIXME: this should call handle_emulation_failure if X86EMUL_IO_NEEDED
+ * is returned, but our callers are not ready for that and they blindly
+ * call kvm_inject_page_fault. Ensure that they at least do not leak
+ * uninitialized kernel stack memory into cr2 and error code.
+ */
+ memset(exception, 0, sizeof(*exception));
return kvm_write_guest_virt_helper(addr, val, bytes, vcpu,
PFERR_WRITE_MASK, exception);
}
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
index 4fe1601dbc5d..86976b55ae74 100644
--- a/arch/x86/lib/copy_user_64.S
+++ b/arch/x86/lib/copy_user_64.S
@@ -33,7 +33,7 @@
102:
.section .fixup,"ax"
103: addl %ecx,%edx /* ecx is zerorest also */
- jmp copy_user_handle_tail
+ jmp .Lcopy_user_handle_tail
.previous
_ASM_EXTABLE_UA(100b, 103b)
@@ -113,7 +113,7 @@ ENTRY(copy_user_generic_unrolled)
40: leal (%rdx,%rcx,8),%edx
jmp 60f
50: movl %ecx,%edx
-60: jmp copy_user_handle_tail /* ecx is zerorest also */
+60: jmp .Lcopy_user_handle_tail /* ecx is zerorest also */
.previous
_ASM_EXTABLE_UA(1b, 30b)
@@ -177,7 +177,7 @@ ENTRY(copy_user_generic_string)
.section .fixup,"ax"
11: leal (%rdx,%rcx,8),%ecx
12: movl %ecx,%edx /* ecx is zerorest also */
- jmp copy_user_handle_tail
+ jmp .Lcopy_user_handle_tail
.previous
_ASM_EXTABLE_UA(1b, 11b)
@@ -210,7 +210,7 @@ ENTRY(copy_user_enhanced_fast_string)
.section .fixup,"ax"
12: movl %ecx,%edx /* ecx is zerorest also */
- jmp copy_user_handle_tail
+ jmp .Lcopy_user_handle_tail
.previous
_ASM_EXTABLE_UA(1b, 12b)
@@ -231,7 +231,7 @@ EXPORT_SYMBOL(copy_user_enhanced_fast_string)
* eax uncopied bytes or 0 if successful.
*/
ALIGN;
-copy_user_handle_tail:
+.Lcopy_user_handle_tail:
movl %edx,%ecx
1: rep movsb
2: mov %ecx,%eax
@@ -239,7 +239,7 @@ copy_user_handle_tail:
ret
_ASM_EXTABLE_UA(1b, 2b)
-END(copy_user_handle_tail)
+END(.Lcopy_user_handle_tail)
/*
* copy_user_nocache - Uncached memory copy with exception handling
@@ -364,7 +364,7 @@ ENTRY(__copy_user_nocache)
movl %ecx,%edx
.L_fixup_handle_tail:
sfence
- jmp copy_user_handle_tail
+ jmp .Lcopy_user_handle_tail
.previous
_ASM_EXTABLE_UA(1b, .L_fixup_4x8b_copy)
diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S
index 304f958c27b2..9578eb88fc87 100644
--- a/arch/x86/lib/getuser.S
+++ b/arch/x86/lib/getuser.S
@@ -115,7 +115,7 @@ ENDPROC(__get_user_8)
EXPORT_SYMBOL(__get_user_8)
-bad_get_user_clac:
+.Lbad_get_user_clac:
ASM_CLAC
bad_get_user:
xor %edx,%edx
@@ -123,7 +123,7 @@ bad_get_user:
ret
#ifdef CONFIG_X86_32
-bad_get_user_8_clac:
+.Lbad_get_user_8_clac:
ASM_CLAC
bad_get_user_8:
xor %edx,%edx
@@ -132,12 +132,12 @@ bad_get_user_8:
ret
#endif
- _ASM_EXTABLE_UA(1b, bad_get_user_clac)
- _ASM_EXTABLE_UA(2b, bad_get_user_clac)
- _ASM_EXTABLE_UA(3b, bad_get_user_clac)
+ _ASM_EXTABLE_UA(1b, .Lbad_get_user_clac)
+ _ASM_EXTABLE_UA(2b, .Lbad_get_user_clac)
+ _ASM_EXTABLE_UA(3b, .Lbad_get_user_clac)
#ifdef CONFIG_X86_64
- _ASM_EXTABLE_UA(4b, bad_get_user_clac)
+ _ASM_EXTABLE_UA(4b, .Lbad_get_user_clac)
#else
- _ASM_EXTABLE_UA(4b, bad_get_user_8_clac)
- _ASM_EXTABLE_UA(5b, bad_get_user_8_clac)
+ _ASM_EXTABLE_UA(4b, .Lbad_get_user_8_clac)
+ _ASM_EXTABLE_UA(5b, .Lbad_get_user_8_clac)
#endif
diff --git a/arch/x86/lib/putuser.S b/arch/x86/lib/putuser.S
index 14bf78341d3c..126dd6a9ec9b 100644
--- a/arch/x86/lib/putuser.S
+++ b/arch/x86/lib/putuser.S
@@ -37,7 +37,7 @@
ENTRY(__put_user_1)
ENTER
cmp TASK_addr_limit(%_ASM_BX),%_ASM_CX
- jae bad_put_user
+ jae .Lbad_put_user
ASM_STAC
1: movb %al,(%_ASM_CX)
xor %eax,%eax
@@ -51,7 +51,7 @@ ENTRY(__put_user_2)
mov TASK_addr_limit(%_ASM_BX),%_ASM_BX
sub $1,%_ASM_BX
cmp %_ASM_BX,%_ASM_CX
- jae bad_put_user
+ jae .Lbad_put_user
ASM_STAC
2: movw %ax,(%_ASM_CX)
xor %eax,%eax
@@ -65,7 +65,7 @@ ENTRY(__put_user_4)
mov TASK_addr_limit(%_ASM_BX),%_ASM_BX
sub $3,%_ASM_BX
cmp %_ASM_BX,%_ASM_CX
- jae bad_put_user
+ jae .Lbad_put_user
ASM_STAC
3: movl %eax,(%_ASM_CX)
xor %eax,%eax
@@ -79,7 +79,7 @@ ENTRY(__put_user_8)
mov TASK_addr_limit(%_ASM_BX),%_ASM_BX
sub $7,%_ASM_BX
cmp %_ASM_BX,%_ASM_CX
- jae bad_put_user
+ jae .Lbad_put_user
ASM_STAC
4: mov %_ASM_AX,(%_ASM_CX)
#ifdef CONFIG_X86_32
@@ -91,16 +91,16 @@ ENTRY(__put_user_8)
ENDPROC(__put_user_8)
EXPORT_SYMBOL(__put_user_8)
-bad_put_user_clac:
+.Lbad_put_user_clac:
ASM_CLAC
-bad_put_user:
+.Lbad_put_user:
movl $-EFAULT,%eax
RET
- _ASM_EXTABLE_UA(1b, bad_put_user_clac)
- _ASM_EXTABLE_UA(2b, bad_put_user_clac)
- _ASM_EXTABLE_UA(3b, bad_put_user_clac)
- _ASM_EXTABLE_UA(4b, bad_put_user_clac)
+ _ASM_EXTABLE_UA(1b, .Lbad_put_user_clac)
+ _ASM_EXTABLE_UA(2b, .Lbad_put_user_clac)
+ _ASM_EXTABLE_UA(3b, .Lbad_put_user_clac)
+ _ASM_EXTABLE_UA(4b, .Lbad_put_user_clac)
#ifdef CONFIG_X86_32
- _ASM_EXTABLE_UA(5b, bad_put_user_clac)
+ _ASM_EXTABLE_UA(5b, .Lbad_put_user_clac)
#endif
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 63e99f15d7cf..a39dcdb5ae34 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -19,6 +19,7 @@
#include <asm/set_memory.h>
#include <asm/e820/api.h>
+#include <asm/efi.h>
#include <asm/fixmap.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c
index 7389db538c30..6fa42e9c4e6f 100644
--- a/arch/x86/pci/mmconfig-shared.c
+++ b/arch/x86/pci/mmconfig-shared.c
@@ -29,6 +29,7 @@
static bool pci_mmcfg_running_state;
static bool pci_mmcfg_arch_init_failed;
static DEFINE_MUTEX(pci_mmcfg_lock);
+#define pci_mmcfg_lock_held() lock_is_held(&(pci_mmcfg_lock).dep_map)
LIST_HEAD(pci_mmcfg_list);
@@ -54,7 +55,7 @@ static void list_add_sorted(struct pci_mmcfg_region *new)
struct pci_mmcfg_region *cfg;
/* keep list sorted by segment and starting bus number */
- list_for_each_entry_rcu(cfg, &pci_mmcfg_list, list) {
+ list_for_each_entry_rcu(cfg, &pci_mmcfg_list, list, pci_mmcfg_lock_held()) {
if (cfg->segment > new->segment ||
(cfg->segment == new->segment &&
cfg->start_bus >= new->start_bus)) {
@@ -118,7 +119,7 @@ struct pci_mmcfg_region *pci_mmconfig_lookup(int segment, int bus)
{
struct pci_mmcfg_region *cfg;
- list_for_each_entry_rcu(cfg, &pci_mmcfg_list, list)
+ list_for_each_entry_rcu(cfg, &pci_mmcfg_list, list, pci_mmcfg_lock_held())
if (cfg->segment == segment &&
cfg->start_bus <= bus && bus <= cfg->end_bus)
return cfg;
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index a7189a3b4d70..c202e1b07e29 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -59,11 +59,34 @@ static efi_system_table_t efi_systab __initdata;
static efi_config_table_type_t arch_tables[] __initdata = {
#ifdef CONFIG_X86_UV
- {UV_SYSTEM_TABLE_GUID, "UVsystab", &efi.uv_systab},
+ {UV_SYSTEM_TABLE_GUID, "UVsystab", &uv_systab_phys},
#endif
{NULL_GUID, NULL, NULL},
};
+static const unsigned long * const efi_tables[] = {
+ &efi.mps,
+ &efi.acpi,
+ &efi.acpi20,
+ &efi.smbios,
+ &efi.smbios3,
+ &efi.boot_info,
+ &efi.hcdp,
+ &efi.uga,
+#ifdef CONFIG_X86_UV
+ &uv_systab_phys,
+#endif
+ &efi.fw_vendor,
+ &efi.runtime,
+ &efi.config_table,
+ &efi.esrt,
+ &efi.properties_table,
+ &efi.mem_attr_table,
+#ifdef CONFIG_EFI_RCI2_TABLE
+ &rci2_table_phys,
+#endif
+};
+
u64 efi_setup; /* efi setup_data physical address */
static int add_efi_memmap __initdata;
@@ -1049,3 +1072,17 @@ static int __init arch_parse_efi_cmdline(char *str)
return 0;
}
early_param("efi", arch_parse_efi_cmdline);
+
+bool efi_is_table_address(unsigned long phys_addr)
+{
+ unsigned int i;
+
+ if (phys_addr == EFI_INVALID_TABLE_ADDR)
+ return false;
+
+ for (i = 0; i < ARRAY_SIZE(efi_tables); i++)
+ if (*(efi_tables[i]) == phys_addr)
+ return true;
+
+ return false;
+}
diff --git a/arch/x86/platform/uv/bios_uv.c b/arch/x86/platform/uv/bios_uv.c
index 7c69652ffeea..c2ee31953372 100644
--- a/arch/x86/platform/uv/bios_uv.c
+++ b/arch/x86/platform/uv/bios_uv.c
@@ -14,6 +14,8 @@
#include <asm/uv/bios.h>
#include <asm/uv/uv_hub.h>
+unsigned long uv_systab_phys __ro_after_init = EFI_INVALID_TABLE_ADDR;
+
struct uv_systab *uv_systab;
static s64 __uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3,
@@ -185,13 +187,13 @@ EXPORT_SYMBOL_GPL(uv_bios_set_legacy_vga_target);
void uv_bios_init(void)
{
uv_systab = NULL;
- if ((efi.uv_systab == EFI_INVALID_TABLE_ADDR) ||
- !efi.uv_systab || efi_runtime_disabled()) {
+ if ((uv_systab_phys == EFI_INVALID_TABLE_ADDR) ||
+ !uv_systab_phys || efi_runtime_disabled()) {
pr_crit("UV: UVsystab: missing\n");
return;
}
- uv_systab = ioremap(efi.uv_systab, sizeof(struct uv_systab));
+ uv_systab = ioremap(uv_systab_phys, sizeof(struct uv_systab));
if (!uv_systab || strncmp(uv_systab->signature, UV_SYSTAB_SIG, 4)) {
pr_err("UV: UVsystab: bad signature!\n");
iounmap(uv_systab);
@@ -203,7 +205,7 @@ void uv_bios_init(void)
int size = uv_systab->size;
iounmap(uv_systab);
- uv_systab = ioremap(efi.uv_systab, size);
+ uv_systab = ioremap(uv_systab_phys, size);
if (!uv_systab) {
pr_err("UV: UVsystab: ioremap(%d) failed!\n", size);
return;
diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile
index 8901a1f89cf5..10fb42da0007 100644
--- a/arch/x86/purgatory/Makefile
+++ b/arch/x86/purgatory/Makefile
@@ -18,37 +18,40 @@ targets += purgatory.ro
KASAN_SANITIZE := n
KCOV_INSTRUMENT := n
+# These are adjustments to the compiler flags used for objects that
+# make up the standalone purgatory.ro
+
+PURGATORY_CFLAGS_REMOVE := -mcmodel=kernel
+PURGATORY_CFLAGS := -mcmodel=large -ffreestanding -fno-zero-initialized-in-bss
+
# Default KBUILD_CFLAGS can have -pg option set when FTRACE is enabled. That
# in turn leaves some undefined symbols like __fentry__ in purgatory and not
# sure how to relocate those.
ifdef CONFIG_FUNCTION_TRACER
-CFLAGS_REMOVE_sha256.o += $(CC_FLAGS_FTRACE)
-CFLAGS_REMOVE_purgatory.o += $(CC_FLAGS_FTRACE)
-CFLAGS_REMOVE_string.o += $(CC_FLAGS_FTRACE)
-CFLAGS_REMOVE_kexec-purgatory.o += $(CC_FLAGS_FTRACE)
+PURGATORY_CFLAGS_REMOVE += $(CC_FLAGS_FTRACE)
endif
ifdef CONFIG_STACKPROTECTOR
-CFLAGS_REMOVE_sha256.o += -fstack-protector
-CFLAGS_REMOVE_purgatory.o += -fstack-protector
-CFLAGS_REMOVE_string.o += -fstack-protector
-CFLAGS_REMOVE_kexec-purgatory.o += -fstack-protector
+PURGATORY_CFLAGS_REMOVE += -fstack-protector
endif
ifdef CONFIG_STACKPROTECTOR_STRONG
-CFLAGS_REMOVE_sha256.o += -fstack-protector-strong
-CFLAGS_REMOVE_purgatory.o += -fstack-protector-strong
-CFLAGS_REMOVE_string.o += -fstack-protector-strong
-CFLAGS_REMOVE_kexec-purgatory.o += -fstack-protector-strong
+PURGATORY_CFLAGS_REMOVE += -fstack-protector-strong
endif
ifdef CONFIG_RETPOLINE
-CFLAGS_REMOVE_sha256.o += $(RETPOLINE_CFLAGS)
-CFLAGS_REMOVE_purgatory.o += $(RETPOLINE_CFLAGS)
-CFLAGS_REMOVE_string.o += $(RETPOLINE_CFLAGS)
-CFLAGS_REMOVE_kexec-purgatory.o += $(RETPOLINE_CFLAGS)
+PURGATORY_CFLAGS_REMOVE += $(RETPOLINE_CFLAGS)
endif
+CFLAGS_REMOVE_purgatory.o += $(PURGATORY_CFLAGS_REMOVE)
+CFLAGS_purgatory.o += $(PURGATORY_CFLAGS)
+
+CFLAGS_REMOVE_sha256.o += $(PURGATORY_CFLAGS_REMOVE)
+CFLAGS_sha256.o += $(PURGATORY_CFLAGS)
+
+CFLAGS_REMOVE_string.o += $(PURGATORY_CFLAGS_REMOVE)
+CFLAGS_string.o += $(PURGATORY_CFLAGS)
+
$(obj)/purgatory.ro: $(PURGATORY_OBJS) FORCE
$(call if_changed,ld)