summaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig15
-rw-r--r--arch/x86/boot/compressed/eboot.c308
-rw-r--r--arch/x86/boot/compressed/eboot.h74
-rw-r--r--arch/x86/configs/kvm_guest.config3
-rw-r--r--arch/x86/entry/syscalls/syscall_32.tbl4
-rw-r--r--arch/x86/events/Kconfig36
-rw-r--r--arch/x86/events/Makefile9
-rw-r--r--arch/x86/events/amd/uncore.c2
-rw-r--r--arch/x86/events/core.c10
-rw-r--r--arch/x86/events/intel/Makefile9
-rw-r--r--arch/x86/events/intel/bts.c105
-rw-r--r--arch/x86/events/intel/core.c162
-rw-r--r--arch/x86/events/intel/cstate.c547
-rw-r--r--arch/x86/events/intel/ds.c6
-rw-r--r--arch/x86/events/intel/lbr.c31
-rw-r--r--arch/x86/events/intel/pt.c320
-rw-r--r--arch/x86/events/intel/pt.h68
-rw-r--r--arch/x86/events/intel/rapl.c183
-rw-r--r--arch/x86/events/intel/uncore.c216
-rw-r--r--arch/x86/events/intel/uncore_snbep.c7
-rw-r--r--arch/x86/events/msr.c38
-rw-r--r--arch/x86/events/perf_event.h5
-rw-r--r--arch/x86/include/asm/cpufeature.h1
-rw-r--r--arch/x86/include/asm/cpufeatures.h9
-rw-r--r--arch/x86/include/asm/efi.h52
-rw-r--r--arch/x86/include/asm/mce.h19
-rw-r--r--arch/x86/include/asm/msr-index.h35
-rw-r--r--arch/x86/include/asm/rwsem.h42
-rw-r--r--arch/x86/include/asm/uaccess.h8
-rw-r--r--arch/x86/kernel/cpu/common.c10
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce-genpool.c46
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce-internal.h15
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce-severity.c30
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c160
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_amd.c94
-rw-r--r--arch/x86/kernel/reboot.c9
-rw-r--r--arch/x86/kernel/signal.c23
-rw-r--r--arch/x86/kernel/sysfb_efi.c15
-rw-r--r--arch/x86/kernel/uprobes.c4
-rw-r--r--arch/x86/kvm/emulate.c6
-rw-r--r--arch/x86/lib/rwsem.S16
-rw-r--r--arch/x86/mm/pageattr.c8
-rw-r--r--arch/x86/platform/efi/efi.c133
-rw-r--r--arch/x86/platform/efi/efi_64.c10
-rw-r--r--arch/x86/platform/efi/quirks.c10
-rw-r--r--arch/x86/ras/Kconfig2
-rw-r--r--arch/x86/ras/Makefile2
-rw-r--r--arch/x86/ras/mce_amd_inj.c31
48 files changed, 1725 insertions, 1223 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 2dc18605831f..a494fa34713a 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -164,10 +164,6 @@ config INSTRUCTION_DECODER
def_bool y
depends on KPROBES || PERF_EVENTS || UPROBES
-config PERF_EVENTS_INTEL_UNCORE
- def_bool y
- depends on PERF_EVENTS && CPU_SUP_INTEL && PCI
-
config OUTPUT_FORMAT
string
default "elf32-i386" if X86_32
@@ -1046,6 +1042,8 @@ config X86_THERMAL_VECTOR
def_bool y
depends on X86_MCE_INTEL
+source "arch/x86/events/Kconfig"
+
config X86_LEGACY_VM86
bool "Legacy VM86 support"
default n
@@ -1210,15 +1208,6 @@ config MICROCODE_OLD_INTERFACE
def_bool y
depends on MICROCODE
-config PERF_EVENTS_AMD_POWER
- depends on PERF_EVENTS && CPU_SUP_AMD
- tristate "AMD Processor Power Reporting Mechanism"
- ---help---
- Provide power reporting mechanism support for AMD processors.
- Currently, it leverages X86_FEATURE_ACC_POWER
- (CPUID Fn8000_0007_EDX[12]) interface to calculate the
- average power consumption on Family 15h processors.
-
config X86_MSR
tristate "/dev/cpu/*/msr - Model-specific register support"
---help---
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index 583d539a4197..52fef606bc54 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -571,312 +571,6 @@ free_handle:
efi_call_early(free_pool, pci_handle);
}
-static void
-setup_pixel_info(struct screen_info *si, u32 pixels_per_scan_line,
- struct efi_pixel_bitmask pixel_info, int pixel_format)
-{
- if (pixel_format == PIXEL_RGB_RESERVED_8BIT_PER_COLOR) {
- si->lfb_depth = 32;
- si->lfb_linelength = pixels_per_scan_line * 4;
- si->red_size = 8;
- si->red_pos = 0;
- si->green_size = 8;
- si->green_pos = 8;
- si->blue_size = 8;
- si->blue_pos = 16;
- si->rsvd_size = 8;
- si->rsvd_pos = 24;
- } else if (pixel_format == PIXEL_BGR_RESERVED_8BIT_PER_COLOR) {
- si->lfb_depth = 32;
- si->lfb_linelength = pixels_per_scan_line * 4;
- si->red_size = 8;
- si->red_pos = 16;
- si->green_size = 8;
- si->green_pos = 8;
- si->blue_size = 8;
- si->blue_pos = 0;
- si->rsvd_size = 8;
- si->rsvd_pos = 24;
- } else if (pixel_format == PIXEL_BIT_MASK) {
- find_bits(pixel_info.red_mask, &si->red_pos, &si->red_size);
- find_bits(pixel_info.green_mask, &si->green_pos,
- &si->green_size);
- find_bits(pixel_info.blue_mask, &si->blue_pos, &si->blue_size);
- find_bits(pixel_info.reserved_mask, &si->rsvd_pos,
- &si->rsvd_size);
- si->lfb_depth = si->red_size + si->green_size +
- si->blue_size + si->rsvd_size;
- si->lfb_linelength = (pixels_per_scan_line * si->lfb_depth) / 8;
- } else {
- si->lfb_depth = 4;
- si->lfb_linelength = si->lfb_width / 2;
- si->red_size = 0;
- si->red_pos = 0;
- si->green_size = 0;
- si->green_pos = 0;
- si->blue_size = 0;
- si->blue_pos = 0;
- si->rsvd_size = 0;
- si->rsvd_pos = 0;
- }
-}
-
-static efi_status_t
-__gop_query32(struct efi_graphics_output_protocol_32 *gop32,
- struct efi_graphics_output_mode_info **info,
- unsigned long *size, u64 *fb_base)
-{
- struct efi_graphics_output_protocol_mode_32 *mode;
- efi_status_t status;
- unsigned long m;
-
- m = gop32->mode;
- mode = (struct efi_graphics_output_protocol_mode_32 *)m;
-
- status = efi_early->call(gop32->query_mode, gop32,
- mode->mode, size, info);
- if (status != EFI_SUCCESS)
- return status;
-
- *fb_base = mode->frame_buffer_base;
- return status;
-}
-
-static efi_status_t
-setup_gop32(struct screen_info *si, efi_guid_t *proto,
- unsigned long size, void **gop_handle)
-{
- struct efi_graphics_output_protocol_32 *gop32, *first_gop;
- unsigned long nr_gops;
- u16 width, height;
- u32 pixels_per_scan_line;
- u32 ext_lfb_base;
- u64 fb_base;
- struct efi_pixel_bitmask pixel_info;
- int pixel_format;
- efi_status_t status;
- u32 *handles = (u32 *)(unsigned long)gop_handle;
- int i;
-
- first_gop = NULL;
- gop32 = NULL;
-
- nr_gops = size / sizeof(u32);
- for (i = 0; i < nr_gops; i++) {
- struct efi_graphics_output_mode_info *info = NULL;
- efi_guid_t conout_proto = EFI_CONSOLE_OUT_DEVICE_GUID;
- bool conout_found = false;
- void *dummy = NULL;
- u32 h = handles[i];
- u64 current_fb_base;
-
- status = efi_call_early(handle_protocol, h,
- proto, (void **)&gop32);
- if (status != EFI_SUCCESS)
- continue;
-
- status = efi_call_early(handle_protocol, h,
- &conout_proto, &dummy);
- if (status == EFI_SUCCESS)
- conout_found = true;
-
- status = __gop_query32(gop32, &info, &size, &current_fb_base);
- if (status == EFI_SUCCESS && (!first_gop || conout_found)) {
- /*
- * Systems that use the UEFI Console Splitter may
- * provide multiple GOP devices, not all of which are
- * backed by real hardware. The workaround is to search
- * for a GOP implementing the ConOut protocol, and if
- * one isn't found, to just fall back to the first GOP.
- */
- width = info->horizontal_resolution;
- height = info->vertical_resolution;
- pixel_format = info->pixel_format;
- pixel_info = info->pixel_information;
- pixels_per_scan_line = info->pixels_per_scan_line;
- fb_base = current_fb_base;
-
- /*
- * Once we've found a GOP supporting ConOut,
- * don't bother looking any further.
- */
- first_gop = gop32;
- if (conout_found)
- break;
- }
- }
-
- /* Did we find any GOPs? */
- if (!first_gop)
- goto out;
-
- /* EFI framebuffer */
- si->orig_video_isVGA = VIDEO_TYPE_EFI;
-
- si->lfb_width = width;
- si->lfb_height = height;
- si->lfb_base = fb_base;
-
- ext_lfb_base = (u64)(unsigned long)fb_base >> 32;
- if (ext_lfb_base) {
- si->capabilities |= VIDEO_CAPABILITY_64BIT_BASE;
- si->ext_lfb_base = ext_lfb_base;
- }
-
- si->pages = 1;
-
- setup_pixel_info(si, pixels_per_scan_line, pixel_info, pixel_format);
-
- si->lfb_size = si->lfb_linelength * si->lfb_height;
-
- si->capabilities |= VIDEO_CAPABILITY_SKIP_QUIRKS;
-out:
- return status;
-}
-
-static efi_status_t
-__gop_query64(struct efi_graphics_output_protocol_64 *gop64,
- struct efi_graphics_output_mode_info **info,
- unsigned long *size, u64 *fb_base)
-{
- struct efi_graphics_output_protocol_mode_64 *mode;
- efi_status_t status;
- unsigned long m;
-
- m = gop64->mode;
- mode = (struct efi_graphics_output_protocol_mode_64 *)m;
-
- status = efi_early->call(gop64->query_mode, gop64,
- mode->mode, size, info);
- if (status != EFI_SUCCESS)
- return status;
-
- *fb_base = mode->frame_buffer_base;
- return status;
-}
-
-static efi_status_t
-setup_gop64(struct screen_info *si, efi_guid_t *proto,
- unsigned long size, void **gop_handle)
-{
- struct efi_graphics_output_protocol_64 *gop64, *first_gop;
- unsigned long nr_gops;
- u16 width, height;
- u32 pixels_per_scan_line;
- u32 ext_lfb_base;
- u64 fb_base;
- struct efi_pixel_bitmask pixel_info;
- int pixel_format;
- efi_status_t status;
- u64 *handles = (u64 *)(unsigned long)gop_handle;
- int i;
-
- first_gop = NULL;
- gop64 = NULL;
-
- nr_gops = size / sizeof(u64);
- for (i = 0; i < nr_gops; i++) {
- struct efi_graphics_output_mode_info *info = NULL;
- efi_guid_t conout_proto = EFI_CONSOLE_OUT_DEVICE_GUID;
- bool conout_found = false;
- void *dummy = NULL;
- u64 h = handles[i];
- u64 current_fb_base;
-
- status = efi_call_early(handle_protocol, h,
- proto, (void **)&gop64);
- if (status != EFI_SUCCESS)
- continue;
-
- status = efi_call_early(handle_protocol, h,
- &conout_proto, &dummy);
- if (status == EFI_SUCCESS)
- conout_found = true;
-
- status = __gop_query64(gop64, &info, &size, &current_fb_base);
- if (status == EFI_SUCCESS && (!first_gop || conout_found)) {
- /*
- * Systems that use the UEFI Console Splitter may
- * provide multiple GOP devices, not all of which are
- * backed by real hardware. The workaround is to search
- * for a GOP implementing the ConOut protocol, and if
- * one isn't found, to just fall back to the first GOP.
- */
- width = info->horizontal_resolution;
- height = info->vertical_resolution;
- pixel_format = info->pixel_format;
- pixel_info = info->pixel_information;
- pixels_per_scan_line = info->pixels_per_scan_line;
- fb_base = current_fb_base;
-
- /*
- * Once we've found a GOP supporting ConOut,
- * don't bother looking any further.
- */
- first_gop = gop64;
- if (conout_found)
- break;
- }
- }
-
- /* Did we find any GOPs? */
- if (!first_gop)
- goto out;
-
- /* EFI framebuffer */
- si->orig_video_isVGA = VIDEO_TYPE_EFI;
-
- si->lfb_width = width;
- si->lfb_height = height;
- si->lfb_base = fb_base;
-
- ext_lfb_base = (u64)(unsigned long)fb_base >> 32;
- if (ext_lfb_base) {
- si->capabilities |= VIDEO_CAPABILITY_64BIT_BASE;
- si->ext_lfb_base = ext_lfb_base;
- }
-
- si->pages = 1;
-
- setup_pixel_info(si, pixels_per_scan_line, pixel_info, pixel_format);
-
- si->lfb_size = si->lfb_linelength * si->lfb_height;
-
- si->capabilities |= VIDEO_CAPABILITY_SKIP_QUIRKS;
-out:
- return status;
-}
-
-/*
- * See if we have Graphics Output Protocol
- */
-static efi_status_t setup_gop(struct screen_info *si, efi_guid_t *proto,
- unsigned long size)
-{
- efi_status_t status;
- void **gop_handle = NULL;
-
- status = efi_call_early(allocate_pool, EFI_LOADER_DATA,
- size, (void **)&gop_handle);
- if (status != EFI_SUCCESS)
- return status;
-
- status = efi_call_early(locate_handle,
- EFI_LOCATE_BY_PROTOCOL,
- proto, NULL, &size, gop_handle);
- if (status != EFI_SUCCESS)
- goto free_handle;
-
- if (efi_early->is64)
- status = setup_gop64(si, proto, size, gop_handle);
- else
- status = setup_gop32(si, proto, size, gop_handle);
-
-free_handle:
- efi_call_early(free_pool, gop_handle);
- return status;
-}
-
static efi_status_t
setup_uga32(void **uga_handle, unsigned long size, u32 *width, u32 *height)
{
@@ -1038,7 +732,7 @@ void setup_graphics(struct boot_params *boot_params)
EFI_LOCATE_BY_PROTOCOL,
&graphics_proto, NULL, &size, gop_handle);
if (status == EFI_BUFFER_TOO_SMALL)
- status = setup_gop(si, &graphics_proto, size);
+ status = efi_setup_gop(NULL, si, &graphics_proto, size);
if (status != EFI_SUCCESS) {
size = 0;
diff --git a/arch/x86/boot/compressed/eboot.h b/arch/x86/boot/compressed/eboot.h
index d487e727f1ec..c0223f1a89d7 100644
--- a/arch/x86/boot/compressed/eboot.h
+++ b/arch/x86/boot/compressed/eboot.h
@@ -11,80 +11,6 @@
#define DESC_TYPE_CODE_DATA (1 << 0)
-#define EFI_CONSOLE_OUT_DEVICE_GUID \
- EFI_GUID(0xd3b36f2c, 0xd551, 0x11d4, 0x9a, 0x46, 0x0, 0x90, 0x27, \
- 0x3f, 0xc1, 0x4d)
-
-#define PIXEL_RGB_RESERVED_8BIT_PER_COLOR 0
-#define PIXEL_BGR_RESERVED_8BIT_PER_COLOR 1
-#define PIXEL_BIT_MASK 2
-#define PIXEL_BLT_ONLY 3
-#define PIXEL_FORMAT_MAX 4
-
-struct efi_pixel_bitmask {
- u32 red_mask;
- u32 green_mask;
- u32 blue_mask;
- u32 reserved_mask;
-};
-
-struct efi_graphics_output_mode_info {
- u32 version;
- u32 horizontal_resolution;
- u32 vertical_resolution;
- int pixel_format;
- struct efi_pixel_bitmask pixel_information;
- u32 pixels_per_scan_line;
-} __packed;
-
-struct efi_graphics_output_protocol_mode_32 {
- u32 max_mode;
- u32 mode;
- u32 info;
- u32 size_of_info;
- u64 frame_buffer_base;
- u32 frame_buffer_size;
-} __packed;
-
-struct efi_graphics_output_protocol_mode_64 {
- u32 max_mode;
- u32 mode;
- u64 info;
- u64 size_of_info;
- u64 frame_buffer_base;
- u64 frame_buffer_size;
-} __packed;
-
-struct efi_graphics_output_protocol_mode {
- u32 max_mode;
- u32 mode;
- unsigned long info;
- unsigned long size_of_info;
- u64 frame_buffer_base;
- unsigned long frame_buffer_size;
-} __packed;
-
-struct efi_graphics_output_protocol_32 {
- u32 query_mode;
- u32 set_mode;
- u32 blt;
- u32 mode;
-};
-
-struct efi_graphics_output_protocol_64 {
- u64 query_mode;
- u64 set_mode;
- u64 blt;
- u64 mode;
-};
-
-struct efi_graphics_output_protocol {
- void *query_mode;
- unsigned long set_mode;
- unsigned long blt;
- struct efi_graphics_output_protocol_mode *mode;
-};
-
struct efi_uga_draw_protocol_32 {
u32 get_mode;
u32 set_mode;
diff --git a/arch/x86/configs/kvm_guest.config b/arch/x86/configs/kvm_guest.config
index f9affcc3b9f1..9906505c998a 100644
--- a/arch/x86/configs/kvm_guest.config
+++ b/arch/x86/configs/kvm_guest.config
@@ -26,3 +26,6 @@ CONFIG_VIRTIO_NET=y
CONFIG_9P_FS=y
CONFIG_NET_9P=y
CONFIG_NET_9P_VIRTIO=y
+CONFIG_SCSI_LOWLEVEL=y
+CONFIG_SCSI_VIRTIO=y
+CONFIG_VIRTIO_INPUT=y
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index b30dd8154cc2..4cddd17153fb 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -384,5 +384,5 @@
375 i386 membarrier sys_membarrier
376 i386 mlock2 sys_mlock2
377 i386 copy_file_range sys_copy_file_range
-378 i386 preadv2 sys_preadv2
-379 i386 pwritev2 sys_pwritev2
+378 i386 preadv2 sys_preadv2 compat_sys_preadv2
+379 i386 pwritev2 sys_pwritev2 compat_sys_pwritev2
diff --git a/arch/x86/events/Kconfig b/arch/x86/events/Kconfig
new file mode 100644
index 000000000000..98397db5ceae
--- /dev/null
+++ b/arch/x86/events/Kconfig
@@ -0,0 +1,36 @@
+menu "Performance monitoring"
+
+config PERF_EVENTS_INTEL_UNCORE
+ tristate "Intel uncore performance events"
+ depends on PERF_EVENTS && CPU_SUP_INTEL && PCI
+ default y
+ ---help---
+ Include support for Intel uncore performance events. These are
+ available on NehalemEX and more modern processors.
+
+config PERF_EVENTS_INTEL_RAPL
+ tristate "Intel rapl performance events"
+ depends on PERF_EVENTS && CPU_SUP_INTEL && PCI
+ default y
+ ---help---
+ Include support for Intel rapl performance events for power
+ monitoring on modern processors.
+
+config PERF_EVENTS_INTEL_CSTATE
+ tristate "Intel cstate performance events"
+ depends on PERF_EVENTS && CPU_SUP_INTEL && PCI
+ default y
+ ---help---
+ Include support for Intel cstate performance events for power
+ monitoring on modern processors.
+
+config PERF_EVENTS_AMD_POWER
+ depends on PERF_EVENTS && CPU_SUP_AMD
+ tristate "AMD Processor Power Reporting Mechanism"
+ ---help---
+ Provide power reporting mechanism support for AMD processors.
+ Currently, it leverages X86_FEATURE_ACC_POWER
+ (CPUID Fn8000_0007_EDX[12]) interface to calculate the
+ average power consumption on Family 15h processors.
+
+endmenu
diff --git a/arch/x86/events/Makefile b/arch/x86/events/Makefile
index f59618a39990..1d392c39fe56 100644
--- a/arch/x86/events/Makefile
+++ b/arch/x86/events/Makefile
@@ -6,9 +6,6 @@ obj-$(CONFIG_X86_LOCAL_APIC) += amd/ibs.o msr.o
ifdef CONFIG_AMD_IOMMU
obj-$(CONFIG_CPU_SUP_AMD) += amd/iommu.o
endif
-obj-$(CONFIG_CPU_SUP_INTEL) += intel/core.o intel/bts.o intel/cqm.o
-obj-$(CONFIG_CPU_SUP_INTEL) += intel/cstate.o intel/ds.o intel/knc.o
-obj-$(CONFIG_CPU_SUP_INTEL) += intel/lbr.o intel/p4.o intel/p6.o intel/pt.o
-obj-$(CONFIG_CPU_SUP_INTEL) += intel/rapl.o msr.o
-obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE) += intel/uncore.o intel/uncore_nhmex.o
-obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE) += intel/uncore_snb.o intel/uncore_snbep.o
+
+obj-$(CONFIG_CPU_SUP_INTEL) += msr.o
+obj-$(CONFIG_CPU_SUP_INTEL) += intel/
diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c
index 3db9569e658c..98ac57381bf9 100644
--- a/arch/x86/events/amd/uncore.c
+++ b/arch/x86/events/amd/uncore.c
@@ -263,6 +263,7 @@ static const struct attribute_group *amd_uncore_attr_groups[] = {
};
static struct pmu amd_nb_pmu = {
+ .task_ctx_nr = perf_invalid_context,
.attr_groups = amd_uncore_attr_groups,
.name = "amd_nb",
.event_init = amd_uncore_event_init,
@@ -274,6 +275,7 @@ static struct pmu amd_nb_pmu = {
};
static struct pmu amd_l2_pmu = {
+ .task_ctx_nr = perf_invalid_context,
.attr_groups = amd_uncore_attr_groups,
.name = "amd_l2",
.event_init = amd_uncore_event_init,
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index dd39fde66b54..b7080bef9137 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -360,6 +360,9 @@ int x86_add_exclusive(unsigned int what)
{
int i;
+ if (x86_pmu.lbr_pt_coexist)
+ return 0;
+
if (!atomic_inc_not_zero(&x86_pmu.lbr_exclusive[what])) {
mutex_lock(&pmc_reserve_mutex);
for (i = 0; i < ARRAY_SIZE(x86_pmu.lbr_exclusive); i++) {
@@ -380,6 +383,9 @@ fail_unlock:
void x86_del_exclusive(unsigned int what)
{
+ if (x86_pmu.lbr_pt_coexist)
+ return;
+
atomic_dec(&x86_pmu.lbr_exclusive[what]);
atomic_dec(&active_events);
}
@@ -2277,7 +2283,7 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
fp = compat_ptr(ss_base + regs->bp);
pagefault_disable();
- while (entry->nr < PERF_MAX_STACK_DEPTH) {
+ while (entry->nr < sysctl_perf_event_max_stack) {
unsigned long bytes;
frame.next_frame = 0;
frame.return_address = 0;
@@ -2337,7 +2343,7 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
return;
pagefault_disable();
- while (entry->nr < PERF_MAX_STACK_DEPTH) {
+ while (entry->nr < sysctl_perf_event_max_stack) {
unsigned long bytes;
frame.next_frame = NULL;
frame.return_address = 0;
diff --git a/arch/x86/events/intel/Makefile b/arch/x86/events/intel/Makefile
new file mode 100644
index 000000000000..3660b2cf245a
--- /dev/null
+++ b/arch/x86/events/intel/Makefile
@@ -0,0 +1,9 @@
+obj-$(CONFIG_CPU_SUP_INTEL) += core.o bts.o cqm.o
+obj-$(CONFIG_CPU_SUP_INTEL) += ds.o knc.o
+obj-$(CONFIG_CPU_SUP_INTEL) += lbr.o p4.o p6.o pt.o
+obj-$(CONFIG_PERF_EVENTS_INTEL_RAPL) += intel-rapl.o
+intel-rapl-objs := rapl.o
+obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE) += intel-uncore.o
+intel-uncore-objs := uncore.o uncore_nhmex.o uncore_snb.o uncore_snbep.o
+obj-$(CONFIG_PERF_EVENTS_INTEL_CSTATE) += intel-cstate.o
+intel-cstate-objs := cstate.o
diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c
index b99dc9258c0f..0a6e393a2e62 100644
--- a/arch/x86/events/intel/bts.c
+++ b/arch/x86/events/intel/bts.c
@@ -171,18 +171,6 @@ static void bts_buffer_pad_out(struct bts_phys *phys, unsigned long head)
memset(page_address(phys->page) + index, 0, phys->size - index);
}
-static bool bts_buffer_is_full(struct bts_buffer *buf, struct bts_ctx *bts)
-{
- if (buf->snapshot)
- return false;
-
- if (local_read(&buf->data_size) >= bts->handle.size ||
- bts->handle.size - local_read(&buf->data_size) < BTS_RECORD_SIZE)
- return true;
-
- return false;
-}
-
static void bts_update(struct bts_ctx *bts)
{
int cpu = raw_smp_processor_id();
@@ -213,18 +201,15 @@ static void bts_update(struct bts_ctx *bts)
}
}
+static int
+bts_buffer_reset(struct bts_buffer *buf, struct perf_output_handle *handle);
+
static void __bts_event_start(struct perf_event *event)
{
struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
struct bts_buffer *buf = perf_get_aux(&bts->handle);
u64 config = 0;
- if (!buf || bts_buffer_is_full(buf, bts))
- return;
-
- event->hw.itrace_started = 1;
- event->hw.state = 0;
-
if (!buf->snapshot)
config |= ARCH_PERFMON_EVENTSEL_INT;
if (!event->attr.exclude_kernel)
@@ -241,16 +226,41 @@ static void __bts_event_start(struct perf_event *event)
wmb();
intel_pmu_enable_bts(config);
+
}
static void bts_event_start(struct perf_event *event, int flags)
{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
+ struct bts_buffer *buf;
+
+ buf = perf_aux_output_begin(&bts->handle, event);
+ if (!buf)
+ goto fail_stop;
+
+ if (bts_buffer_reset(buf, &bts->handle))
+ goto fail_end_stop;
+
+ bts->ds_back.bts_buffer_base = cpuc->ds->bts_buffer_base;
+ bts->ds_back.bts_absolute_maximum = cpuc->ds->bts_absolute_maximum;
+ bts->ds_back.bts_interrupt_threshold = cpuc->ds->bts_interrupt_threshold;
+
+ event->hw.itrace_started = 1;
+ event->hw.state = 0;
__bts_event_start(event);
/* PMI handler: this counter is running and likely generating PMIs */
ACCESS_ONCE(bts->started) = 1;
+
+ return;
+
+fail_end_stop:
+ perf_aux_output_end(&bts->handle, 0, false);
+
+fail_stop:
+ event->hw.state = PERF_HES_STOPPED;
}
static void __bts_event_stop(struct perf_event *event)
@@ -269,15 +279,32 @@ static void __bts_event_stop(struct perf_event *event)
static void bts_event_stop(struct perf_event *event, int flags)
{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
+ struct bts_buffer *buf = perf_get_aux(&bts->handle);
/* PMI handler: don't restart this counter */
ACCESS_ONCE(bts->started) = 0;
__bts_event_stop(event);
- if (flags & PERF_EF_UPDATE)
+ if (flags & PERF_EF_UPDATE) {
bts_update(bts);
+
+ if (buf) {
+ if (buf->snapshot)
+ bts->handle.head =
+ local_xchg(&buf->data_size,
+ buf->nr_pages << PAGE_SHIFT);
+ perf_aux_output_end(&bts->handle, local_xchg(&buf->data_size, 0),
+ !!local_xchg(&buf->lost, 0));
+ }
+
+ cpuc->ds->bts_index = bts->ds_back.bts_buffer_base;
+ cpuc->ds->bts_buffer_base = bts->ds_back.bts_buffer_base;
+ cpuc->ds->bts_absolute_maximum = bts->ds_back.bts_absolute_maximum;
+ cpuc->ds->bts_interrupt_threshold = bts->ds_back.bts_interrupt_threshold;
+ }
}
void intel_bts_enable_local(void)
@@ -417,34 +444,14 @@ int intel_bts_interrupt(void)
static void bts_event_del(struct perf_event *event, int mode)
{
- struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
- struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
- struct bts_buffer *buf = perf_get_aux(&bts->handle);
-
bts_event_stop(event, PERF_EF_UPDATE);
-
- if (buf) {
- if (buf->snapshot)
- bts->handle.head =
- local_xchg(&buf->data_size,
- buf->nr_pages << PAGE_SHIFT);
- perf_aux_output_end(&bts->handle, local_xchg(&buf->data_size, 0),
- !!local_xchg(&buf->lost, 0));
- }
-
- cpuc->ds->bts_index = bts->ds_back.bts_buffer_base;
- cpuc->ds->bts_buffer_base = bts->ds_back.bts_buffer_base;
- cpuc->ds->bts_absolute_maximum = bts->ds_back.bts_absolute_maximum;
- cpuc->ds->bts_interrupt_threshold = bts->ds_back.bts_interrupt_threshold;
}
static int bts_event_add(struct perf_event *event, int mode)
{
- struct bts_buffer *buf;
struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct hw_perf_event *hwc = &event->hw;
- int ret = -EBUSY;
event->hw.state = PERF_HES_STOPPED;
@@ -454,26 +461,10 @@ static int bts_event_add(struct perf_event *event, int mode)
if (bts->handle.event)
return -EBUSY;
- buf = perf_aux_output_begin(&bts->handle, event);
- if (!buf)
- return -EINVAL;
-
- ret = bts_buffer_reset(buf, &bts->handle);
- if (ret) {
- perf_aux_output_end(&bts->handle, 0, false);
- return ret;
- }
-
- bts->ds_back.bts_buffer_base = cpuc->ds->bts_buffer_base;
- bts->ds_back.bts_absolute_maximum = cpuc->ds->bts_absolute_maximum;
- bts->ds_back.bts_interrupt_threshold = cpuc->ds->bts_interrupt_threshold;
-
if (mode & PERF_EF_START) {
bts_event_start(event, 0);
- if (hwc->state & PERF_HES_STOPPED) {
- bts_event_del(event, 0);
- return -EBUSY;
- }
+ if (hwc->state & PERF_HES_STOPPED)
+ return -EINVAL;
}
return 0;
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index a6fd4dbcf820..7c666958a625 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -1465,6 +1465,140 @@ static __initconst const u64 slm_hw_cache_event_ids
},
};
+static struct extra_reg intel_glm_extra_regs[] __read_mostly = {
+ /* must define OFFCORE_RSP_X first, see intel_fixup_er() */
+ INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x760005ffbfull, RSP_0),
+ INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0x360005ffbfull, RSP_1),
+ EVENT_EXTRA_END
+};
+
+#define GLM_DEMAND_DATA_RD BIT_ULL(0)
+#define GLM_DEMAND_RFO BIT_ULL(1)
+#define GLM_ANY_RESPONSE BIT_ULL(16)
+#define GLM_SNP_NONE_OR_MISS BIT_ULL(33)
+#define GLM_DEMAND_READ GLM_DEMAND_DATA_RD
+#define GLM_DEMAND_WRITE GLM_DEMAND_RFO
+#define GLM_DEMAND_PREFETCH (SNB_PF_DATA_RD|SNB_PF_RFO)
+#define GLM_LLC_ACCESS GLM_ANY_RESPONSE
+#define GLM_SNP_ANY (GLM_SNP_NONE_OR_MISS|SNB_NO_FWD|SNB_HITM)
+#define GLM_LLC_MISS (GLM_SNP_ANY|SNB_NON_DRAM)
+
+static __initconst const u64 glm_hw_cache_event_ids
+ [PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+ [C(L1D)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = 0x81d0, /* MEM_UOPS_RETIRED.ALL_LOADS */
+ [C(RESULT_MISS)] = 0x0,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = 0x82d0, /* MEM_UOPS_RETIRED.ALL_STORES */
+ [C(RESULT_MISS)] = 0x0,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = 0x0,
+ [C(RESULT_MISS)] = 0x0,
+ },
+ },
+ [C(L1I)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = 0x0380, /* ICACHE.ACCESSES */
+ [C(RESULT_MISS)] = 0x0280, /* ICACHE.MISSES */
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = 0x0,
+ [C(RESULT_MISS)] = 0x0,
+ },
+ },
+ [C(LL)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = 0x1b7, /* OFFCORE_RESPONSE */
+ [C(RESULT_MISS)] = 0x1b7, /* OFFCORE_RESPONSE */
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = 0x1b7, /* OFFCORE_RESPONSE */
+ [C(RESULT_MISS)] = 0x1b7, /* OFFCORE_RESPONSE */
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = 0x1b7, /* OFFCORE_RESPONSE */
+ [C(RESULT_MISS)] = 0x1b7, /* OFFCORE_RESPONSE */
+ },
+ },
+ [C(DTLB)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = 0x81d0, /* MEM_UOPS_RETIRED.ALL_LOADS */
+ [C(RESULT_MISS)] = 0x0,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = 0x82d0, /* MEM_UOPS_RETIRED.ALL_STORES */
+ [C(RESULT_MISS)] = 0x0,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = 0x0,
+ [C(RESULT_MISS)] = 0x0,
+ },
+ },
+ [C(ITLB)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = 0x00c0, /* INST_RETIRED.ANY_P */
+ [C(RESULT_MISS)] = 0x0481, /* ITLB.MISS */
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+ },
+ [C(BPU)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
+ [C(RESULT_MISS)] = 0x00c5, /* BR_MISP_RETIRED.ALL_BRANCHES */
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+ },
+};
+
+static __initconst const u64 glm_hw_cache_extra_regs
+ [PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+ [C(LL)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = GLM_DEMAND_READ|
+ GLM_LLC_ACCESS,
+ [C(RESULT_MISS)] = GLM_DEMAND_READ|
+ GLM_LLC_MISS,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = GLM_DEMAND_WRITE|
+ GLM_LLC_ACCESS,
+ [C(RESULT_MISS)] = GLM_DEMAND_WRITE|
+ GLM_LLC_MISS,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = GLM_DEMAND_PREFETCH|
+ GLM_LLC_ACCESS,
+ [C(RESULT_MISS)] = GLM_DEMAND_PREFETCH|
+ GLM_LLC_MISS,
+ },
+ },
+};
+
#define KNL_OT_L2_HITE BIT_ULL(19) /* Other Tile L2 Hit */
#define KNL_OT_L2_HITF BIT_ULL(20) /* Other Tile L2 Hit */
#define KNL_MCDRAM_LOCAL BIT_ULL(21)
@@ -3447,7 +3581,7 @@ __init int intel_pmu_init(void)
memcpy(hw_cache_extra_regs, slm_hw_cache_extra_regs,
sizeof(hw_cache_extra_regs));
- intel_pmu_lbr_init_atom();
+ intel_pmu_lbr_init_slm();
x86_pmu.event_constraints = intel_slm_event_constraints;
x86_pmu.pebs_constraints = intel_slm_pebs_event_constraints;
@@ -3456,6 +3590,30 @@ __init int intel_pmu_init(void)
pr_cont("Silvermont events, ");
break;
+ case 92: /* 14nm Atom "Goldmont" */
+ case 95: /* 14nm Atom "Goldmont Denverton" */
+ memcpy(hw_cache_event_ids, glm_hw_cache_event_ids,
+ sizeof(hw_cache_event_ids));
+ memcpy(hw_cache_extra_regs, glm_hw_cache_extra_regs,
+ sizeof(hw_cache_extra_regs));
+
+ intel_pmu_lbr_init_skl();
+
+ x86_pmu.event_constraints = intel_slm_event_constraints;
+ x86_pmu.pebs_constraints = intel_glm_pebs_event_constraints;
+ x86_pmu.extra_regs = intel_glm_extra_regs;
+ /*
+ * It's recommended to use CPU_CLK_UNHALTED.CORE_P + NPEBS
+ * for precise cycles.
+ * :pp is identical to :ppp
+ */
+ x86_pmu.pebs_aliases = NULL;
+ x86_pmu.pebs_prec_dist = true;
+ x86_pmu.lbr_pt_coexist = true;
+ x86_pmu.flags |= PMU_FL_HAS_RSP_1;
+ pr_cont("Goldmont events, ");
+ break;
+
case 37: /* 32nm Westmere */
case 44: /* 32nm Westmere-EP */
case 47: /* 32nm Westmere-EX */
@@ -3708,7 +3866,7 @@ __init int intel_pmu_init(void)
c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1;
}
c->idxmsk64 &=
- ~(~0UL << (INTEL_PMC_IDX_FIXED + x86_pmu.num_counters_fixed));
+ ~(~0ULL << (INTEL_PMC_IDX_FIXED + x86_pmu.num_counters_fixed));
c->weight = hweight64(c->idxmsk64);
}
}
diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c
index 7946c4231169..9ba4e4136a15 100644
--- a/arch/x86/events/intel/cstate.c
+++ b/arch/x86/events/intel/cstate.c
@@ -91,6 +91,8 @@
#include <asm/cpu_device_id.h>
#include "../perf_event.h"
+MODULE_LICENSE("GPL");
+
#define DEFINE_CSTATE_FORMAT_ATTR(_var, _name, _format) \
static ssize_t __cstate_##_var##_show(struct kobject *kobj, \
struct kobj_attribute *attr, \
@@ -106,22 +108,27 @@ static ssize_t cstate_get_attr_cpumask(struct device *dev,
struct device_attribute *attr,
char *buf);
+/* Model -> events mapping */
+struct cstate_model {
+ unsigned long core_events;
+ unsigned long pkg_events;
+ unsigned long quirks;
+};
+
+/* Quirk flags */
+#define SLM_PKG_C6_USE_C7_MSR (1UL << 0)
+
struct perf_cstate_msr {
u64 msr;
struct perf_pmu_events_attr *attr;
- bool (*test)(int idx);
};
/* cstate_core PMU */
-
static struct pmu cstate_core_pmu;
static bool has_cstate_core;
-enum perf_cstate_core_id {
- /*
- * cstate_core events
- */
+enum perf_cstate_core_events {
PERF_CSTATE_CORE_C1_RES = 0,
PERF_CSTATE_CORE_C3_RES,
PERF_CSTATE_CORE_C6_RES,
@@ -130,69 +137,16 @@ enum perf_cstate_core_id {
PERF_CSTATE_CORE_EVENT_MAX,
};
-bool test_core(int idx)
-{
- if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
- boot_cpu_data.x86 != 6)
- return false;
-
- switch (boot_cpu_data.x86_model) {
- case 30: /* 45nm Nehalem */
- case 26: /* 45nm Nehalem-EP */
- case 46: /* 45nm Nehalem-EX */
-
- case 37: /* 32nm Westmere */
- case 44: /* 32nm Westmere-EP */
- case 47: /* 32nm Westmere-EX */
- if (idx == PERF_CSTATE_CORE_C3_RES ||
- idx == PERF_CSTATE_CORE_C6_RES)
- return true;
- break;
- case 42: /* 32nm SandyBridge */
- case 45: /* 32nm SandyBridge-E/EN/EP */
-
- case 58: /* 22nm IvyBridge */
- case 62: /* 22nm IvyBridge-EP/EX */
-
- case 60: /* 22nm Haswell Core */
- case 63: /* 22nm Haswell Server */
- case 69: /* 22nm Haswell ULT */
- case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */
-
- case 61: /* 14nm Broadwell Core-M */
- case 86: /* 14nm Broadwell Xeon D */
- case 71: /* 14nm Broadwell + GT3e (Intel Iris Pro graphics) */
- case 79: /* 14nm Broadwell Server */
-
- case 78: /* 14nm Skylake Mobile */
- case 94: /* 14nm Skylake Desktop */
- if (idx == PERF_CSTATE_CORE_C3_RES ||
- idx == PERF_CSTATE_CORE_C6_RES ||
- idx == PERF_CSTATE_CORE_C7_RES)
- return true;
- break;
- case 55: /* 22nm Atom "Silvermont" */
- case 77: /* 22nm Atom "Silvermont Avoton/Rangely" */
- case 76: /* 14nm Atom "Airmont" */
- if (idx == PERF_CSTATE_CORE_C1_RES ||
- idx == PERF_CSTATE_CORE_C6_RES)
- return true;
- break;
- }
-
- return false;
-}
-
PMU_EVENT_ATTR_STRING(c1-residency, evattr_cstate_core_c1, "event=0x00");
PMU_EVENT_ATTR_STRING(c3-residency, evattr_cstate_core_c3, "event=0x01");
PMU_EVENT_ATTR_STRING(c6-residency, evattr_cstate_core_c6, "event=0x02");
PMU_EVENT_ATTR_STRING(c7-residency, evattr_cstate_core_c7, "event=0x03");
static struct perf_cstate_msr core_msr[] = {
- [PERF_CSTATE_CORE_C1_RES] = { MSR_CORE_C1_RES, &evattr_cstate_core_c1, test_core, },
- [PERF_CSTATE_CORE_C3_RES] = { MSR_CORE_C3_RESIDENCY, &evattr_cstate_core_c3, test_core, },
- [PERF_CSTATE_CORE_C6_RES] = { MSR_CORE_C6_RESIDENCY, &evattr_cstate_core_c6, test_core, },
- [PERF_CSTATE_CORE_C7_RES] = { MSR_CORE_C7_RESIDENCY, &evattr_cstate_core_c7, test_core, },
+ [PERF_CSTATE_CORE_C1_RES] = { MSR_CORE_C1_RES, &evattr_cstate_core_c1 },
+ [PERF_CSTATE_CORE_C3_RES] = { MSR_CORE_C3_RESIDENCY, &evattr_cstate_core_c3 },
+ [PERF_CSTATE_CORE_C6_RES] = { MSR_CORE_C6_RESIDENCY, &evattr_cstate_core_c6 },
+ [PERF_CSTATE_CORE_C7_RES] = { MSR_CORE_C7_RESIDENCY, &evattr_cstate_core_c7 },
};
static struct attribute *core_events_attrs[PERF_CSTATE_CORE_EVENT_MAX + 1] = {
@@ -234,18 +188,11 @@ static const struct attribute_group *core_attr_groups[] = {
NULL,
};
-/* cstate_core PMU end */
-
-
/* cstate_pkg PMU */
-
static struct pmu cstate_pkg_pmu;
static bool has_cstate_pkg;
-enum perf_cstate_pkg_id {
- /*
- * cstate_pkg events
- */
+enum perf_cstate_pkg_events {
PERF_CSTATE_PKG_C2_RES = 0,
PERF_CSTATE_PKG_C3_RES,
PERF_CSTATE_PKG_C6_RES,
@@ -257,69 +204,6 @@ enum perf_cstate_pkg_id {
PERF_CSTATE_PKG_EVENT_MAX,
};
-bool test_pkg(int idx)
-{
- if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
- boot_cpu_data.x86 != 6)
- return false;
-
- switch (boot_cpu_data.x86_model) {
- case 30: /* 45nm Nehalem */
- case 26: /* 45nm Nehalem-EP */
- case 46: /* 45nm Nehalem-EX */
-
- case 37: /* 32nm Westmere */
- case 44: /* 32nm Westmere-EP */
- case 47: /* 32nm Westmere-EX */
- if (idx == PERF_CSTATE_CORE_C3_RES ||
- idx == PERF_CSTATE_CORE_C6_RES ||
- idx == PERF_CSTATE_CORE_C7_RES)
- return true;
- break;
- case 42: /* 32nm SandyBridge */
- case 45: /* 32nm SandyBridge-E/EN/EP */
-
- case 58: /* 22nm IvyBridge */
- case 62: /* 22nm IvyBridge-EP/EX */
-
- case 60: /* 22nm Haswell Core */
- case 63: /* 22nm Haswell Server */
- case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */
-
- case 61: /* 14nm Broadwell Core-M */
- case 86: /* 14nm Broadwell Xeon D */
- case 71: /* 14nm Broadwell + GT3e (Intel Iris Pro graphics) */
- case 79: /* 14nm Broadwell Server */
-
- case 78: /* 14nm Skylake Mobile */
- case 94: /* 14nm Skylake Desktop */
- if (idx == PERF_CSTATE_PKG_C2_RES ||
- idx == PERF_CSTATE_PKG_C3_RES ||
- idx == PERF_CSTATE_PKG_C6_RES ||
- idx == PERF_CSTATE_PKG_C7_RES)
- return true;
- break;
- case 55: /* 22nm Atom "Silvermont" */
- case 77: /* 22nm Atom "Silvermont Avoton/Rangely" */
- case 76: /* 14nm Atom "Airmont" */
- if (idx == PERF_CSTATE_CORE_C6_RES)
- return true;
- break;
- case 69: /* 22nm Haswell ULT */
- if (idx == PERF_CSTATE_PKG_C2_RES ||
- idx == PERF_CSTATE_PKG_C3_RES ||
- idx == PERF_CSTATE_PKG_C6_RES ||
- idx == PERF_CSTATE_PKG_C7_RES ||
- idx == PERF_CSTATE_PKG_C8_RES ||
- idx == PERF_CSTATE_PKG_C9_RES ||
- idx == PERF_CSTATE_PKG_C10_RES)
- return true;
- break;
- }
-
- return false;
-}
-
PMU_EVENT_ATTR_STRING(c2-residency, evattr_cstate_pkg_c2, "event=0x00");
PMU_EVENT_ATTR_STRING(c3-residency, evattr_cstate_pkg_c3, "event=0x01");
PMU_EVENT_ATTR_STRING(c6-residency, evattr_cstate_pkg_c6, "event=0x02");
@@ -329,13 +213,13 @@ PMU_EVENT_ATTR_STRING(c9-residency, evattr_cstate_pkg_c9, "event=0x05");
PMU_EVENT_ATTR_STRING(c10-residency, evattr_cstate_pkg_c10, "event=0x06");
static struct perf_cstate_msr pkg_msr[] = {
- [PERF_CSTATE_PKG_C2_RES] = { MSR_PKG_C2_RESIDENCY, &evattr_cstate_pkg_c2, test_pkg, },
- [PERF_CSTATE_PKG_C3_RES] = { MSR_PKG_C3_RESIDENCY, &evattr_cstate_pkg_c3, test_pkg, },
- [PERF_CSTATE_PKG_C6_RES] = { MSR_PKG_C6_RESIDENCY, &evattr_cstate_pkg_c6, test_pkg, },
- [PERF_CSTATE_PKG_C7_RES] = { MSR_PKG_C7_RESIDENCY, &evattr_cstate_pkg_c7, test_pkg, },
- [PERF_CSTATE_PKG_C8_RES] = { MSR_PKG_C8_RESIDENCY, &evattr_cstate_pkg_c8, test_pkg, },
- [PERF_CSTATE_PKG_C9_RES] = { MSR_PKG_C9_RESIDENCY, &evattr_cstate_pkg_c9, test_pkg, },
- [PERF_CSTATE_PKG_C10_RES] = { MSR_PKG_C10_RESIDENCY, &evattr_cstate_pkg_c10, test_pkg, },
+ [PERF_CSTATE_PKG_C2_RES] = { MSR_PKG_C2_RESIDENCY, &evattr_cstate_pkg_c2 },
+ [PERF_CSTATE_PKG_C3_RES] = { MSR_PKG_C3_RESIDENCY, &evattr_cstate_pkg_c3 },
+ [PERF_CSTATE_PKG_C6_RES] = { MSR_PKG_C6_RESIDENCY, &evattr_cstate_pkg_c6 },
+ [PERF_CSTATE_PKG_C7_RES] = { MSR_PKG_C7_RESIDENCY, &evattr_cstate_pkg_c7 },
+ [PERF_CSTATE_PKG_C8_RES] = { MSR_PKG_C8_RESIDENCY, &evattr_cstate_pkg_c8 },
+ [PERF_CSTATE_PKG_C9_RES] = { MSR_PKG_C9_RESIDENCY, &evattr_cstate_pkg_c9 },
+ [PERF_CSTATE_PKG_C10_RES] = { MSR_PKG_C10_RESIDENCY, &evattr_cstate_pkg_c10 },
};
static struct attribute *pkg_events_attrs[PERF_CSTATE_PKG_EVENT_MAX + 1] = {
@@ -366,8 +250,6 @@ static const struct attribute_group *pkg_attr_groups[] = {
NULL,
};
-/* cstate_pkg PMU end*/
-
static ssize_t cstate_get_attr_cpumask(struct device *dev,
struct device_attribute *attr,
char *buf)
@@ -385,7 +267,7 @@ static ssize_t cstate_get_attr_cpumask(struct device *dev,
static int cstate_pmu_event_init(struct perf_event *event)
{
u64 cfg = event->attr.config;
- int ret = 0;
+ int cpu;
if (event->attr.type != event->pmu->type)
return -ENOENT;
@@ -400,26 +282,36 @@ static int cstate_pmu_event_init(struct perf_event *event)
event->attr.sample_period) /* no sampling */
return -EINVAL;
+ if (event->cpu < 0)
+ return -EINVAL;
+
if (event->pmu == &cstate_core_pmu) {
if (cfg >= PERF_CSTATE_CORE_EVENT_MAX)
return -EINVAL;
if (!core_msr[cfg].attr)
return -EINVAL;
event->hw.event_base = core_msr[cfg].msr;
+ cpu = cpumask_any_and(&cstate_core_cpu_mask,
+ topology_sibling_cpumask(event->cpu));
} else if (event->pmu == &cstate_pkg_pmu) {
if (cfg >= PERF_CSTATE_PKG_EVENT_MAX)
return -EINVAL;
if (!pkg_msr[cfg].attr)
return -EINVAL;
event->hw.event_base = pkg_msr[cfg].msr;
- } else
+ cpu = cpumask_any_and(&cstate_pkg_cpu_mask,
+ topology_core_cpumask(event->cpu));
+ } else {
return -ENOENT;
+ }
+
+ if (cpu >= nr_cpu_ids)
+ return -ENODEV;
- /* must be done before validate_group */
+ event->cpu = cpu;
event->hw.config = cfg;
event->hw.idx = -1;
-
- return ret;
+ return 0;
}
static inline u64 cstate_pmu_read_counter(struct perf_event *event)
@@ -469,172 +361,91 @@ static int cstate_pmu_event_add(struct perf_event *event, int mode)
return 0;
}
+/*
+ * Check if exiting cpu is the designated reader. If so migrate the
+ * events when there is a valid target available
+ */
static void cstate_cpu_exit(int cpu)
{
- int i, id, target;
+ unsigned int target;
- /* cpu exit for cstate core */
- if (has_cstate_core) {
- id = topology_core_id(cpu);
- target = -1;
-
- for_each_online_cpu(i) {
- if (i == cpu)
- continue;
- if (id == topology_core_id(i)) {
- target = i;
- break;
- }
- }
- if (cpumask_test_and_clear_cpu(cpu, &cstate_core_cpu_mask) && target >= 0)
+ if (has_cstate_core &&
+ cpumask_test_and_clear_cpu(cpu, &cstate_core_cpu_mask)) {
+
+ target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu);
+ /* Migrate events if there is a valid target */
+ if (target < nr_cpu_ids) {
cpumask_set_cpu(target, &cstate_core_cpu_mask);
- WARN_ON(cpumask_empty(&cstate_core_cpu_mask));
- if (target >= 0)
perf_pmu_migrate_context(&cstate_core_pmu, cpu, target);
+ }
}
- /* cpu exit for cstate pkg */
- if (has_cstate_pkg) {
- id = topology_physical_package_id(cpu);
- target = -1;
-
- for_each_online_cpu(i) {
- if (i == cpu)
- continue;
- if (id == topology_physical_package_id(i)) {
- target = i;
- break;
- }
- }
- if (cpumask_test_and_clear_cpu(cpu, &cstate_pkg_cpu_mask) && target >= 0)
+ if (has_cstate_pkg &&
+ cpumask_test_and_clear_cpu(cpu, &cstate_pkg_cpu_mask)) {
+
+ target = cpumask_any_but(topology_core_cpumask(cpu), cpu);
+ /* Migrate events if there is a valid target */
+ if (target < nr_cpu_ids) {
cpumask_set_cpu(target, &cstate_pkg_cpu_mask);
- WARN_ON(cpumask_empty(&cstate_pkg_cpu_mask));
- if (target >= 0)
perf_pmu_migrate_context(&cstate_pkg_pmu, cpu, target);
+ }
}
}
static void cstate_cpu_init(int cpu)
{
- int i, id;
+ unsigned int target;
- /* cpu init for cstate core */
- if (has_cstate_core) {
- id = topology_core_id(cpu);
- for_each_cpu(i, &cstate_core_cpu_mask) {
- if (id == topology_core_id(i))
- break;
- }
- if (i >= nr_cpu_ids)
- cpumask_set_cpu(cpu, &cstate_core_cpu_mask);
- }
+ /*
+ * If this is the first online thread of that core, set it in
+ * the core cpu mask as the designated reader.
+ */
+ target = cpumask_any_and(&cstate_core_cpu_mask,
+ topology_sibling_cpumask(cpu));
- /* cpu init for cstate pkg */
- if (has_cstate_pkg) {
- id = topology_physical_package_id(cpu);
- for_each_cpu(i, &cstate_pkg_cpu_mask) {
- if (id == topology_physical_package_id(i))
- break;
- }
- if (i >= nr_cpu_ids)
- cpumask_set_cpu(cpu, &cstate_pkg_cpu_mask);
- }
+ if (has_cstate_core && target >= nr_cpu_ids)
+ cpumask_set_cpu(cpu, &cstate_core_cpu_mask);
+
+ /*
+ * If this is the first online thread of that package, set it
+ * in the package cpu mask as the designated reader.
+ */
+ target = cpumask_any_and(&cstate_pkg_cpu_mask,
+ topology_core_cpumask(cpu));
+ if (has_cstate_pkg && target >= nr_cpu_ids)
+ cpumask_set_cpu(cpu, &cstate_pkg_cpu_mask);
}
static int cstate_cpu_notifier(struct notifier_block *self,
- unsigned long action, void *hcpu)
+ unsigned long action, void *hcpu)
{
unsigned int cpu = (long)hcpu;
switch (action & ~CPU_TASKS_FROZEN) {
- case CPU_UP_PREPARE:
- break;
case CPU_STARTING:
cstate_cpu_init(cpu);
break;
- case CPU_UP_CANCELED:
- case CPU_DYING:
- break;
- case CPU_ONLINE:
- case CPU_DEAD:
- break;
case CPU_DOWN_PREPARE:
cstate_cpu_exit(cpu);
break;
default:
break;
}
-
return NOTIFY_OK;
}
-/*
- * Probe the cstate events and insert the available one into sysfs attrs
- * Return false if there is no available events.
- */
-static bool cstate_probe_msr(struct perf_cstate_msr *msr,
- struct attribute **events_attrs,
- int max_event_nr)
-{
- int i, j = 0;
- u64 val;
-
- /* Probe the cstate events. */
- for (i = 0; i < max_event_nr; i++) {
- if (!msr[i].test(i) || rdmsrl_safe(msr[i].msr, &val))
- msr[i].attr = NULL;
- }
-
- /* List remaining events in the sysfs attrs. */
- for (i = 0; i < max_event_nr; i++) {
- if (msr[i].attr)
- events_attrs[j++] = &msr[i].attr->attr.attr;
- }
- events_attrs[j] = NULL;
-
- return (j > 0) ? true : false;
-}
-
-static int __init cstate_init(void)
-{
- /* SLM has different MSR for PKG C6 */
- switch (boot_cpu_data.x86_model) {
- case 55:
- case 76:
- case 77:
- pkg_msr[PERF_CSTATE_PKG_C6_RES].msr = MSR_PKG_C7_RESIDENCY;
- }
-
- if (cstate_probe_msr(core_msr, core_events_attrs, PERF_CSTATE_CORE_EVENT_MAX))
- has_cstate_core = true;
-
- if (cstate_probe_msr(pkg_msr, pkg_events_attrs, PERF_CSTATE_PKG_EVENT_MAX))
- has_cstate_pkg = true;
-
- return (has_cstate_core || has_cstate_pkg) ? 0 : -ENODEV;
-}
-
-static void __init cstate_cpumask_init(void)
-{
- int cpu;
-
- cpu_notifier_register_begin();
-
- for_each_online_cpu(cpu)
- cstate_cpu_init(cpu);
-
- __perf_cpu_notifier(cstate_cpu_notifier);
-
- cpu_notifier_register_done();
-}
+static struct notifier_block cstate_cpu_nb = {
+ .notifier_call = cstate_cpu_notifier,
+ .priority = CPU_PRI_PERF + 1,
+};
static struct pmu cstate_core_pmu = {
.attr_groups = core_attr_groups,
.name = "cstate_core",
.task_ctx_nr = perf_invalid_context,
.event_init = cstate_pmu_event_init,
- .add = cstate_pmu_event_add, /* must have */
- .del = cstate_pmu_event_del, /* must have */
+ .add = cstate_pmu_event_add,
+ .del = cstate_pmu_event_del,
.start = cstate_pmu_event_start,
.stop = cstate_pmu_event_stop,
.read = cstate_pmu_event_update,
@@ -646,49 +457,203 @@ static struct pmu cstate_pkg_pmu = {
.name = "cstate_pkg",
.task_ctx_nr = perf_invalid_context,
.event_init = cstate_pmu_event_init,
- .add = cstate_pmu_event_add, /* must have */
- .del = cstate_pmu_event_del, /* must have */
+ .add = cstate_pmu_event_add,
+ .del = cstate_pmu_event_del,
.start = cstate_pmu_event_start,
.stop = cstate_pmu_event_stop,
.read = cstate_pmu_event_update,
.capabilities = PERF_PMU_CAP_NO_INTERRUPT,
};
-static void __init cstate_pmus_register(void)
+static const struct cstate_model nhm_cstates __initconst = {
+ .core_events = BIT(PERF_CSTATE_CORE_C3_RES) |
+ BIT(PERF_CSTATE_CORE_C6_RES),
+
+ .pkg_events = BIT(PERF_CSTATE_PKG_C3_RES) |
+ BIT(PERF_CSTATE_PKG_C6_RES) |
+ BIT(PERF_CSTATE_PKG_C7_RES),
+};
+
+static const struct cstate_model snb_cstates __initconst = {
+ .core_events = BIT(PERF_CSTATE_CORE_C3_RES) |
+ BIT(PERF_CSTATE_CORE_C6_RES) |
+ BIT(PERF_CSTATE_CORE_C7_RES),
+
+ .pkg_events = BIT(PERF_CSTATE_PKG_C2_RES) |
+ BIT(PERF_CSTATE_PKG_C3_RES) |
+ BIT(PERF_CSTATE_PKG_C6_RES) |
+ BIT(PERF_CSTATE_PKG_C7_RES),
+};
+
+static const struct cstate_model hswult_cstates __initconst = {
+ .core_events = BIT(PERF_CSTATE_CORE_C3_RES) |
+ BIT(PERF_CSTATE_CORE_C6_RES) |
+ BIT(PERF_CSTATE_CORE_C7_RES),
+
+ .pkg_events = BIT(PERF_CSTATE_PKG_C2_RES) |
+ BIT(PERF_CSTATE_PKG_C3_RES) |
+ BIT(PERF_CSTATE_PKG_C6_RES) |
+ BIT(PERF_CSTATE_PKG_C7_RES) |
+ BIT(PERF_CSTATE_PKG_C8_RES) |
+ BIT(PERF_CSTATE_PKG_C9_RES) |
+ BIT(PERF_CSTATE_PKG_C10_RES),
+};
+
+static const struct cstate_model slm_cstates __initconst = {
+ .core_events = BIT(PERF_CSTATE_CORE_C1_RES) |
+ BIT(PERF_CSTATE_CORE_C6_RES),
+
+ .pkg_events = BIT(PERF_CSTATE_PKG_C6_RES),
+ .quirks = SLM_PKG_C6_USE_C7_MSR,
+};
+
+#define X86_CSTATES_MODEL(model, states) \
+ { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long) &(states) }
+
+static const struct x86_cpu_id intel_cstates_match[] __initconst = {
+ X86_CSTATES_MODEL(30, nhm_cstates), /* 45nm Nehalem */
+ X86_CSTATES_MODEL(26, nhm_cstates), /* 45nm Nehalem-EP */
+ X86_CSTATES_MODEL(46, nhm_cstates), /* 45nm Nehalem-EX */
+
+ X86_CSTATES_MODEL(37, nhm_cstates), /* 32nm Westmere */
+ X86_CSTATES_MODEL(44, nhm_cstates), /* 32nm Westmere-EP */
+ X86_CSTATES_MODEL(47, nhm_cstates), /* 32nm Westmere-EX */
+
+ X86_CSTATES_MODEL(42, snb_cstates), /* 32nm SandyBridge */
+ X86_CSTATES_MODEL(45, snb_cstates), /* 32nm SandyBridge-E/EN/EP */
+
+ X86_CSTATES_MODEL(58, snb_cstates), /* 22nm IvyBridge */
+ X86_CSTATES_MODEL(62, snb_cstates), /* 22nm IvyBridge-EP/EX */
+
+ X86_CSTATES_MODEL(60, snb_cstates), /* 22nm Haswell Core */
+ X86_CSTATES_MODEL(63, snb_cstates), /* 22nm Haswell Server */
+ X86_CSTATES_MODEL(70, snb_cstates), /* 22nm Haswell + GT3e */
+
+ X86_CSTATES_MODEL(69, hswult_cstates), /* 22nm Haswell ULT */
+
+ X86_CSTATES_MODEL(55, slm_cstates), /* 22nm Atom Silvermont */
+ X86_CSTATES_MODEL(77, slm_cstates), /* 22nm Atom Avoton/Rangely */
+ X86_CSTATES_MODEL(76, slm_cstates), /* 22nm Atom Airmont */
+
+ X86_CSTATES_MODEL(61, snb_cstates), /* 14nm Broadwell Core-M */
+ X86_CSTATES_MODEL(86, snb_cstates), /* 14nm Broadwell Xeon D */
+ X86_CSTATES_MODEL(71, snb_cstates), /* 14nm Broadwell + GT3e */
+ X86_CSTATES_MODEL(79, snb_cstates), /* 14nm Broadwell Server */
+
+ X86_CSTATES_MODEL(78, snb_cstates), /* 14nm Skylake Mobile */
+ X86_CSTATES_MODEL(94, snb_cstates), /* 14nm Skylake Desktop */
+ { },
+};
+MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match);
+
+/*
+ * Probe the cstate events and insert the available one into sysfs attrs
+ * Return false if there are no available events.
+ */
+static bool __init cstate_probe_msr(const unsigned long evmsk, int max,
+ struct perf_cstate_msr *msr,
+ struct attribute **attrs)
{
- int err;
+ bool found = false;
+ unsigned int bit;
+ u64 val;
+
+ for (bit = 0; bit < max; bit++) {
+ if (test_bit(bit, &evmsk) && !rdmsrl_safe(msr[bit].msr, &val)) {
+ *attrs++ = &msr[bit].attr->attr.attr;
+ found = true;
+ } else {
+ msr[bit].attr = NULL;
+ }
+ }
+ *attrs = NULL;
+
+ return found;
+}
+
+static int __init cstate_probe(const struct cstate_model *cm)
+{
+ /* SLM has different MSR for PKG C6 */
+ if (cm->quirks & SLM_PKG_C6_USE_C7_MSR)
+ pkg_msr[PERF_CSTATE_PKG_C6_RES].msr = MSR_PKG_C7_RESIDENCY;
+
+ has_cstate_core = cstate_probe_msr(cm->core_events,
+ PERF_CSTATE_CORE_EVENT_MAX,
+ core_msr, core_events_attrs);
+
+ has_cstate_pkg = cstate_probe_msr(cm->pkg_events,
+ PERF_CSTATE_PKG_EVENT_MAX,
+ pkg_msr, pkg_events_attrs);
+
+ return (has_cstate_core || has_cstate_pkg) ? 0 : -ENODEV;
+}
+
+static inline void cstate_cleanup(void)
+{
+ if (has_cstate_core)
+ perf_pmu_unregister(&cstate_core_pmu);
+
+ if (has_cstate_pkg)
+ perf_pmu_unregister(&cstate_pkg_pmu);
+}
+
+static int __init cstate_init(void)
+{
+ int cpu, err;
+
+ cpu_notifier_register_begin();
+ for_each_online_cpu(cpu)
+ cstate_cpu_init(cpu);
if (has_cstate_core) {
err = perf_pmu_register(&cstate_core_pmu, cstate_core_pmu.name, -1);
- if (WARN_ON(err))
- pr_info("Failed to register PMU %s error %d\n",
- cstate_core_pmu.name, err);
+ if (err) {
+ has_cstate_core = false;
+ pr_info("Failed to register cstate core pmu\n");
+ goto out;
+ }
}
if (has_cstate_pkg) {
err = perf_pmu_register(&cstate_pkg_pmu, cstate_pkg_pmu.name, -1);
- if (WARN_ON(err))
- pr_info("Failed to register PMU %s error %d\n",
- cstate_pkg_pmu.name, err);
+ if (err) {
+ has_cstate_pkg = false;
+ pr_info("Failed to register cstate pkg pmu\n");
+ cstate_cleanup();
+ goto out;
+ }
}
+ __register_cpu_notifier(&cstate_cpu_nb);
+out:
+ cpu_notifier_register_done();
+ return err;
}
static int __init cstate_pmu_init(void)
{
+ const struct x86_cpu_id *id;
int err;
- if (cpu_has_hypervisor)
+ if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
+ return -ENODEV;
+
+ id = x86_match_cpu(intel_cstates_match);
+ if (!id)
return -ENODEV;
- err = cstate_init();
+ err = cstate_probe((const struct cstate_model *) id->driver_data);
if (err)
return err;
- cstate_cpumask_init();
-
- cstate_pmus_register();
-
- return 0;
+ return cstate_init();
}
+module_init(cstate_pmu_init);
-device_initcall(cstate_pmu_init);
+static void __exit cstate_pmu_exit(void)
+{
+ cpu_notifier_register_begin();
+ __unregister_cpu_notifier(&cstate_cpu_nb);
+ cstate_cleanup();
+ cpu_notifier_register_done();
+}
+module_exit(cstate_pmu_exit);
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 8584b90d8e0b..7ce9f3f669e6 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -645,6 +645,12 @@ struct event_constraint intel_slm_pebs_event_constraints[] = {
EVENT_CONSTRAINT_END
};
+struct event_constraint intel_glm_pebs_event_constraints[] = {
+ /* Allow all events as PEBS with no flags */
+ INTEL_ALL_EVENT_CONSTRAINT(0, 0x1),
+ EVENT_CONSTRAINT_END
+};
+
struct event_constraint intel_nehalem_pebs_event_constraints[] = {
INTEL_PLD_CONSTRAINT(0x100b, 0xf), /* MEM_INST_RETIRED.* */
INTEL_FLAGS_EVENT_CONSTRAINT(0x0f, 0xf), /* MEM_UNCORE_RETIRED.* */
diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index 1ca5d1e7d4f2..9e2b40cdb05f 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -14,7 +14,8 @@ enum {
LBR_FORMAT_EIP_FLAGS = 0x03,
LBR_FORMAT_EIP_FLAGS2 = 0x04,
LBR_FORMAT_INFO = 0x05,
- LBR_FORMAT_MAX_KNOWN = LBR_FORMAT_INFO,
+ LBR_FORMAT_TIME = 0x06,
+ LBR_FORMAT_MAX_KNOWN = LBR_FORMAT_TIME,
};
static enum {
@@ -464,6 +465,16 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
abort = !!(info & LBR_INFO_ABORT);
cycles = (info & LBR_INFO_CYCLES);
}
+
+ if (lbr_format == LBR_FORMAT_TIME) {
+ mis = !!(from & LBR_FROM_FLAG_MISPRED);
+ pred = !mis;
+ skip = 1;
+ cycles = ((to >> 48) & LBR_INFO_CYCLES);
+
+ to = (u64)((((s64)to) << 16) >> 16);
+ }
+
if (lbr_flags & LBR_EIP_FLAGS) {
mis = !!(from & LBR_FROM_FLAG_MISPRED);
pred = !mis;
@@ -1049,6 +1060,24 @@ void __init intel_pmu_lbr_init_atom(void)
pr_cont("8-deep LBR, ");
}
+/* slm */
+void __init intel_pmu_lbr_init_slm(void)
+{
+ x86_pmu.lbr_nr = 8;
+ x86_pmu.lbr_tos = MSR_LBR_TOS;
+ x86_pmu.lbr_from = MSR_LBR_CORE_FROM;
+ x86_pmu.lbr_to = MSR_LBR_CORE_TO;
+
+ x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
+ x86_pmu.lbr_sel_map = nhm_lbr_sel_map;
+
+ /*
+ * SW branch filter usage:
+ * - compensate for lack of HW filter
+ */
+ pr_cont("8-deep LBR, ");
+}
+
/* Knights Landing */
void intel_pmu_lbr_init_knl(void)
{
diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c
index 09a77dbc73c9..04bb5fb5a8d7 100644
--- a/arch/x86/events/intel/pt.c
+++ b/arch/x86/events/intel/pt.c
@@ -67,11 +67,13 @@ static struct pt_cap_desc {
PT_CAP(max_subleaf, 0, CR_EAX, 0xffffffff),
PT_CAP(cr3_filtering, 0, CR_EBX, BIT(0)),
PT_CAP(psb_cyc, 0, CR_EBX, BIT(1)),
+ PT_CAP(ip_filtering, 0, CR_EBX, BIT(2)),
PT_CAP(mtc, 0, CR_EBX, BIT(3)),
PT_CAP(topa_output, 0, CR_ECX, BIT(0)),
PT_CAP(topa_multiple_entries, 0, CR_ECX, BIT(1)),
PT_CAP(single_range_output, 0, CR_ECX, BIT(2)),
PT_CAP(payloads_lip, 0, CR_ECX, BIT(31)),
+ PT_CAP(num_address_ranges, 1, CR_EAX, 0x3),
PT_CAP(mtc_periods, 1, CR_EAX, 0xffff0000),
PT_CAP(cycle_thresholds, 1, CR_EBX, 0xffff),
PT_CAP(psb_periods, 1, CR_EBX, 0xffff0000),
@@ -125,9 +127,46 @@ static struct attribute_group pt_format_group = {
.attrs = pt_formats_attr,
};
+static ssize_t
+pt_timing_attr_show(struct device *dev, struct device_attribute *attr,
+ char *page)
+{
+ struct perf_pmu_events_attr *pmu_attr =
+ container_of(attr, struct perf_pmu_events_attr, attr);
+
+ switch (pmu_attr->id) {
+ case 0:
+ return sprintf(page, "%lu\n", pt_pmu.max_nonturbo_ratio);
+ case 1:
+ return sprintf(page, "%u:%u\n",
+ pt_pmu.tsc_art_num,
+ pt_pmu.tsc_art_den);
+ default:
+ break;
+ }
+
+ return -EINVAL;
+}
+
+PMU_EVENT_ATTR(max_nonturbo_ratio, timing_attr_max_nonturbo_ratio, 0,
+ pt_timing_attr_show);
+PMU_EVENT_ATTR(tsc_art_ratio, timing_attr_tsc_art_ratio, 1,
+ pt_timing_attr_show);
+
+static struct attribute *pt_timing_attr[] = {
+ &timing_attr_max_nonturbo_ratio.attr.attr,
+ &timing_attr_tsc_art_ratio.attr.attr,
+ NULL,
+};
+
+static struct attribute_group pt_timing_group = {
+ .attrs = pt_timing_attr,
+};
+
static const struct attribute_group *pt_attr_groups[] = {
&pt_cap_group,
&pt_format_group,
+ &pt_timing_group,
NULL,
};
@@ -140,6 +179,23 @@ static int __init pt_pmu_hw_init(void)
int ret;
long i;
+ rdmsrl(MSR_PLATFORM_INFO, reg);
+ pt_pmu.max_nonturbo_ratio = (reg & 0xff00) >> 8;
+
+ /*
+ * if available, read in TSC to core crystal clock ratio,
+ * otherwise, zero for numerator stands for "not enumerated"
+ * as per SDM
+ */
+ if (boot_cpu_data.cpuid_level >= CPUID_TSC_LEAF) {
+ u32 eax, ebx, ecx, edx;
+
+ cpuid(CPUID_TSC_LEAF, &eax, &ebx, &ecx, &edx);
+
+ pt_pmu.tsc_art_num = ebx;
+ pt_pmu.tsc_art_den = eax;
+ }
+
if (boot_cpu_has(X86_FEATURE_VMX)) {
/*
* Intel SDM, 36.5 "Tracing post-VMXON" says that
@@ -263,6 +319,75 @@ static bool pt_event_valid(struct perf_event *event)
* These all are cpu affine and operate on a local PT
*/
+/* Address ranges and their corresponding msr configuration registers */
+static const struct pt_address_range {
+ unsigned long msr_a;
+ unsigned long msr_b;
+ unsigned int reg_off;
+} pt_address_ranges[] = {
+ {
+ .msr_a = MSR_IA32_RTIT_ADDR0_A,
+ .msr_b = MSR_IA32_RTIT_ADDR0_B,
+ .reg_off = RTIT_CTL_ADDR0_OFFSET,
+ },
+ {
+ .msr_a = MSR_IA32_RTIT_ADDR1_A,
+ .msr_b = MSR_IA32_RTIT_ADDR1_B,
+ .reg_off = RTIT_CTL_ADDR1_OFFSET,
+ },
+ {
+ .msr_a = MSR_IA32_RTIT_ADDR2_A,
+ .msr_b = MSR_IA32_RTIT_ADDR2_B,
+ .reg_off = RTIT_CTL_ADDR2_OFFSET,
+ },
+ {
+ .msr_a = MSR_IA32_RTIT_ADDR3_A,
+ .msr_b = MSR_IA32_RTIT_ADDR3_B,
+ .reg_off = RTIT_CTL_ADDR3_OFFSET,
+ }
+};
+
+static u64 pt_config_filters(struct perf_event *event)
+{
+ struct pt_filters *filters = event->hw.addr_filters;
+ struct pt *pt = this_cpu_ptr(&pt_ctx);
+ unsigned int range = 0;
+ u64 rtit_ctl = 0;
+
+ if (!filters)
+ return 0;
+
+ perf_event_addr_filters_sync(event);
+
+ for (range = 0; range < filters->nr_filters; range++) {
+ struct pt_filter *filter = &filters->filter[range];
+
+ /*
+ * Note, if the range has zero start/end addresses due
+ * to its dynamic object not being loaded yet, we just
+ * go ahead and program zeroed range, which will simply
+ * produce no data. Note^2: if executable code at 0x0
+ * is a concern, we can set up an "invalid" configuration
+ * such as msr_b < msr_a.
+ */
+
+ /* avoid redundant msr writes */
+ if (pt->filters.filter[range].msr_a != filter->msr_a) {
+ wrmsrl(pt_address_ranges[range].msr_a, filter->msr_a);
+ pt->filters.filter[range].msr_a = filter->msr_a;
+ }
+
+ if (pt->filters.filter[range].msr_b != filter->msr_b) {
+ wrmsrl(pt_address_ranges[range].msr_b, filter->msr_b);
+ pt->filters.filter[range].msr_b = filter->msr_b;
+ }
+
+ rtit_ctl |= filter->config << pt_address_ranges[range].reg_off;
+ }
+
+ return rtit_ctl;
+}
+
static void pt_config(struct perf_event *event)
{
u64 reg;
@@ -272,7 +397,8 @@ static void pt_config(struct perf_event *event)
wrmsrl(MSR_IA32_RTIT_STATUS, 0);
}
- reg = RTIT_CTL_TOPA | RTIT_CTL_BRANCH_EN | RTIT_CTL_TRACEEN;
+ reg = pt_config_filters(event);
+ reg |= RTIT_CTL_TOPA | RTIT_CTL_BRANCH_EN | RTIT_CTL_TRACEEN;
if (!event->attr.exclude_kernel)
reg |= RTIT_CTL_OS;
@@ -709,6 +835,7 @@ static int pt_buffer_reset_markers(struct pt_buffer *buf,
/* clear STOP and INT from current entry */
buf->topa_index[buf->stop_pos]->stop = 0;
+ buf->topa_index[buf->stop_pos]->intr = 0;
buf->topa_index[buf->intr_pos]->intr = 0;
/* how many pages till the STOP marker */
@@ -733,6 +860,7 @@ static int pt_buffer_reset_markers(struct pt_buffer *buf,
buf->intr_pos = idx;
buf->topa_index[buf->stop_pos]->stop = 1;
+ buf->topa_index[buf->stop_pos]->intr = 1;
buf->topa_index[buf->intr_pos]->intr = 1;
return 0;
@@ -919,24 +1047,80 @@ static void pt_buffer_free_aux(void *data)
kfree(buf);
}
-/**
- * pt_buffer_is_full() - check if the buffer is full
- * @buf: PT buffer.
- * @pt: Per-cpu pt handle.
- *
- * If the user hasn't read data from the output region that aux_head
- * points to, the buffer is considered full: the user needs to read at
- * least this region and update aux_tail to point past it.
- */
-static bool pt_buffer_is_full(struct pt_buffer *buf, struct pt *pt)
+static int pt_addr_filters_init(struct perf_event *event)
{
- if (buf->snapshot)
- return false;
+ struct pt_filters *filters;
+ int node = event->cpu == -1 ? -1 : cpu_to_node(event->cpu);
+
+ if (!pt_cap_get(PT_CAP_num_address_ranges))
+ return 0;
+
+ filters = kzalloc_node(sizeof(struct pt_filters), GFP_KERNEL, node);
+ if (!filters)
+ return -ENOMEM;
+
+ if (event->parent)
+ memcpy(filters, event->parent->hw.addr_filters,
+ sizeof(*filters));
+
+ event->hw.addr_filters = filters;
+
+ return 0;
+}
+
+static void pt_addr_filters_fini(struct perf_event *event)
+{
+ kfree(event->hw.addr_filters);
+ event->hw.addr_filters = NULL;
+}
+
+static int pt_event_addr_filters_validate(struct list_head *filters)
+{
+ struct perf_addr_filter *filter;
+ int range = 0;
+
+ list_for_each_entry(filter, filters, entry) {
+ /* PT doesn't support single address triggers */
+ if (!filter->range)
+ return -EOPNOTSUPP;
+
+ if (!filter->inode && !kernel_ip(filter->offset))
+ return -EINVAL;
+
+ if (++range > pt_cap_get(PT_CAP_num_address_ranges))
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static void pt_event_addr_filters_sync(struct perf_event *event)
+{
+ struct perf_addr_filters_head *head = perf_event_addr_filters(event);
+ unsigned long msr_a, msr_b, *offs = event->addr_filters_offs;
+ struct pt_filters *filters = event->hw.addr_filters;
+ struct perf_addr_filter *filter;
+ int range = 0;
+
+ if (!filters)
+ return;
- if (local_read(&buf->data_size) >= pt->handle.size)
- return true;
+ list_for_each_entry(filter, &head->list, entry) {
+ if (filter->inode && !offs[range]) {
+ msr_a = msr_b = 0;
+ } else {
+ /* apply the offset */
+ msr_a = filter->offset + offs[range];
+ msr_b = filter->size + msr_a;
+ }
+
+ filters->filter[range].msr_a = msr_a;
+ filters->filter[range].msr_b = msr_b;
+ filters->filter[range].config = filter->filter ? 1 : 2;
+ range++;
+ }
- return false;
+ filters->nr_filters = range;
}
/**
@@ -953,7 +1137,7 @@ void intel_pt_interrupt(void)
* after PT has been disabled by pt_event_stop(). Make sure we don't
* do anything (particularly, re-enable) for this event here.
*/
- if (!ACCESS_ONCE(pt->handle_nmi))
+ if (!READ_ONCE(pt->handle_nmi))
return;
/*
@@ -1038,23 +1222,36 @@ EXPORT_SYMBOL_GPL(intel_pt_handle_vmx);
static void pt_event_start(struct perf_event *event, int mode)
{
+ struct hw_perf_event *hwc = &event->hw;
struct pt *pt = this_cpu_ptr(&pt_ctx);
- struct pt_buffer *buf = perf_get_aux(&pt->handle);
+ struct pt_buffer *buf;
if (READ_ONCE(pt->vmx_on))
return;
- if (!buf || pt_buffer_is_full(buf, pt)) {
- event->hw.state = PERF_HES_STOPPED;
- return;
+ buf = perf_aux_output_begin(&pt->handle, event);
+ if (!buf)
+ goto fail_stop;
+
+ pt_buffer_reset_offsets(buf, pt->handle.head);
+ if (!buf->snapshot) {
+ if (pt_buffer_reset_markers(buf, &pt->handle))
+ goto fail_end_stop;
}
- ACCESS_ONCE(pt->handle_nmi) = 1;
- event->hw.state = 0;
+ WRITE_ONCE(pt->handle_nmi, 1);
+ hwc->state = 0;
pt_config_buffer(buf->cur->table, buf->cur_idx,
buf->output_off);
pt_config(event);
+
+ return;
+
+fail_end_stop:
+ perf_aux_output_end(&pt->handle, 0, true);
+fail_stop:
+ hwc->state = PERF_HES_STOPPED;
}
static void pt_event_stop(struct perf_event *event, int mode)
@@ -1065,7 +1262,7 @@ static void pt_event_stop(struct perf_event *event, int mode)
* Protect against the PMI racing with disabling wrmsr,
* see comment in intel_pt_interrupt().
*/
- ACCESS_ONCE(pt->handle_nmi) = 0;
+ WRITE_ONCE(pt->handle_nmi, 0);
pt_config_stop(event);
@@ -1088,19 +1285,7 @@ static void pt_event_stop(struct perf_event *event, int mode)
pt_handle_status(pt);
pt_update_head(pt);
- }
-}
-
-static void pt_event_del(struct perf_event *event, int mode)
-{
- struct pt *pt = this_cpu_ptr(&pt_ctx);
- struct pt_buffer *buf;
- pt_event_stop(event, PERF_EF_UPDATE);
-
- buf = perf_get_aux(&pt->handle);
-
- if (buf) {
if (buf->snapshot)
pt->handle.head =
local_xchg(&buf->data_size,
@@ -1110,9 +1295,13 @@ static void pt_event_del(struct perf_event *event, int mode)
}
}
+static void pt_event_del(struct perf_event *event, int mode)
+{
+ pt_event_stop(event, PERF_EF_UPDATE);
+}
+
static int pt_event_add(struct perf_event *event, int mode)
{
- struct pt_buffer *buf;
struct pt *pt = this_cpu_ptr(&pt_ctx);
struct hw_perf_event *hwc = &event->hw;
int ret = -EBUSY;
@@ -1120,34 +1309,18 @@ static int pt_event_add(struct perf_event *event, int mode)
if (pt->handle.event)
goto fail;
- buf = perf_aux_output_begin(&pt->handle, event);
- ret = -EINVAL;
- if (!buf)
- goto fail_stop;
-
- pt_buffer_reset_offsets(buf, pt->handle.head);
- if (!buf->snapshot) {
- ret = pt_buffer_reset_markers(buf, &pt->handle);
- if (ret)
- goto fail_end_stop;
- }
-
if (mode & PERF_EF_START) {
pt_event_start(event, 0);
- ret = -EBUSY;
+ ret = -EINVAL;
if (hwc->state == PERF_HES_STOPPED)
- goto fail_end_stop;
+ goto fail;
} else {
hwc->state = PERF_HES_STOPPED;
}
- return 0;
-
-fail_end_stop:
- perf_aux_output_end(&pt->handle, 0, true);
-fail_stop:
- hwc->state = PERF_HES_STOPPED;
+ ret = 0;
fail:
+
return ret;
}
@@ -1157,6 +1330,7 @@ static void pt_event_read(struct perf_event *event)
static void pt_event_destroy(struct perf_event *event)
{
+ pt_addr_filters_fini(event);
x86_del_exclusive(x86_lbr_exclusive_pt);
}
@@ -1171,6 +1345,11 @@ static int pt_event_init(struct perf_event *event)
if (x86_add_exclusive(x86_lbr_exclusive_pt))
return -EBUSY;
+ if (pt_addr_filters_init(event)) {
+ x86_del_exclusive(x86_lbr_exclusive_pt);
+ return -ENOMEM;
+ }
+
event->destroy = pt_event_destroy;
return 0;
@@ -1190,7 +1369,7 @@ static __init int pt_init(void)
BUILD_BUG_ON(sizeof(struct topa) > PAGE_SIZE);
- if (!test_cpu_cap(&boot_cpu_data, X86_FEATURE_INTEL_PT))
+ if (!boot_cpu_has(X86_FEATURE_INTEL_PT))
return -ENODEV;
get_online_cpus();
@@ -1224,16 +1403,21 @@ static __init int pt_init(void)
PERF_PMU_CAP_AUX_NO_SG | PERF_PMU_CAP_AUX_SW_DOUBLEBUF;
pt_pmu.pmu.capabilities |= PERF_PMU_CAP_EXCLUSIVE | PERF_PMU_CAP_ITRACE;
- pt_pmu.pmu.attr_groups = pt_attr_groups;
- pt_pmu.pmu.task_ctx_nr = perf_sw_context;
- pt_pmu.pmu.event_init = pt_event_init;
- pt_pmu.pmu.add = pt_event_add;
- pt_pmu.pmu.del = pt_event_del;
- pt_pmu.pmu.start = pt_event_start;
- pt_pmu.pmu.stop = pt_event_stop;
- pt_pmu.pmu.read = pt_event_read;
- pt_pmu.pmu.setup_aux = pt_buffer_setup_aux;
- pt_pmu.pmu.free_aux = pt_buffer_free_aux;
+ pt_pmu.pmu.attr_groups = pt_attr_groups;
+ pt_pmu.pmu.task_ctx_nr = perf_sw_context;
+ pt_pmu.pmu.event_init = pt_event_init;
+ pt_pmu.pmu.add = pt_event_add;
+ pt_pmu.pmu.del = pt_event_del;
+ pt_pmu.pmu.start = pt_event_start;
+ pt_pmu.pmu.stop = pt_event_stop;
+ pt_pmu.pmu.read = pt_event_read;
+ pt_pmu.pmu.setup_aux = pt_buffer_setup_aux;
+ pt_pmu.pmu.free_aux = pt_buffer_free_aux;
+ pt_pmu.pmu.addr_filters_sync = pt_event_addr_filters_sync;
+ pt_pmu.pmu.addr_filters_validate = pt_event_addr_filters_validate;
+ pt_pmu.pmu.nr_addr_filters =
+ pt_cap_get(PT_CAP_num_address_ranges);
+
ret = perf_pmu_register(&pt_pmu.pmu, "intel_pt", -1);
return ret;
diff --git a/arch/x86/events/intel/pt.h b/arch/x86/events/intel/pt.h
index 3abb5f5cccc8..efffa4a09f68 100644
--- a/arch/x86/events/intel/pt.h
+++ b/arch/x86/events/intel/pt.h
@@ -20,6 +20,40 @@
#define __INTEL_PT_H__
/*
+ * PT MSR bit definitions
+ */
+#define RTIT_CTL_TRACEEN BIT(0)
+#define RTIT_CTL_CYCLEACC BIT(1)
+#define RTIT_CTL_OS BIT(2)
+#define RTIT_CTL_USR BIT(3)
+#define RTIT_CTL_CR3EN BIT(7)
+#define RTIT_CTL_TOPA BIT(8)
+#define RTIT_CTL_MTC_EN BIT(9)
+#define RTIT_CTL_TSC_EN BIT(10)
+#define RTIT_CTL_DISRETC BIT(11)
+#define RTIT_CTL_BRANCH_EN BIT(13)
+#define RTIT_CTL_MTC_RANGE_OFFSET 14
+#define RTIT_CTL_MTC_RANGE (0x0full << RTIT_CTL_MTC_RANGE_OFFSET)
+#define RTIT_CTL_CYC_THRESH_OFFSET 19
+#define RTIT_CTL_CYC_THRESH (0x0full << RTIT_CTL_CYC_THRESH_OFFSET)
+#define RTIT_CTL_PSB_FREQ_OFFSET 24
+#define RTIT_CTL_PSB_FREQ (0x0full << RTIT_CTL_PSB_FREQ_OFFSET)
+#define RTIT_CTL_ADDR0_OFFSET 32
+#define RTIT_CTL_ADDR0 (0x0full << RTIT_CTL_ADDR0_OFFSET)
+#define RTIT_CTL_ADDR1_OFFSET 36
+#define RTIT_CTL_ADDR1 (0x0full << RTIT_CTL_ADDR1_OFFSET)
+#define RTIT_CTL_ADDR2_OFFSET 40
+#define RTIT_CTL_ADDR2 (0x0full << RTIT_CTL_ADDR2_OFFSET)
+#define RTIT_CTL_ADDR3_OFFSET 44
+#define RTIT_CTL_ADDR3 (0x0full << RTIT_CTL_ADDR3_OFFSET)
+#define RTIT_STATUS_FILTEREN BIT(0)
+#define RTIT_STATUS_CONTEXTEN BIT(1)
+#define RTIT_STATUS_TRIGGEREN BIT(2)
+#define RTIT_STATUS_BUFFOVF BIT(3)
+#define RTIT_STATUS_ERROR BIT(4)
+#define RTIT_STATUS_STOPPED BIT(5)
+
+/*
* Single-entry ToPA: when this close to region boundary, switch
* buffers to avoid losing data.
*/
@@ -48,15 +82,20 @@ struct topa_entry {
#define PT_CPUID_LEAVES 2
#define PT_CPUID_REGS_NUM 4 /* number of regsters (eax, ebx, ecx, edx) */
+/* TSC to Core Crystal Clock Ratio */
+#define CPUID_TSC_LEAF 0x15
+
enum pt_capabilities {
PT_CAP_max_subleaf = 0,
PT_CAP_cr3_filtering,
PT_CAP_psb_cyc,
+ PT_CAP_ip_filtering,
PT_CAP_mtc,
PT_CAP_topa_output,
PT_CAP_topa_multiple_entries,
PT_CAP_single_range_output,
PT_CAP_payloads_lip,
+ PT_CAP_num_address_ranges,
PT_CAP_mtc_periods,
PT_CAP_cycle_thresholds,
PT_CAP_psb_periods,
@@ -66,6 +105,9 @@ struct pt_pmu {
struct pmu pmu;
u32 caps[PT_CPUID_REGS_NUM * PT_CPUID_LEAVES];
bool vmx;
+ unsigned long max_nonturbo_ratio;
+ unsigned int tsc_art_num;
+ unsigned int tsc_art_den;
};
/**
@@ -104,14 +146,40 @@ struct pt_buffer {
struct topa_entry *topa_index[0];
};
+#define PT_FILTERS_NUM 4
+
+/**
+ * struct pt_filter - IP range filter configuration
+ * @msr_a: range start, goes to RTIT_ADDRn_A
+ * @msr_b: range end, goes to RTIT_ADDRn_B
+ * @config: 4-bit field in RTIT_CTL
+ */
+struct pt_filter {
+ unsigned long msr_a;
+ unsigned long msr_b;
+ unsigned long config;
+};
+
+/**
+ * struct pt_filters - IP range filtering context
+ * @filter: filters defined for this context
+ * @nr_filters: number of defined filters in the @filter array
+ */
+struct pt_filters {
+ struct pt_filter filter[PT_FILTERS_NUM];
+ unsigned int nr_filters;
+};
+
/**
* struct pt - per-cpu pt context
* @handle: perf output handle
+ * @filters: last configured filters
* @handle_nmi: do handle PT PMI on this cpu, there's an active event
* @vmx_on: 1 if VMX is ON on this cpu
*/
struct pt {
struct perf_output_handle handle;
+ struct pt_filters filters;
int handle_nmi;
int vmx_on;
};
diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c
index 1705c9d75e44..99c4bab123cd 100644
--- a/arch/x86/events/intel/rapl.c
+++ b/arch/x86/events/intel/rapl.c
@@ -27,10 +27,14 @@
* event: rapl_energy_dram
* perf code: 0x3
*
- * dram counter: consumption of the builtin-gpu domain (client only)
+ * gpu counter: consumption of the builtin-gpu domain (client only)
* event: rapl_energy_gpu
* perf code: 0x4
*
+ * psys counter: consumption of the builtin-psys domain (client only)
+ * event: rapl_energy_psys
+ * perf code: 0x5
+ *
* We manage those counters as free running (read-only). They may be
* use simultaneously by other tools, such as turbostat.
*
@@ -53,6 +57,8 @@
#include <asm/cpu_device_id.h>
#include "../perf_event.h"
+MODULE_LICENSE("GPL");
+
/*
* RAPL energy status counters
*/
@@ -64,13 +70,16 @@
#define INTEL_RAPL_RAM 0x3 /* pseudo-encoding */
#define RAPL_IDX_PP1_NRG_STAT 3 /* gpu */
#define INTEL_RAPL_PP1 0x4 /* pseudo-encoding */
+#define RAPL_IDX_PSYS_NRG_STAT 4 /* psys */
+#define INTEL_RAPL_PSYS 0x5 /* pseudo-encoding */
-#define NR_RAPL_DOMAINS 0x4
+#define NR_RAPL_DOMAINS 0x5
static const char *const rapl_domain_names[NR_RAPL_DOMAINS] __initconst = {
"pp0-core",
"package",
"dram",
"pp1-gpu",
+ "psys",
};
/* Clients have PP0, PKG */
@@ -89,6 +98,13 @@ static const char *const rapl_domain_names[NR_RAPL_DOMAINS] __initconst = {
1<<RAPL_IDX_RAM_NRG_STAT|\
1<<RAPL_IDX_PP1_NRG_STAT)
+/* SKL clients have PP0, PKG, RAM, PP1, PSYS */
+#define RAPL_IDX_SKL_CLN (1<<RAPL_IDX_PP0_NRG_STAT|\
+ 1<<RAPL_IDX_PKG_NRG_STAT|\
+ 1<<RAPL_IDX_RAM_NRG_STAT|\
+ 1<<RAPL_IDX_PP1_NRG_STAT|\
+ 1<<RAPL_IDX_PSYS_NRG_STAT)
+
/* Knights Landing has PKG, RAM */
#define RAPL_IDX_KNL (1<<RAPL_IDX_PKG_NRG_STAT|\
1<<RAPL_IDX_RAM_NRG_STAT)
@@ -360,6 +376,10 @@ static int rapl_pmu_event_init(struct perf_event *event)
bit = RAPL_IDX_PP1_NRG_STAT;
msr = MSR_PP1_ENERGY_STATUS;
break;
+ case INTEL_RAPL_PSYS:
+ bit = RAPL_IDX_PSYS_NRG_STAT;
+ msr = MSR_PLATFORM_ENERGY_STATUS;
+ break;
default:
return -EINVAL;
}
@@ -414,11 +434,13 @@ RAPL_EVENT_ATTR_STR(energy-cores, rapl_cores, "event=0x01");
RAPL_EVENT_ATTR_STR(energy-pkg , rapl_pkg, "event=0x02");
RAPL_EVENT_ATTR_STR(energy-ram , rapl_ram, "event=0x03");
RAPL_EVENT_ATTR_STR(energy-gpu , rapl_gpu, "event=0x04");
+RAPL_EVENT_ATTR_STR(energy-psys, rapl_psys, "event=0x05");
RAPL_EVENT_ATTR_STR(energy-cores.unit, rapl_cores_unit, "Joules");
RAPL_EVENT_ATTR_STR(energy-pkg.unit , rapl_pkg_unit, "Joules");
RAPL_EVENT_ATTR_STR(energy-ram.unit , rapl_ram_unit, "Joules");
RAPL_EVENT_ATTR_STR(energy-gpu.unit , rapl_gpu_unit, "Joules");
+RAPL_EVENT_ATTR_STR(energy-psys.unit, rapl_psys_unit, "Joules");
/*
* we compute in 0.23 nJ increments regardless of MSR
@@ -427,6 +449,7 @@ RAPL_EVENT_ATTR_STR(energy-cores.scale, rapl_cores_scale, "2.3283064365386962890
RAPL_EVENT_ATTR_STR(energy-pkg.scale, rapl_pkg_scale, "2.3283064365386962890625e-10");
RAPL_EVENT_ATTR_STR(energy-ram.scale, rapl_ram_scale, "2.3283064365386962890625e-10");
RAPL_EVENT_ATTR_STR(energy-gpu.scale, rapl_gpu_scale, "2.3283064365386962890625e-10");
+RAPL_EVENT_ATTR_STR(energy-psys.scale, rapl_psys_scale, "2.3283064365386962890625e-10");
static struct attribute *rapl_events_srv_attr[] = {
EVENT_PTR(rapl_cores),
@@ -476,6 +499,27 @@ static struct attribute *rapl_events_hsw_attr[] = {
NULL,
};
+static struct attribute *rapl_events_skl_attr[] = {
+ EVENT_PTR(rapl_cores),
+ EVENT_PTR(rapl_pkg),
+ EVENT_PTR(rapl_gpu),
+ EVENT_PTR(rapl_ram),
+ EVENT_PTR(rapl_psys),
+
+ EVENT_PTR(rapl_cores_unit),
+ EVENT_PTR(rapl_pkg_unit),
+ EVENT_PTR(rapl_gpu_unit),
+ EVENT_PTR(rapl_ram_unit),
+ EVENT_PTR(rapl_psys_unit),
+
+ EVENT_PTR(rapl_cores_scale),
+ EVENT_PTR(rapl_pkg_scale),
+ EVENT_PTR(rapl_gpu_scale),
+ EVENT_PTR(rapl_ram_scale),
+ EVENT_PTR(rapl_psys_scale),
+ NULL,
+};
+
static struct attribute *rapl_events_knl_attr[] = {
EVENT_PTR(rapl_pkg),
EVENT_PTR(rapl_ram),
@@ -592,6 +636,11 @@ static int rapl_cpu_notifier(struct notifier_block *self,
return NOTIFY_OK;
}
+static struct notifier_block rapl_cpu_nb = {
+ .notifier_call = rapl_cpu_notifier,
+ .priority = CPU_PRI_PERF + 1,
+};
+
static int rapl_check_hw_unit(bool apply_quirk)
{
u64 msr_rapl_power_unit_bits;
@@ -660,7 +709,7 @@ static int __init rapl_prepare_cpus(void)
return 0;
}
-static void __init cleanup_rapl_pmus(void)
+static void cleanup_rapl_pmus(void)
{
int i;
@@ -691,52 +740,92 @@ static int __init init_rapl_pmus(void)
return 0;
}
+#define X86_RAPL_MODEL_MATCH(model, init) \
+ { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long)&init }
+
+struct intel_rapl_init_fun {
+ bool apply_quirk;
+ int cntr_mask;
+ struct attribute **attrs;
+};
+
+static const struct intel_rapl_init_fun snb_rapl_init __initconst = {
+ .apply_quirk = false,
+ .cntr_mask = RAPL_IDX_CLN,
+ .attrs = rapl_events_cln_attr,
+};
+
+static const struct intel_rapl_init_fun hsx_rapl_init __initconst = {
+ .apply_quirk = true,
+ .cntr_mask = RAPL_IDX_SRV,
+ .attrs = rapl_events_srv_attr,
+};
+
+static const struct intel_rapl_init_fun hsw_rapl_init __initconst = {
+ .apply_quirk = false,
+ .cntr_mask = RAPL_IDX_HSW,
+ .attrs = rapl_events_hsw_attr,
+};
+
+static const struct intel_rapl_init_fun snbep_rapl_init __initconst = {
+ .apply_quirk = false,
+ .cntr_mask = RAPL_IDX_SRV,
+ .attrs = rapl_events_srv_attr,
+};
+
+static const struct intel_rapl_init_fun knl_rapl_init __initconst = {
+ .apply_quirk = true,
+ .cntr_mask = RAPL_IDX_KNL,
+ .attrs = rapl_events_knl_attr,
+};
+
+static const struct intel_rapl_init_fun skl_rapl_init __initconst = {
+ .apply_quirk = false,
+ .cntr_mask = RAPL_IDX_SKL_CLN,
+ .attrs = rapl_events_skl_attr,
+};
+
static const struct x86_cpu_id rapl_cpu_match[] __initconst = {
- [0] = { .vendor = X86_VENDOR_INTEL, .family = 6 },
- [1] = {},
+ X86_RAPL_MODEL_MATCH(42, snb_rapl_init), /* Sandy Bridge */
+ X86_RAPL_MODEL_MATCH(45, snbep_rapl_init), /* Sandy Bridge-EP */
+
+ X86_RAPL_MODEL_MATCH(58, snb_rapl_init), /* Ivy Bridge */
+ X86_RAPL_MODEL_MATCH(62, snbep_rapl_init), /* IvyTown */
+
+ X86_RAPL_MODEL_MATCH(60, hsw_rapl_init), /* Haswell */
+ X86_RAPL_MODEL_MATCH(63, hsx_rapl_init), /* Haswell-Server */
+ X86_RAPL_MODEL_MATCH(69, hsw_rapl_init), /* Haswell-Celeron */
+ X86_RAPL_MODEL_MATCH(70, hsw_rapl_init), /* Haswell GT3e */
+
+ X86_RAPL_MODEL_MATCH(61, hsw_rapl_init), /* Broadwell */
+ X86_RAPL_MODEL_MATCH(71, hsw_rapl_init), /* Broadwell-H */
+ X86_RAPL_MODEL_MATCH(79, hsx_rapl_init), /* Broadwell-Server */
+ X86_RAPL_MODEL_MATCH(86, hsx_rapl_init), /* Broadwell Xeon D */
+
+ X86_RAPL_MODEL_MATCH(87, knl_rapl_init), /* Knights Landing */
+
+ X86_RAPL_MODEL_MATCH(78, skl_rapl_init), /* Skylake */
+ X86_RAPL_MODEL_MATCH(94, skl_rapl_init), /* Skylake H/S */
+ {},
};
+MODULE_DEVICE_TABLE(x86cpu, rapl_cpu_match);
+
static int __init rapl_pmu_init(void)
{
- bool apply_quirk = false;
+ const struct x86_cpu_id *id;
+ struct intel_rapl_init_fun *rapl_init;
+ bool apply_quirk;
int ret;
- if (!x86_match_cpu(rapl_cpu_match))
+ id = x86_match_cpu(rapl_cpu_match);
+ if (!id)
return -ENODEV;
- switch (boot_cpu_data.x86_model) {
- case 42: /* Sandy Bridge */
- case 58: /* Ivy Bridge */
- rapl_cntr_mask = RAPL_IDX_CLN;
- rapl_pmu_events_group.attrs = rapl_events_cln_attr;
- break;
- case 63: /* Haswell-Server */
- case 79: /* Broadwell-Server */
- apply_quirk = true;
- rapl_cntr_mask = RAPL_IDX_SRV;
- rapl_pmu_events_group.attrs = rapl_events_srv_attr;
- break;
- case 60: /* Haswell */
- case 69: /* Haswell-Celeron */
- case 70: /* Haswell GT3e */
- case 61: /* Broadwell */
- case 71: /* Broadwell-H */
- rapl_cntr_mask = RAPL_IDX_HSW;
- rapl_pmu_events_group.attrs = rapl_events_hsw_attr;
- break;
- case 45: /* Sandy Bridge-EP */
- case 62: /* IvyTown */
- rapl_cntr_mask = RAPL_IDX_SRV;
- rapl_pmu_events_group.attrs = rapl_events_srv_attr;
- break;
- case 87: /* Knights Landing */
- apply_quirk = true;
- rapl_cntr_mask = RAPL_IDX_KNL;
- rapl_pmu_events_group.attrs = rapl_events_knl_attr;
- break;
- default:
- return -ENODEV;
- }
+ rapl_init = (struct intel_rapl_init_fun *)id->driver_data;
+ apply_quirk = rapl_init->apply_quirk;
+ rapl_cntr_mask = rapl_init->cntr_mask;
+ rapl_pmu_events_group.attrs = rapl_init->attrs;
ret = rapl_check_hw_unit(apply_quirk);
if (ret)
@@ -756,7 +845,7 @@ static int __init rapl_pmu_init(void)
if (ret)
goto out;
- __perf_cpu_notifier(rapl_cpu_notifier);
+ __register_cpu_notifier(&rapl_cpu_nb);
cpu_notifier_register_done();
rapl_advertise();
return 0;
@@ -767,4 +856,14 @@ out:
cpu_notifier_register_done();
return ret;
}
-device_initcall(rapl_pmu_init);
+module_init(rapl_pmu_init);
+
+static void __exit intel_rapl_exit(void)
+{
+ cpu_notifier_register_begin();
+ __unregister_cpu_notifier(&rapl_cpu_nb);
+ perf_pmu_unregister(&rapl_pmus->pmu);
+ cleanup_rapl_pmus();
+ cpu_notifier_register_done();
+}
+module_exit(intel_rapl_exit);
diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
index 7012d18bb293..17734a6ef474 100644
--- a/arch/x86/events/intel/uncore.c
+++ b/arch/x86/events/intel/uncore.c
@@ -1,3 +1,4 @@
+#include <asm/cpu_device_id.h>
#include "uncore.h"
static struct intel_uncore_type *empty_uncore[] = { NULL, };
@@ -21,6 +22,8 @@ static struct event_constraint uncore_constraint_fixed =
struct event_constraint uncore_constraint_empty =
EVENT_CONSTRAINT(0, 0, 0);
+MODULE_LICENSE("GPL");
+
static int uncore_pcibus_to_physid(struct pci_bus *bus)
{
struct pci2phy_map *map;
@@ -754,7 +757,7 @@ static void uncore_pmu_unregister(struct intel_uncore_pmu *pmu)
pmu->registered = false;
}
-static void __init __uncore_exit_boxes(struct intel_uncore_type *type, int cpu)
+static void __uncore_exit_boxes(struct intel_uncore_type *type, int cpu)
{
struct intel_uncore_pmu *pmu = type->pmus;
struct intel_uncore_box *box;
@@ -770,7 +773,7 @@ static void __init __uncore_exit_boxes(struct intel_uncore_type *type, int cpu)
}
}
-static void __init uncore_exit_boxes(void *dummy)
+static void uncore_exit_boxes(void *dummy)
{
struct intel_uncore_type **types;
@@ -787,7 +790,7 @@ static void uncore_free_boxes(struct intel_uncore_pmu *pmu)
kfree(pmu->boxes);
}
-static void __init uncore_type_exit(struct intel_uncore_type *type)
+static void uncore_type_exit(struct intel_uncore_type *type)
{
struct intel_uncore_pmu *pmu = type->pmus;
int i;
@@ -804,7 +807,7 @@ static void __init uncore_type_exit(struct intel_uncore_type *type)
type->events_group = NULL;
}
-static void __init uncore_types_exit(struct intel_uncore_type **types)
+static void uncore_types_exit(struct intel_uncore_type **types)
{
for (; *types; types++)
uncore_type_exit(*types);
@@ -989,46 +992,6 @@ static int __init uncore_pci_init(void)
size_t size;
int ret;
- switch (boot_cpu_data.x86_model) {
- case 45: /* Sandy Bridge-EP */
- ret = snbep_uncore_pci_init();
- break;
- case 62: /* Ivy Bridge-EP */
- ret = ivbep_uncore_pci_init();
- break;
- case 63: /* Haswell-EP */
- ret = hswep_uncore_pci_init();
- break;
- case 79: /* BDX-EP */
- case 86: /* BDX-DE */
- ret = bdx_uncore_pci_init();
- break;
- case 42: /* Sandy Bridge */
- ret = snb_uncore_pci_init();
- break;
- case 58: /* Ivy Bridge */
- ret = ivb_uncore_pci_init();
- break;
- case 60: /* Haswell */
- case 69: /* Haswell Celeron */
- ret = hsw_uncore_pci_init();
- break;
- case 61: /* Broadwell */
- ret = bdw_uncore_pci_init();
- break;
- case 87: /* Knights Landing */
- ret = knl_uncore_pci_init();
- break;
- case 94: /* SkyLake */
- ret = skl_uncore_pci_init();
- break;
- default:
- return -ENODEV;
- }
-
- if (ret)
- return ret;
-
size = max_packages * sizeof(struct pci_extra_dev);
uncore_extra_pci_dev = kzalloc(size, GFP_KERNEL);
if (!uncore_extra_pci_dev) {
@@ -1060,7 +1023,7 @@ err:
return ret;
}
-static void __init uncore_pci_exit(void)
+static void uncore_pci_exit(void)
{
if (pcidrv_registered) {
pcidrv_registered = false;
@@ -1287,46 +1250,6 @@ static int __init uncore_cpu_init(void)
{
int ret;
- switch (boot_cpu_data.x86_model) {
- case 26: /* Nehalem */
- case 30:
- case 37: /* Westmere */
- case 44:
- nhm_uncore_cpu_init();
- break;
- case 42: /* Sandy Bridge */
- case 58: /* Ivy Bridge */
- case 60: /* Haswell */
- case 69: /* Haswell */
- case 70: /* Haswell */
- case 61: /* Broadwell */
- case 71: /* Broadwell */
- snb_uncore_cpu_init();
- break;
- case 45: /* Sandy Bridge-EP */
- snbep_uncore_cpu_init();
- break;
- case 46: /* Nehalem-EX */
- case 47: /* Westmere-EX aka. Xeon E7 */
- nhmex_uncore_cpu_init();
- break;
- case 62: /* Ivy Bridge-EP */
- ivbep_uncore_cpu_init();
- break;
- case 63: /* Haswell-EP */
- hswep_uncore_cpu_init();
- break;
- case 79: /* BDX-EP */
- case 86: /* BDX-DE */
- bdx_uncore_cpu_init();
- break;
- case 87: /* Knights Landing */
- knl_uncore_cpu_init();
- break;
- default:
- return -ENODEV;
- }
-
ret = uncore_types_init(uncore_msr_uncores, true);
if (ret)
goto err;
@@ -1376,11 +1299,105 @@ static int __init uncore_cpumask_init(bool msr)
return 0;
}
+#define X86_UNCORE_MODEL_MATCH(model, init) \
+ { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long)&init }
+
+struct intel_uncore_init_fun {
+ void (*cpu_init)(void);
+ int (*pci_init)(void);
+};
+
+static const struct intel_uncore_init_fun nhm_uncore_init __initconst = {
+ .cpu_init = nhm_uncore_cpu_init,
+};
+
+static const struct intel_uncore_init_fun snb_uncore_init __initconst = {
+ .cpu_init = snb_uncore_cpu_init,
+ .pci_init = snb_uncore_pci_init,
+};
+
+static const struct intel_uncore_init_fun ivb_uncore_init __initconst = {
+ .cpu_init = snb_uncore_cpu_init,
+ .pci_init = ivb_uncore_pci_init,
+};
+
+static const struct intel_uncore_init_fun hsw_uncore_init __initconst = {
+ .cpu_init = snb_uncore_cpu_init,
+ .pci_init = hsw_uncore_pci_init,
+};
+
+static const struct intel_uncore_init_fun bdw_uncore_init __initconst = {
+ .cpu_init = snb_uncore_cpu_init,
+ .pci_init = bdw_uncore_pci_init,
+};
+
+static const struct intel_uncore_init_fun snbep_uncore_init __initconst = {
+ .cpu_init = snbep_uncore_cpu_init,
+ .pci_init = snbep_uncore_pci_init,
+};
+
+static const struct intel_uncore_init_fun nhmex_uncore_init __initconst = {
+ .cpu_init = nhmex_uncore_cpu_init,
+};
+
+static const struct intel_uncore_init_fun ivbep_uncore_init __initconst = {
+ .cpu_init = ivbep_uncore_cpu_init,
+ .pci_init = ivbep_uncore_pci_init,
+};
+
+static const struct intel_uncore_init_fun hswep_uncore_init __initconst = {
+ .cpu_init = hswep_uncore_cpu_init,
+ .pci_init = hswep_uncore_pci_init,
+};
+
+static const struct intel_uncore_init_fun bdx_uncore_init __initconst = {
+ .cpu_init = bdx_uncore_cpu_init,
+ .pci_init = bdx_uncore_pci_init,
+};
+
+static const struct intel_uncore_init_fun knl_uncore_init __initconst = {
+ .cpu_init = knl_uncore_cpu_init,
+ .pci_init = knl_uncore_pci_init,
+};
+
+static const struct intel_uncore_init_fun skl_uncore_init __initconst = {
+ .pci_init = skl_uncore_pci_init,
+};
+
+static const struct x86_cpu_id intel_uncore_match[] __initconst = {
+ X86_UNCORE_MODEL_MATCH(26, nhm_uncore_init), /* Nehalem */
+ X86_UNCORE_MODEL_MATCH(30, nhm_uncore_init),
+ X86_UNCORE_MODEL_MATCH(37, nhm_uncore_init), /* Westmere */
+ X86_UNCORE_MODEL_MATCH(44, nhm_uncore_init),
+ X86_UNCORE_MODEL_MATCH(42, snb_uncore_init), /* Sandy Bridge */
+ X86_UNCORE_MODEL_MATCH(58, ivb_uncore_init), /* Ivy Bridge */
+ X86_UNCORE_MODEL_MATCH(60, hsw_uncore_init), /* Haswell */
+ X86_UNCORE_MODEL_MATCH(69, hsw_uncore_init), /* Haswell Celeron */
+ X86_UNCORE_MODEL_MATCH(70, hsw_uncore_init), /* Haswell */
+ X86_UNCORE_MODEL_MATCH(61, bdw_uncore_init), /* Broadwell */
+ X86_UNCORE_MODEL_MATCH(71, bdw_uncore_init), /* Broadwell */
+ X86_UNCORE_MODEL_MATCH(45, snbep_uncore_init), /* Sandy Bridge-EP */
+ X86_UNCORE_MODEL_MATCH(46, nhmex_uncore_init), /* Nehalem-EX */
+ X86_UNCORE_MODEL_MATCH(47, nhmex_uncore_init), /* Westmere-EX aka. Xeon E7 */
+ X86_UNCORE_MODEL_MATCH(62, ivbep_uncore_init), /* Ivy Bridge-EP */
+ X86_UNCORE_MODEL_MATCH(63, hswep_uncore_init), /* Haswell-EP */
+ X86_UNCORE_MODEL_MATCH(79, bdx_uncore_init), /* BDX-EP */
+ X86_UNCORE_MODEL_MATCH(86, bdx_uncore_init), /* BDX-DE */
+ X86_UNCORE_MODEL_MATCH(87, knl_uncore_init), /* Knights Landing */
+ X86_UNCORE_MODEL_MATCH(94, skl_uncore_init), /* SkyLake */
+ {},
+};
+
+MODULE_DEVICE_TABLE(x86cpu, intel_uncore_match);
+
static int __init intel_uncore_init(void)
{
- int pret, cret, ret;
+ const struct x86_cpu_id *id;
+ struct intel_uncore_init_fun *uncore_init;
+ int pret = 0, cret = 0, ret;
- if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
+ id = x86_match_cpu(intel_uncore_match);
+ if (!id)
return -ENODEV;
if (cpu_has_hypervisor)
@@ -1388,8 +1405,17 @@ static int __init intel_uncore_init(void)
max_packages = topology_max_packages();
- pret = uncore_pci_init();
- cret = uncore_cpu_init();
+ uncore_init = (struct intel_uncore_init_fun *)id->driver_data;
+ if (uncore_init->pci_init) {
+ pret = uncore_init->pci_init();
+ if (!pret)
+ pret = uncore_pci_init();
+ }
+
+ if (uncore_init->cpu_init) {
+ uncore_init->cpu_init();
+ cret = uncore_cpu_init();
+ }
if (cret && pret)
return -ENODEV;
@@ -1409,4 +1435,14 @@ err:
cpu_notifier_register_done();
return ret;
}
-device_initcall(intel_uncore_init);
+module_init(intel_uncore_init);
+
+static void __exit intel_uncore_exit(void)
+{
+ cpu_notifier_register_begin();
+ __unregister_cpu_notifier(&uncore_cpu_nb);
+ uncore_types_exit(uncore_msr_uncores);
+ uncore_pci_exit();
+ cpu_notifier_register_done();
+}
+module_exit(intel_uncore_exit);
diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c
index ab2bcaaebe38..b2625867ebd1 100644
--- a/arch/x86/events/intel/uncore_snbep.c
+++ b/arch/x86/events/intel/uncore_snbep.c
@@ -219,6 +219,9 @@
#define KNL_CHA_MSR_PMON_BOX_FILTER_TID 0x1ff
#define KNL_CHA_MSR_PMON_BOX_FILTER_STATE (7 << 18)
#define KNL_CHA_MSR_PMON_BOX_FILTER_OP (0xfffffe2aULL << 32)
+#define KNL_CHA_MSR_PMON_BOX_FILTER_REMOTE_NODE (0x1ULL << 32)
+#define KNL_CHA_MSR_PMON_BOX_FILTER_LOCAL_NODE (0x1ULL << 33)
+#define KNL_CHA_MSR_PMON_BOX_FILTER_NNC (0x1ULL << 37)
/* KNL EDC/MC UCLK */
#define KNL_UCLK_MSR_PMON_CTR0_LOW 0x400
@@ -1902,6 +1905,10 @@ static int knl_cha_hw_config(struct intel_uncore_box *box,
reg1->reg = HSWEP_C0_MSR_PMON_BOX_FILTER0 +
KNL_CHA_MSR_OFFSET * box->pmu->pmu_idx;
reg1->config = event->attr.config1 & knl_cha_filter_mask(idx);
+
+ reg1->config |= KNL_CHA_MSR_PMON_BOX_FILTER_REMOTE_NODE;
+ reg1->config |= KNL_CHA_MSR_PMON_BOX_FILTER_LOCAL_NODE;
+ reg1->config |= KNL_CHA_MSR_PMON_BOX_FILTER_NNC;
reg1->idx = idx;
}
return 0;
diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c
index ec863b9a9f78..85ef3c2e80e0 100644
--- a/arch/x86/events/msr.c
+++ b/arch/x86/events/msr.c
@@ -6,6 +6,8 @@ enum perf_msr_id {
PERF_MSR_MPERF = 2,
PERF_MSR_PPERF = 3,
PERF_MSR_SMI = 4,
+ PERF_MSR_PTSC = 5,
+ PERF_MSR_IRPERF = 6,
PERF_MSR_EVENT_MAX,
};
@@ -15,6 +17,16 @@ static bool test_aperfmperf(int idx)
return boot_cpu_has(X86_FEATURE_APERFMPERF);
}
+static bool test_ptsc(int idx)
+{
+ return boot_cpu_has(X86_FEATURE_PTSC);
+}
+
+static bool test_irperf(int idx)
+{
+ return boot_cpu_has(X86_FEATURE_IRPERF);
+}
+
static bool test_intel(int idx)
{
if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
@@ -69,18 +81,22 @@ struct perf_msr {
bool (*test)(int idx);
};
-PMU_EVENT_ATTR_STRING(tsc, evattr_tsc, "event=0x00");
-PMU_EVENT_ATTR_STRING(aperf, evattr_aperf, "event=0x01");
-PMU_EVENT_ATTR_STRING(mperf, evattr_mperf, "event=0x02");
-PMU_EVENT_ATTR_STRING(pperf, evattr_pperf, "event=0x03");
-PMU_EVENT_ATTR_STRING(smi, evattr_smi, "event=0x04");
+PMU_EVENT_ATTR_STRING(tsc, evattr_tsc, "event=0x00");
+PMU_EVENT_ATTR_STRING(aperf, evattr_aperf, "event=0x01");
+PMU_EVENT_ATTR_STRING(mperf, evattr_mperf, "event=0x02");
+PMU_EVENT_ATTR_STRING(pperf, evattr_pperf, "event=0x03");
+PMU_EVENT_ATTR_STRING(smi, evattr_smi, "event=0x04");
+PMU_EVENT_ATTR_STRING(ptsc, evattr_ptsc, "event=0x05");
+PMU_EVENT_ATTR_STRING(irperf, evattr_irperf, "event=0x06");
static struct perf_msr msr[] = {
- [PERF_MSR_TSC] = { 0, &evattr_tsc, NULL, },
- [PERF_MSR_APERF] = { MSR_IA32_APERF, &evattr_aperf, test_aperfmperf, },
- [PERF_MSR_MPERF] = { MSR_IA32_MPERF, &evattr_mperf, test_aperfmperf, },
- [PERF_MSR_PPERF] = { MSR_PPERF, &evattr_pperf, test_intel, },
- [PERF_MSR_SMI] = { MSR_SMI_COUNT, &evattr_smi, test_intel, },
+ [PERF_MSR_TSC] = { 0, &evattr_tsc, NULL, },
+ [PERF_MSR_APERF] = { MSR_IA32_APERF, &evattr_aperf, test_aperfmperf, },
+ [PERF_MSR_MPERF] = { MSR_IA32_MPERF, &evattr_mperf, test_aperfmperf, },
+ [PERF_MSR_PPERF] = { MSR_PPERF, &evattr_pperf, test_intel, },
+ [PERF_MSR_SMI] = { MSR_SMI_COUNT, &evattr_smi, test_intel, },
+ [PERF_MSR_PTSC] = { MSR_F15H_PTSC, &evattr_ptsc, test_ptsc, },
+ [PERF_MSR_IRPERF] = { MSR_F17H_IRPERF, &evattr_irperf, test_irperf, },
};
static struct attribute *events_attrs[PERF_MSR_EVENT_MAX + 1] = {
@@ -166,7 +182,7 @@ again:
if (unlikely(event->hw.event_base == MSR_SMI_COUNT))
delta = sign_extend64(delta, 31);
- local64_add(now - prev, &event->count);
+ local64_add(delta, &event->count);
}
static void msr_event_start(struct perf_event *event, int flags)
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index ad4dc7ffffb5..8bd764df815d 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -601,6 +601,7 @@ struct x86_pmu {
u64 lbr_sel_mask; /* LBR_SELECT valid bits */
const int *lbr_sel_map; /* lbr_select mappings */
bool lbr_double_abort; /* duplicated lbr aborts */
+ bool lbr_pt_coexist; /* LBR may coexist with PT */
/*
* Intel PT/LBR/BTS are exclusive
@@ -859,6 +860,8 @@ extern struct event_constraint intel_atom_pebs_event_constraints[];
extern struct event_constraint intel_slm_pebs_event_constraints[];
+extern struct event_constraint intel_glm_pebs_event_constraints[];
+
extern struct event_constraint intel_nehalem_pebs_event_constraints[];
extern struct event_constraint intel_westmere_pebs_event_constraints[];
@@ -907,6 +910,8 @@ void intel_pmu_lbr_init_nhm(void);
void intel_pmu_lbr_init_atom(void);
+void intel_pmu_lbr_init_slm(void);
+
void intel_pmu_lbr_init_snb(void);
void intel_pmu_lbr_init_hsw(void);
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 3636ec06c887..53ac9bbf2064 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -27,6 +27,7 @@ enum cpuid_leafs
CPUID_6_EAX,
CPUID_8000_000A_EDX,
CPUID_7_ECX,
+ CPUID_8000_0007_EBX,
};
#ifdef CONFIG_X86_FEATURE_NAMES
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 8f9afefd2dc5..0aee9dd1976e 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -12,7 +12,7 @@
/*
* Defines x86 CPU feature bits
*/
-#define NCAPINTS 17 /* N 32-bit words worth of info */
+#define NCAPINTS 18 /* N 32-bit words worth of info */
#define NBUGINTS 1 /* N 32-bit bug flags */
/*
@@ -177,6 +177,7 @@
#define X86_FEATURE_PERFCTR_CORE ( 6*32+23) /* core performance counter extensions */
#define X86_FEATURE_PERFCTR_NB ( 6*32+24) /* NB performance counter extensions */
#define X86_FEATURE_BPEXT (6*32+26) /* data breakpoint extension */
+#define X86_FEATURE_PTSC ( 6*32+27) /* performance time-stamp counter */
#define X86_FEATURE_PERFCTR_L2 ( 6*32+28) /* L2 performance counter extensions */
#define X86_FEATURE_MWAITX ( 6*32+29) /* MWAIT extension (MONITORX/MWAITX) */
@@ -250,6 +251,7 @@
/* AMD-defined CPU features, CPUID level 0x80000008 (ebx), word 13 */
#define X86_FEATURE_CLZERO (13*32+0) /* CLZERO instruction */
+#define X86_FEATURE_IRPERF (13*32+1) /* Instructions Retired Count */
/* Thermal and Power Management Leaf, CPUID level 0x00000006 (eax), word 14 */
#define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */
@@ -280,6 +282,11 @@
#define X86_FEATURE_PKU (16*32+ 3) /* Protection Keys for Userspace */
#define X86_FEATURE_OSPKE (16*32+ 4) /* OS Protection Keys Enable */
+/* AMD-defined CPU features, CPUID level 0x80000007 (ebx), word 17 */
+#define X86_FEATURE_OVERFLOW_RECOV (17*32+0) /* MCA overflow recovery support */
+#define X86_FEATURE_SUCCOR (17*32+1) /* Uncorrectable error containment and recovery */
+#define X86_FEATURE_SMCA (17*32+3) /* Scalable MCA */
+
/*
* BUG word(s)
*/
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 53748c45e488..78d1e7467eae 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -3,6 +3,7 @@
#include <asm/fpu/api.h>
#include <asm/pgtable.h>
+#include <asm/processor-flags.h>
#include <asm/tlb.h>
/*
@@ -28,33 +29,22 @@
#define MAX_CMDLINE_ADDRESS UINT_MAX
-#ifdef CONFIG_X86_32
+#define ARCH_EFI_IRQ_FLAGS_MASK X86_EFLAGS_IF
+#ifdef CONFIG_X86_32
extern unsigned long asmlinkage efi_call_phys(void *, ...);
+#define arch_efi_call_virt_setup() kernel_fpu_begin()
+#define arch_efi_call_virt_teardown() kernel_fpu_end()
+
/*
* Wrap all the virtual calls in a way that forces the parameters on the stack.
*/
-
-/* Use this macro if your virtual returns a non-void value */
-#define efi_call_virt(f, args...) \
+#define arch_efi_call_virt(f, args...) \
({ \
- efi_status_t __s; \
- kernel_fpu_begin(); \
- __s = ((efi_##f##_t __attribute__((regparm(0)))*) \
- efi.systab->runtime->f)(args); \
- kernel_fpu_end(); \
- __s; \
-})
-
-/* Use this macro if your virtual call does not return any value */
-#define __efi_call_virt(f, args...) \
-({ \
- kernel_fpu_begin(); \
((efi_##f##_t __attribute__((regparm(0)))*) \
efi.systab->runtime->f)(args); \
- kernel_fpu_end(); \
})
#define efi_ioremap(addr, size, type, attr) ioremap_cache(addr, size)
@@ -78,10 +68,8 @@ struct efi_scratch {
u64 phys_stack;
} __packed;
-#define efi_call_virt(f, ...) \
+#define arch_efi_call_virt_setup() \
({ \
- efi_status_t __s; \
- \
efi_sync_low_kernel_mappings(); \
preempt_disable(); \
__kernel_fpu_begin(); \
@@ -91,9 +79,13 @@ struct efi_scratch {
write_cr3((unsigned long)efi_scratch.efi_pgt); \
__flush_tlb_all(); \
} \
- \
- __s = efi_call((void *)efi.systab->runtime->f, __VA_ARGS__); \
- \
+})
+
+#define arch_efi_call_virt(f, args...) \
+ efi_call((void *)efi.systab->runtime->f, args) \
+
+#define arch_efi_call_virt_teardown() \
+({ \
if (efi_scratch.use_pgd) { \
write_cr3(efi_scratch.prev_cr3); \
__flush_tlb_all(); \
@@ -101,15 +93,8 @@ struct efi_scratch {
\
__kernel_fpu_end(); \
preempt_enable(); \
- __s; \
})
-/*
- * All X86_64 virt calls return non-void values. Thus, use non-void call for
- * virt calls that would be void on X86_32.
- */
-#define __efi_call_virt(f, args...) efi_call_virt(f, args)
-
extern void __iomem *__init efi_ioremap(unsigned long addr, unsigned long size,
u32 type, u64 attribute);
@@ -180,6 +165,8 @@ static inline bool efi_runtime_supported(void)
extern struct console early_efi_console;
extern void parse_efi_setup(u64 phys_addr, u32 data_len);
+extern void efifb_setup_from_dmi(struct screen_info *si, const char *opt);
+
#ifdef CONFIG_EFI_MIXED
extern void efi_thunk_runtime_setup(void);
extern efi_status_t efi_thunk_set_virtual_address_map(
@@ -225,6 +212,11 @@ __pure const struct efi_config *__efi_early(void);
#define efi_call_early(f, ...) \
__efi_early()->call(__efi_early()->f, __VA_ARGS__);
+#define __efi_call_early(f, ...) \
+ __efi_early()->call((unsigned long)f, __VA_ARGS__);
+
+#define efi_is_64bit() __efi_early()->is64
+
extern bool efi_reboot_required(void);
#else
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 92b6f651fa4f..8bf766ef0e18 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -104,13 +104,23 @@
#define MCE_LOG_SIGNATURE "MACHINECHECK"
/* AMD Scalable MCA */
+#define MSR_AMD64_SMCA_MC0_CTL 0xc0002000
+#define MSR_AMD64_SMCA_MC0_STATUS 0xc0002001
+#define MSR_AMD64_SMCA_MC0_ADDR 0xc0002002
#define MSR_AMD64_SMCA_MC0_MISC0 0xc0002003
#define MSR_AMD64_SMCA_MC0_CONFIG 0xc0002004
#define MSR_AMD64_SMCA_MC0_IPID 0xc0002005
+#define MSR_AMD64_SMCA_MC0_DESTAT 0xc0002008
+#define MSR_AMD64_SMCA_MC0_DEADDR 0xc0002009
#define MSR_AMD64_SMCA_MC0_MISC1 0xc000200a
+#define MSR_AMD64_SMCA_MCx_CTL(x) (MSR_AMD64_SMCA_MC0_CTL + 0x10*(x))
+#define MSR_AMD64_SMCA_MCx_STATUS(x) (MSR_AMD64_SMCA_MC0_STATUS + 0x10*(x))
+#define MSR_AMD64_SMCA_MCx_ADDR(x) (MSR_AMD64_SMCA_MC0_ADDR + 0x10*(x))
#define MSR_AMD64_SMCA_MCx_MISC(x) (MSR_AMD64_SMCA_MC0_MISC0 + 0x10*(x))
#define MSR_AMD64_SMCA_MCx_CONFIG(x) (MSR_AMD64_SMCA_MC0_CONFIG + 0x10*(x))
#define MSR_AMD64_SMCA_MCx_IPID(x) (MSR_AMD64_SMCA_MC0_IPID + 0x10*(x))
+#define MSR_AMD64_SMCA_MCx_DESTAT(x) (MSR_AMD64_SMCA_MC0_DESTAT + 0x10*(x))
+#define MSR_AMD64_SMCA_MCx_DEADDR(x) (MSR_AMD64_SMCA_MC0_DEADDR + 0x10*(x))
#define MSR_AMD64_SMCA_MCx_MISCy(x, y) ((MSR_AMD64_SMCA_MC0_MISC1 + y) + (0x10*(x)))
/*
@@ -168,9 +178,18 @@ struct mce_vendor_flags {
__reserved_0 : 61;
};
+
+struct mca_msr_regs {
+ u32 (*ctl) (int bank);
+ u32 (*status) (int bank);
+ u32 (*addr) (int bank);
+ u32 (*misc) (int bank);
+};
+
extern struct mce_vendor_flags mce_flags;
extern struct mca_config mca_cfg;
+extern struct mca_msr_regs msr_ops;
extern void mce_register_decode_chain(struct notifier_block *nb);
extern void mce_unregister_decode_chain(struct notifier_block *nb);
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 5b3c9a55f51c..5a73a9c62c39 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -89,27 +89,16 @@
#define MSR_PEBS_LD_LAT_THRESHOLD 0x000003f6
#define MSR_IA32_RTIT_CTL 0x00000570
-#define RTIT_CTL_TRACEEN BIT(0)
-#define RTIT_CTL_CYCLEACC BIT(1)
-#define RTIT_CTL_OS BIT(2)
-#define RTIT_CTL_USR BIT(3)
-#define RTIT_CTL_CR3EN BIT(7)
-#define RTIT_CTL_TOPA BIT(8)
-#define RTIT_CTL_MTC_EN BIT(9)
-#define RTIT_CTL_TSC_EN BIT(10)
-#define RTIT_CTL_DISRETC BIT(11)
-#define RTIT_CTL_BRANCH_EN BIT(13)
-#define RTIT_CTL_MTC_RANGE_OFFSET 14
-#define RTIT_CTL_MTC_RANGE (0x0full << RTIT_CTL_MTC_RANGE_OFFSET)
-#define RTIT_CTL_CYC_THRESH_OFFSET 19
-#define RTIT_CTL_CYC_THRESH (0x0full << RTIT_CTL_CYC_THRESH_OFFSET)
-#define RTIT_CTL_PSB_FREQ_OFFSET 24
-#define RTIT_CTL_PSB_FREQ (0x0full << RTIT_CTL_PSB_FREQ_OFFSET)
#define MSR_IA32_RTIT_STATUS 0x00000571
-#define RTIT_STATUS_CONTEXTEN BIT(1)
-#define RTIT_STATUS_TRIGGEREN BIT(2)
-#define RTIT_STATUS_ERROR BIT(4)
-#define RTIT_STATUS_STOPPED BIT(5)
+#define MSR_IA32_RTIT_STATUS 0x00000571
+#define MSR_IA32_RTIT_ADDR0_A 0x00000580
+#define MSR_IA32_RTIT_ADDR0_B 0x00000581
+#define MSR_IA32_RTIT_ADDR1_A 0x00000582
+#define MSR_IA32_RTIT_ADDR1_B 0x00000583
+#define MSR_IA32_RTIT_ADDR2_A 0x00000584
+#define MSR_IA32_RTIT_ADDR2_B 0x00000585
+#define MSR_IA32_RTIT_ADDR3_A 0x00000586
+#define MSR_IA32_RTIT_ADDR3_B 0x00000587
#define MSR_IA32_RTIT_CR3_MATCH 0x00000572
#define MSR_IA32_RTIT_OUTPUT_BASE 0x00000560
#define MSR_IA32_RTIT_OUTPUT_MASK 0x00000561
@@ -205,6 +194,8 @@
#define MSR_CONFIG_TDP_CONTROL 0x0000064B
#define MSR_TURBO_ACTIVATION_RATIO 0x0000064C
+#define MSR_PLATFORM_ENERGY_STATUS 0x0000064D
+
#define MSR_PKG_WEIGHTED_CORE_C0_RES 0x00000658
#define MSR_PKG_ANY_CORE_C0_RES 0x00000659
#define MSR_PKG_ANY_GFXE_C0_RES 0x0000065A
@@ -315,6 +306,9 @@
#define MSR_AMD64_IBSOPDATA4 0xc001103d
#define MSR_AMD64_IBS_REG_COUNT_MAX 8 /* includes MSR_AMD64_IBSBRTARGET */
+/* Fam 17h MSRs */
+#define MSR_F17H_IRPERF 0xc00000e9
+
/* Fam 16h MSRs */
#define MSR_F16H_L2I_PERF_CTL 0xc0010230
#define MSR_F16H_L2I_PERF_CTR 0xc0010231
@@ -328,6 +322,7 @@
#define MSR_F15H_PERF_CTR 0xc0010201
#define MSR_F15H_NB_PERF_CTL 0xc0010240
#define MSR_F15H_NB_PERF_CTR 0xc0010241
+#define MSR_F15H_PTSC 0xc0010280
#define MSR_F15H_IC_CFG 0xc0011021
/* Fam 10h MSRs */
diff --git a/arch/x86/include/asm/rwsem.h b/arch/x86/include/asm/rwsem.h
index ceec86eb68e9..453744c1d347 100644
--- a/arch/x86/include/asm/rwsem.h
+++ b/arch/x86/include/asm/rwsem.h
@@ -99,26 +99,36 @@ static inline int __down_read_trylock(struct rw_semaphore *sem)
/*
* lock for writing
*/
-static inline void __down_write_nested(struct rw_semaphore *sem, int subclass)
+#define ____down_write(sem, slow_path) \
+({ \
+ long tmp; \
+ struct rw_semaphore* ret; \
+ asm volatile("# beginning down_write\n\t" \
+ LOCK_PREFIX " xadd %1,(%3)\n\t" \
+ /* adds 0xffff0001, returns the old value */ \
+ " test " __ASM_SEL(%w1,%k1) "," __ASM_SEL(%w1,%k1) "\n\t" \
+ /* was the active mask 0 before? */\
+ " jz 1f\n" \
+ " call " slow_path "\n" \
+ "1:\n" \
+ "# ending down_write" \
+ : "+m" (sem->count), "=d" (tmp), "=a" (ret) \
+ : "a" (sem), "1" (RWSEM_ACTIVE_WRITE_BIAS) \
+ : "memory", "cc"); \
+ ret; \
+})
+
+static inline void __down_write(struct rw_semaphore *sem)
{
- long tmp;
- asm volatile("# beginning down_write\n\t"
- LOCK_PREFIX " xadd %1,(%2)\n\t"
- /* adds 0xffff0001, returns the old value */
- " test " __ASM_SEL(%w1,%k1) "," __ASM_SEL(%w1,%k1) "\n\t"
- /* was the active mask 0 before? */
- " jz 1f\n"
- " call call_rwsem_down_write_failed\n"
- "1:\n"
- "# ending down_write"
- : "+m" (sem->count), "=d" (tmp)
- : "a" (sem), "1" (RWSEM_ACTIVE_WRITE_BIAS)
- : "memory", "cc");
+ ____down_write(sem, "call_rwsem_down_write_failed");
}
-static inline void __down_write(struct rw_semaphore *sem)
+static inline int __down_write_killable(struct rw_semaphore *sem)
{
- __down_write_nested(sem, 0);
+ if (IS_ERR(____down_write(sem, "call_rwsem_down_write_failed_killable")))
+ return -EINTR;
+
+ return 0;
}
/*
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index a969ae607be8..2e7513d1f1f4 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -108,6 +108,14 @@ struct exception_table_entry {
#define ARCH_HAS_RELATIVE_EXTABLE
+#define swap_ex_entry_fixup(a, b, tmp, delta) \
+ do { \
+ (a)->fixup = (b)->fixup + (delta); \
+ (b)->fixup = (tmp).fixup - (delta); \
+ (a)->handler = (b)->handler + (delta); \
+ (b)->handler = (tmp).handler - (delta); \
+ } while (0)
+
extern int fixup_exception(struct pt_regs *regs, int trapnr);
extern bool ex_has_fault_handler(unsigned long ip);
extern int early_fixup_exception(unsigned long *ip);
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 8394b3d1f94f..dbc6f066e231 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -717,6 +717,13 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
}
}
+ if (c->extended_cpuid_level >= 0x80000007) {
+ cpuid(0x80000007, &eax, &ebx, &ecx, &edx);
+
+ c->x86_capability[CPUID_8000_0007_EBX] = ebx;
+ c->x86_power = edx;
+ }
+
if (c->extended_cpuid_level >= 0x80000008) {
cpuid(0x80000008, &eax, &ebx, &ecx, &edx);
@@ -729,9 +736,6 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
c->x86_phys_bits = 36;
#endif
- if (c->extended_cpuid_level >= 0x80000007)
- c->x86_power = cpuid_edx(0x80000007);
-
if (c->extended_cpuid_level >= 0x8000000a)
c->x86_capability[CPUID_8000_000A_EDX] = cpuid_edx(0x8000000a);
diff --git a/arch/x86/kernel/cpu/mcheck/mce-genpool.c b/arch/x86/kernel/cpu/mcheck/mce-genpool.c
index 2658e2af74ec..93d824ec3120 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-genpool.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-genpool.c
@@ -26,6 +26,52 @@ static struct gen_pool *mce_evt_pool;
static LLIST_HEAD(mce_event_llist);
static char gen_pool_buf[MCE_POOLSZ];
+/*
+ * Compare the record "t" with each of the records on list "l" to see if
+ * an equivalent one is present in the list.
+ */
+static bool is_duplicate_mce_record(struct mce_evt_llist *t, struct mce_evt_llist *l)
+{
+ struct mce_evt_llist *node;
+ struct mce *m1, *m2;
+
+ m1 = &t->mce;
+
+ llist_for_each_entry(node, &l->llnode, llnode) {
+ m2 = &node->mce;
+
+ if (!mce_cmp(m1, m2))
+ return true;
+ }
+ return false;
+}
+
+/*
+ * The system has panicked - we'd like to peruse the list of MCE records
+ * that have been queued, but not seen by anyone yet. The list is in
+ * reverse time order, so we need to reverse it. While doing that we can
+ * also drop duplicate records (these were logged because some banks are
+ * shared between cores or by all threads on a socket).
+ */
+struct llist_node *mce_gen_pool_prepare_records(void)
+{
+ struct llist_node *head;
+ LLIST_HEAD(new_head);
+ struct mce_evt_llist *node, *t;
+
+ head = llist_del_all(&mce_event_llist);
+ if (!head)
+ return NULL;
+
+ /* squeeze out duplicates while reversing order */
+ llist_for_each_entry_safe(node, t, head, llnode) {
+ if (!is_duplicate_mce_record(node, t))
+ llist_add(&node->llnode, &new_head);
+ }
+
+ return new_head.first;
+}
+
void mce_gen_pool_process(void)
{
struct llist_node *head;
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h
index 547720efd923..cd74a3f00aea 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-internal.h
+++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h
@@ -35,6 +35,7 @@ void mce_gen_pool_process(void);
bool mce_gen_pool_empty(void);
int mce_gen_pool_add(struct mce *mce);
int mce_gen_pool_init(void);
+struct llist_node *mce_gen_pool_prepare_records(void);
extern int (*mce_severity)(struct mce *a, int tolerant, char **msg, bool is_excp);
struct dentry *mce_get_debugfs_dir(void);
@@ -81,3 +82,17 @@ static inline int apei_clear_mce(u64 record_id)
#endif
void mce_inject_log(struct mce *m);
+
+/*
+ * We consider records to be equivalent if bank+status+addr+misc all match.
+ * This is only used when the system is going down because of a fatal error
+ * to avoid cluttering the console log with essentially repeated information.
+ * In normal processing all errors seen are logged.
+ */
+static inline bool mce_cmp(struct mce *m1, struct mce *m2)
+{
+ return m1->bank != m2->bank ||
+ m1->status != m2->status ||
+ m1->addr != m2->addr ||
+ m1->misc != m2->misc;
+}
diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c
index 5119766d9889..631356c8cca4 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
@@ -204,6 +204,33 @@ static int error_context(struct mce *m)
return IN_KERNEL;
}
+static int mce_severity_amd_smca(struct mce *m, int err_ctx)
+{
+ u32 addr = MSR_AMD64_SMCA_MCx_CONFIG(m->bank);
+ u32 low, high;
+
+ /*
+ * We need to look at the following bits:
+ * - "succor" bit (data poisoning support), and
+ * - TCC bit (Task Context Corrupt)
+ * in MCi_STATUS to determine error severity.
+ */
+ if (!mce_flags.succor)
+ return MCE_PANIC_SEVERITY;
+
+ if (rdmsr_safe(addr, &low, &high))
+ return MCE_PANIC_SEVERITY;
+
+ /* TCC (Task context corrupt). If set and if IN_KERNEL, panic. */
+ if ((low & MCI_CONFIG_MCAX) &&
+ (m->status & MCI_STATUS_TCC) &&
+ (err_ctx == IN_KERNEL))
+ return MCE_PANIC_SEVERITY;
+
+ /* ...otherwise invoke hwpoison handler. */
+ return MCE_AR_SEVERITY;
+}
+
/*
* See AMD Error Scope Hierarchy table in a newer BKDG. For example
* 49125_15h_Models_30h-3Fh_BKDG.pdf, section "RAS Features"
@@ -225,6 +252,9 @@ static int mce_severity_amd(struct mce *m, int tolerant, char **msg, bool is_exc
* to at least kill process to prolong system operation.
*/
if (mce_flags.overflow_recov) {
+ if (mce_flags.smca)
+ return mce_severity_amd_smca(m, ctx);
+
/* software can try to contain */
if (!(m->mcgstatus & MCG_STATUS_RIPV) && (ctx == IN_KERNEL))
return MCE_PANIC_SEVERITY;
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index f0c921b03e42..92e5e37d97bf 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -161,7 +161,6 @@ void mce_log(struct mce *mce)
if (!mce_gen_pool_add(mce))
irq_work_queue(&mce_irq_work);
- mce->finished = 0;
wmb();
for (;;) {
entry = mce_log_get_idx_check(mcelog.next);
@@ -194,7 +193,6 @@ void mce_log(struct mce *mce)
mcelog.entry[entry].finished = 1;
wmb();
- mce->finished = 1;
set_bit(0, &mce_need_notify);
}
@@ -224,6 +222,53 @@ void mce_unregister_decode_chain(struct notifier_block *nb)
}
EXPORT_SYMBOL_GPL(mce_unregister_decode_chain);
+static inline u32 ctl_reg(int bank)
+{
+ return MSR_IA32_MCx_CTL(bank);
+}
+
+static inline u32 status_reg(int bank)
+{
+ return MSR_IA32_MCx_STATUS(bank);
+}
+
+static inline u32 addr_reg(int bank)
+{
+ return MSR_IA32_MCx_ADDR(bank);
+}
+
+static inline u32 misc_reg(int bank)
+{
+ return MSR_IA32_MCx_MISC(bank);
+}
+
+static inline u32 smca_ctl_reg(int bank)
+{
+ return MSR_AMD64_SMCA_MCx_CTL(bank);
+}
+
+static inline u32 smca_status_reg(int bank)
+{
+ return MSR_AMD64_SMCA_MCx_STATUS(bank);
+}
+
+static inline u32 smca_addr_reg(int bank)
+{
+ return MSR_AMD64_SMCA_MCx_ADDR(bank);
+}
+
+static inline u32 smca_misc_reg(int bank)
+{
+ return MSR_AMD64_SMCA_MCx_MISC(bank);
+}
+
+struct mca_msr_regs msr_ops = {
+ .ctl = ctl_reg,
+ .status = status_reg,
+ .addr = addr_reg,
+ .misc = misc_reg
+};
+
static void print_mce(struct mce *m)
{
int ret = 0;
@@ -290,7 +335,9 @@ static void wait_for_panic(void)
static void mce_panic(const char *msg, struct mce *final, char *exp)
{
- int i, apei_err = 0;
+ int apei_err = 0;
+ struct llist_node *pending;
+ struct mce_evt_llist *l;
if (!fake_panic) {
/*
@@ -307,11 +354,10 @@ static void mce_panic(const char *msg, struct mce *final, char *exp)
if (atomic_inc_return(&mce_fake_panicked) > 1)
return;
}
+ pending = mce_gen_pool_prepare_records();
/* First print corrected ones that are still unlogged */
- for (i = 0; i < MCE_LOG_LEN; i++) {
- struct mce *m = &mcelog.entry[i];
- if (!(m->status & MCI_STATUS_VAL))
- continue;
+ llist_for_each_entry(l, pending, llnode) {
+ struct mce *m = &l->mce;
if (!(m->status & MCI_STATUS_UC)) {
print_mce(m);
if (!apei_err)
@@ -319,13 +365,11 @@ static void mce_panic(const char *msg, struct mce *final, char *exp)
}
}
/* Now print uncorrected but with the final one last */
- for (i = 0; i < MCE_LOG_LEN; i++) {
- struct mce *m = &mcelog.entry[i];
- if (!(m->status & MCI_STATUS_VAL))
- continue;
+ llist_for_each_entry(l, pending, llnode) {
+ struct mce *m = &l->mce;
if (!(m->status & MCI_STATUS_UC))
continue;
- if (!final || memcmp(m, final, sizeof(struct mce))) {
+ if (!final || mce_cmp(m, final)) {
print_mce(m);
if (!apei_err)
apei_err = apei_write_mce(m);
@@ -356,11 +400,11 @@ static int msr_to_offset(u32 msr)
if (msr == mca_cfg.rip_msr)
return offsetof(struct mce, ip);
- if (msr == MSR_IA32_MCx_STATUS(bank))
+ if (msr == msr_ops.status(bank))
return offsetof(struct mce, status);
- if (msr == MSR_IA32_MCx_ADDR(bank))
+ if (msr == msr_ops.addr(bank))
return offsetof(struct mce, addr);
- if (msr == MSR_IA32_MCx_MISC(bank))
+ if (msr == msr_ops.misc(bank))
return offsetof(struct mce, misc);
if (msr == MSR_IA32_MCG_STATUS)
return offsetof(struct mce, mcgstatus);
@@ -523,9 +567,9 @@ static struct notifier_block mce_srao_nb = {
static void mce_read_aux(struct mce *m, int i)
{
if (m->status & MCI_STATUS_MISCV)
- m->misc = mce_rdmsrl(MSR_IA32_MCx_MISC(i));
+ m->misc = mce_rdmsrl(msr_ops.misc(i));
if (m->status & MCI_STATUS_ADDRV) {
- m->addr = mce_rdmsrl(MSR_IA32_MCx_ADDR(i));
+ m->addr = mce_rdmsrl(msr_ops.addr(i));
/*
* Mask the reported address by the reported granularity.
@@ -607,7 +651,7 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
m.tsc = 0;
barrier();
- m.status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i));
+ m.status = mce_rdmsrl(msr_ops.status(i));
if (!(m.status & MCI_STATUS_VAL))
continue;
@@ -654,7 +698,7 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
/*
* Clear state for this bank.
*/
- mce_wrmsrl(MSR_IA32_MCx_STATUS(i), 0);
+ mce_wrmsrl(msr_ops.status(i), 0);
}
/*
@@ -679,7 +723,7 @@ static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp,
char *tmp;
for (i = 0; i < mca_cfg.banks; i++) {
- m->status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i));
+ m->status = mce_rdmsrl(msr_ops.status(i));
if (m->status & MCI_STATUS_VAL) {
__set_bit(i, validp);
if (quirk_no_way_out)
@@ -830,9 +874,9 @@ static int mce_start(int *no_way_out)
atomic_add(*no_way_out, &global_nwo);
/*
- * global_nwo should be updated before mce_callin
+ * Rely on the implied barrier below, such that global_nwo
+ * is updated before mce_callin.
*/
- smp_wmb();
order = atomic_inc_return(&mce_callin);
/*
@@ -957,7 +1001,7 @@ static void mce_clear_state(unsigned long *toclear)
for (i = 0; i < mca_cfg.banks; i++) {
if (test_bit(i, toclear))
- mce_wrmsrl(MSR_IA32_MCx_STATUS(i), 0);
+ mce_wrmsrl(msr_ops.status(i), 0);
}
}
@@ -994,11 +1038,12 @@ void do_machine_check(struct pt_regs *regs, long error_code)
int i;
int worst = 0;
int severity;
+
/*
* Establish sequential order between the CPUs entering the machine
* check handler.
*/
- int order;
+ int order = -1;
/*
* If no_way_out gets set, there is no safe way to recover from this
* MCE. If mca_cfg.tolerant is cranked up, we'll try anyway.
@@ -1012,7 +1057,12 @@ void do_machine_check(struct pt_regs *regs, long error_code)
DECLARE_BITMAP(toclear, MAX_NR_BANKS);
DECLARE_BITMAP(valid_banks, MAX_NR_BANKS);
char *msg = "Unknown";
- int lmce = 0;
+
+ /*
+ * MCEs are always local on AMD. Same is determined by MCG_STATUS_LMCES
+ * on Intel.
+ */
+ int lmce = 1;
/* If this CPU is offline, just bail out. */
if (cpu_is_offline(smp_processor_id())) {
@@ -1051,19 +1101,20 @@ void do_machine_check(struct pt_regs *regs, long error_code)
kill_it = 1;
/*
- * Check if this MCE is signaled to only this logical processor
+ * Check if this MCE is signaled to only this logical processor,
+ * on Intel only.
*/
- if (m.mcgstatus & MCG_STATUS_LMCES)
- lmce = 1;
- else {
- /*
- * Go through all the banks in exclusion of the other CPUs.
- * This way we don't report duplicated events on shared banks
- * because the first one to see it will clear it.
- * If this is a Local MCE, then no need to perform rendezvous.
- */
+ if (m.cpuvendor == X86_VENDOR_INTEL)
+ lmce = m.mcgstatus & MCG_STATUS_LMCES;
+
+ /*
+ * Go through all banks in exclusion of the other CPUs. This way we
+ * don't report duplicated events on shared banks because the first one
+ * to see it will clear it. If this is a Local MCE, then no need to
+ * perform rendezvous.
+ */
+ if (!lmce)
order = mce_start(&no_way_out);
- }
for (i = 0; i < cfg->banks; i++) {
__clear_bit(i, toclear);
@@ -1076,7 +1127,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
m.addr = 0;
m.bank = i;
- m.status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i));
+ m.status = mce_rdmsrl(msr_ops.status(i));
if ((m.status & MCI_STATUS_VAL) == 0)
continue;
@@ -1420,7 +1471,6 @@ static void __mcheck_cpu_init_generic(void)
enum mcp_flags m_fl = 0;
mce_banks_t all_banks;
u64 cap;
- int i;
if (!mca_cfg.bootlog)
m_fl = MCP_DONTLOG;
@@ -1436,14 +1486,19 @@ static void __mcheck_cpu_init_generic(void)
rdmsrl(MSR_IA32_MCG_CAP, cap);
if (cap & MCG_CTL_P)
wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
+}
+
+static void __mcheck_cpu_init_clear_banks(void)
+{
+ int i;
for (i = 0; i < mca_cfg.banks; i++) {
struct mce_bank *b = &mce_banks[i];
if (!b->init)
continue;
- wrmsrl(MSR_IA32_MCx_CTL(i), b->ctl);
- wrmsrl(MSR_IA32_MCx_STATUS(i), 0);
+ wrmsrl(msr_ops.ctl(i), b->ctl);
+ wrmsrl(msr_ops.status(i), 0);
}
}
@@ -1495,7 +1550,7 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
*/
clear_bit(10, (unsigned long *)&mce_banks[4].ctl);
}
- if (c->x86 <= 17 && cfg->bootlog < 0) {
+ if (c->x86 < 17 && cfg->bootlog < 0) {
/*
* Lots of broken BIOS around that don't clear them
* by default and leave crap in there. Don't log:
@@ -1628,11 +1683,19 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
break;
case X86_VENDOR_AMD: {
- u32 ebx = cpuid_ebx(0x80000007);
+ mce_flags.overflow_recov = !!cpu_has(c, X86_FEATURE_OVERFLOW_RECOV);
+ mce_flags.succor = !!cpu_has(c, X86_FEATURE_SUCCOR);
+ mce_flags.smca = !!cpu_has(c, X86_FEATURE_SMCA);
- mce_flags.overflow_recov = !!(ebx & BIT(0));
- mce_flags.succor = !!(ebx & BIT(1));
- mce_flags.smca = !!(ebx & BIT(3));
+ /*
+ * Install proper ops for Scalable MCA enabled processors
+ */
+ if (mce_flags.smca) {
+ msr_ops.ctl = smca_ctl_reg;
+ msr_ops.status = smca_status_reg;
+ msr_ops.addr = smca_addr_reg;
+ msr_ops.misc = smca_misc_reg;
+ }
mce_amd_feature_init(c);
break;
@@ -1717,6 +1780,7 @@ void mcheck_cpu_init(struct cpuinfo_x86 *c)
__mcheck_cpu_init_generic();
__mcheck_cpu_init_vendor(c);
+ __mcheck_cpu_init_clear_banks();
__mcheck_cpu_init_timer();
}
@@ -2082,7 +2146,7 @@ static void mce_disable_error_reporting(void)
struct mce_bank *b = &mce_banks[i];
if (b->init)
- wrmsrl(MSR_IA32_MCx_CTL(i), 0);
+ wrmsrl(msr_ops.ctl(i), 0);
}
return;
}
@@ -2121,6 +2185,7 @@ static void mce_syscore_resume(void)
{
__mcheck_cpu_init_generic();
__mcheck_cpu_init_vendor(raw_cpu_ptr(&cpu_info));
+ __mcheck_cpu_init_clear_banks();
}
static struct syscore_ops mce_syscore_ops = {
@@ -2138,6 +2203,7 @@ static void mce_cpu_restart(void *data)
if (!mce_available(raw_cpu_ptr(&cpu_info)))
return;
__mcheck_cpu_init_generic();
+ __mcheck_cpu_init_clear_banks();
__mcheck_cpu_init_timer();
}
@@ -2413,7 +2479,7 @@ static void mce_reenable_cpu(void *h)
struct mce_bank *b = &mce_banks[i];
if (b->init)
- wrmsrl(MSR_IA32_MCx_CTL(i), b->ctl);
+ wrmsrl(msr_ops.ctl(i), b->ctl);
}
}
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 9d656fd436ef..10b0661651e0 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -54,14 +54,6 @@
/* Threshold LVT offset is at MSR0xC0000410[15:12] */
#define SMCA_THR_LVT_OFF 0xF000
-/*
- * OS is required to set the MCAX bit to acknowledge that it is now using the
- * new MSR ranges and new registers under each bank. It also means that the OS
- * will configure deferred errors in the new MCx_CONFIG register. If the bit is
- * not set, uncorrectable errors will cause a system panic.
- */
-#define SMCA_MCAX_EN_OFF 0x1
-
static const char * const th_names[] = {
"load_store",
"insn_fetch",
@@ -333,7 +325,7 @@ static u32 get_block_address(u32 current_addr, u32 low, u32 high,
/* Fall back to method we used for older processors: */
switch (block) {
case 0:
- addr = MSR_IA32_MCx_MISC(bank);
+ addr = msr_ops.misc(bank);
break;
case 1:
offset = ((low & MASK_BLKPTR_LO) >> 21);
@@ -351,6 +343,7 @@ prepare_threshold_block(unsigned int bank, unsigned int block, u32 addr,
int offset, u32 misc_high)
{
unsigned int cpu = smp_processor_id();
+ u32 smca_low, smca_high, smca_addr;
struct threshold_block b;
int new;
@@ -369,24 +362,49 @@ prepare_threshold_block(unsigned int bank, unsigned int block, u32 addr,
b.interrupt_enable = 1;
- if (mce_flags.smca) {
- u32 smca_low, smca_high;
- u32 smca_addr = MSR_AMD64_SMCA_MCx_CONFIG(bank);
+ if (!mce_flags.smca) {
+ new = (misc_high & MASK_LVTOFF_HI) >> 20;
+ goto set_offset;
+ }
- if (!rdmsr_safe(smca_addr, &smca_low, &smca_high)) {
- smca_high |= SMCA_MCAX_EN_OFF;
- wrmsr(smca_addr, smca_low, smca_high);
- }
+ smca_addr = MSR_AMD64_SMCA_MCx_CONFIG(bank);
- /* Gather LVT offset for thresholding: */
- if (rdmsr_safe(MSR_CU_DEF_ERR, &smca_low, &smca_high))
- goto out;
+ if (!rdmsr_safe(smca_addr, &smca_low, &smca_high)) {
+ /*
+ * OS is required to set the MCAX bit to acknowledge that it is
+ * now using the new MSR ranges and new registers under each
+ * bank. It also means that the OS will configure deferred
+ * errors in the new MCx_CONFIG register. If the bit is not set,
+ * uncorrectable errors will cause a system panic.
+ *
+ * MCA_CONFIG[MCAX] is bit 32 (0 in the high portion of the MSR.)
+ */
+ smca_high |= BIT(0);
- new = (smca_low & SMCA_THR_LVT_OFF) >> 12;
- } else {
- new = (misc_high & MASK_LVTOFF_HI) >> 20;
+ /*
+ * SMCA logs Deferred Error information in MCA_DE{STAT,ADDR}
+ * registers with the option of additionally logging to
+ * MCA_{STATUS,ADDR} if MCA_CONFIG[LogDeferredInMcaStat] is set.
+ *
+ * This bit is usually set by BIOS to retain the old behavior
+ * for OSes that don't use the new registers. Linux supports the
+ * new registers so let's disable that additional logging here.
+ *
+ * MCA_CONFIG[LogDeferredInMcaStat] is bit 34 (bit 2 in the high
+ * portion of the MSR).
+ */
+ smca_high &= ~BIT(2);
+
+ wrmsr(smca_addr, smca_low, smca_high);
}
+ /* Gather LVT offset for thresholding: */
+ if (rdmsr_safe(MSR_CU_DEF_ERR, &smca_low, &smca_high))
+ goto out;
+
+ new = (smca_low & SMCA_THR_LVT_OFF) >> 12;
+
+set_offset:
offset = setup_APIC_mce_threshold(offset, new);
if ((offset == new) && (mce_threshold_vector != amd_threshold_interrupt))
@@ -430,12 +448,23 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
deferred_error_interrupt_enable(c);
}
-static void __log_error(unsigned int bank, bool threshold_err, u64 misc)
+static void
+__log_error(unsigned int bank, bool deferred_err, bool threshold_err, u64 misc)
{
+ u32 msr_status = msr_ops.status(bank);
+ u32 msr_addr = msr_ops.addr(bank);
struct mce m;
u64 status;
- rdmsrl(MSR_IA32_MCx_STATUS(bank), status);
+ WARN_ON_ONCE(deferred_err && threshold_err);
+
+ if (deferred_err && mce_flags.smca) {
+ msr_status = MSR_AMD64_SMCA_MCx_DESTAT(bank);
+ msr_addr = MSR_AMD64_SMCA_MCx_DEADDR(bank);
+ }
+
+ rdmsrl(msr_status, status);
+
if (!(status & MCI_STATUS_VAL))
return;
@@ -448,10 +477,11 @@ static void __log_error(unsigned int bank, bool threshold_err, u64 misc)
m.misc = misc;
if (m.status & MCI_STATUS_ADDRV)
- rdmsrl(MSR_IA32_MCx_ADDR(bank), m.addr);
+ rdmsrl(msr_addr, m.addr);
mce_log(&m);
- wrmsrl(MSR_IA32_MCx_STATUS(bank), 0);
+
+ wrmsrl(msr_status, 0);
}
static inline void __smp_deferred_error_interrupt(void)
@@ -479,17 +509,21 @@ asmlinkage __visible void smp_trace_deferred_error_interrupt(void)
/* APIC interrupt handler for deferred errors */
static void amd_deferred_error_interrupt(void)
{
- u64 status;
unsigned int bank;
+ u32 msr_status;
+ u64 status;
for (bank = 0; bank < mca_cfg.banks; ++bank) {
- rdmsrl(MSR_IA32_MCx_STATUS(bank), status);
+ msr_status = (mce_flags.smca) ? MSR_AMD64_SMCA_MCx_DESTAT(bank)
+ : msr_ops.status(bank);
+
+ rdmsrl(msr_status, status);
if (!(status & MCI_STATUS_VAL) ||
!(status & MCI_STATUS_DEFERRED))
continue;
- __log_error(bank, false, 0);
+ __log_error(bank, true, false, 0);
break;
}
}
@@ -544,7 +578,7 @@ static void amd_threshold_interrupt(void)
return;
log:
- __log_error(bank, true, ((u64)high << 32) | low);
+ __log_error(bank, false, true, ((u64)high << 32) | low);
}
/*
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index ab0adc0fa5db..a9b31eb815f2 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -535,6 +535,15 @@ static void native_machine_emergency_restart(void)
mode = reboot_mode == REBOOT_WARM ? 0x1234 : 0;
*((unsigned short *)__va(0x472)) = mode;
+ /*
+ * If an EFI capsule has been registered with the firmware then
+ * override the reboot= parameter.
+ */
+ if (efi_capsule_pending(NULL)) {
+ pr_info("EFI capsule is pending, forcing EFI reboot.\n");
+ reboot_type = BOOT_EFI;
+ }
+
for (;;) {
/* Could also try the reset bit in the Hammer NB */
switch (reboot_type) {
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 548ddf7d6fd2..3e84ef16f657 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -248,18 +248,17 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
if (config_enabled(CONFIG_X86_64))
sp -= 128;
- if (!onsigstack) {
- /* This is the X/Open sanctioned signal stack switching. */
- if (ka->sa.sa_flags & SA_ONSTACK) {
- if (current->sas_ss_size)
- sp = current->sas_ss_sp + current->sas_ss_size;
- } else if (config_enabled(CONFIG_X86_32) &&
- (regs->ss & 0xffff) != __USER_DS &&
- !(ka->sa.sa_flags & SA_RESTORER) &&
- ka->sa.sa_restorer) {
- /* This is the legacy signal stack switching. */
- sp = (unsigned long) ka->sa.sa_restorer;
- }
+ /* This is the X/Open sanctioned signal stack switching. */
+ if (ka->sa.sa_flags & SA_ONSTACK) {
+ if (sas_ss_flags(sp) == 0)
+ sp = current->sas_ss_sp + current->sas_ss_size;
+ } else if (config_enabled(CONFIG_X86_32) &&
+ !onsigstack &&
+ (regs->ss & 0xffff) != __USER_DS &&
+ !(ka->sa.sa_flags & SA_RESTORER) &&
+ ka->sa.sa_restorer) {
+ /* This is the legacy signal stack switching. */
+ sp = (unsigned long) ka->sa.sa_restorer;
}
if (fpu->fpstate_active) {
diff --git a/arch/x86/kernel/sysfb_efi.c b/arch/x86/kernel/sysfb_efi.c
index 5da924bbf0a0..623965e86b65 100644
--- a/arch/x86/kernel/sysfb_efi.c
+++ b/arch/x86/kernel/sysfb_efi.c
@@ -68,6 +68,21 @@ struct efifb_dmi_info efifb_dmi_list[] = {
[M_UNKNOWN] = { NULL, 0, 0, 0, 0, OVERRIDE_NONE }
};
+void efifb_setup_from_dmi(struct screen_info *si, const char *opt)
+{
+ int i;
+
+ for (i = 0; i < M_UNKNOWN; i++) {
+ if (efifb_dmi_list[i].base != 0 &&
+ !strcmp(opt, efifb_dmi_list[i].optname)) {
+ si->lfb_base = efifb_dmi_list[i].base;
+ si->lfb_linelength = efifb_dmi_list[i].stride;
+ si->lfb_width = efifb_dmi_list[i].width;
+ si->lfb_height = efifb_dmi_list[i].height;
+ }
+ }
+}
+
#define choose_value(dmivalue, fwvalue, field, flags) ({ \
typeof(fwvalue) _ret_ = fwvalue; \
if ((flags) & (field)) \
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index bf4db6eaec8f..bd074151bfd6 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -578,7 +578,7 @@ static void default_abort_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
riprel_post_xol(auprobe, regs);
}
-static struct uprobe_xol_ops default_xol_ops = {
+static const struct uprobe_xol_ops default_xol_ops = {
.pre_xol = default_pre_xol_op,
.post_xol = default_post_xol_op,
.abort = default_abort_op,
@@ -695,7 +695,7 @@ static void branch_clear_offset(struct arch_uprobe *auprobe, struct insn *insn)
0, insn->immediate.nbytes);
}
-static struct uprobe_xol_ops branch_xol_ops = {
+static const struct uprobe_xol_ops branch_xol_ops = {
.emulate = branch_emulate_op,
.post_xol = branch_post_xol_op,
};
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 0f6294376fbd..a2f24af3c999 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -5110,13 +5110,17 @@ static void fetch_possible_mmx_operand(struct x86_emulate_ctxt *ctxt,
static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *))
{
+ register void *__sp asm(_ASM_SP);
ulong flags = (ctxt->eflags & EFLAGS_MASK) | X86_EFLAGS_IF;
+
if (!(ctxt->d & ByteOp))
fop += __ffs(ctxt->dst.bytes) * FASTOP_SIZE;
+
asm("push %[flags]; popf; call *%[fastop]; pushf; pop %[flags]\n"
: "+a"(ctxt->dst.val), "+d"(ctxt->src.val), [flags]"+D"(flags),
- [fastop]"+S"(fop)
+ [fastop]"+S"(fop), "+r"(__sp)
: "c"(ctxt->src2.val));
+
ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK);
if (!fop) /* exception is returned in fop variable */
return emulate_de(ctxt);
diff --git a/arch/x86/lib/rwsem.S b/arch/x86/lib/rwsem.S
index be110efa0096..bf2c6074efd2 100644
--- a/arch/x86/lib/rwsem.S
+++ b/arch/x86/lib/rwsem.S
@@ -29,8 +29,10 @@
* there is contention on the semaphore.
*
* %eax contains the semaphore pointer on entry. Save the C-clobbered
- * registers (%eax, %edx and %ecx) except %eax whish is either a return
- * value or just clobbered..
+ * registers (%eax, %edx and %ecx) except %eax which is either a return
+ * value or just gets clobbered. Same is true for %edx so make sure GCC
+ * reloads it after the slow path, by making it hold a temporary, for
+ * example see ____down_write().
*/
#define save_common_regs \
@@ -106,6 +108,16 @@ ENTRY(call_rwsem_down_write_failed)
ret
ENDPROC(call_rwsem_down_write_failed)
+ENTRY(call_rwsem_down_write_failed_killable)
+ FRAME_BEGIN
+ save_common_regs
+ movq %rax,%rdi
+ call rwsem_down_write_failed_killable
+ restore_common_regs
+ FRAME_END
+ ret
+ENDPROC(call_rwsem_down_write_failed_killable)
+
ENTRY(call_rwsem_wake)
FRAME_BEGIN
/* do nothing if still outstanding active readers */
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 01be9ec3bf79..a1f0e1d0ddc2 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -1125,8 +1125,14 @@ static int populate_pgd(struct cpa_data *cpa, unsigned long addr)
static int __cpa_process_fault(struct cpa_data *cpa, unsigned long vaddr,
int primary)
{
- if (cpa->pgd)
+ if (cpa->pgd) {
+ /*
+ * Right now, we only execute this code path when mapping
+ * the EFI virtual memory map regions, no other users
+ * provide a ->pgd value. This may change in the future.
+ */
return populate_pgd(cpa, vaddr);
+ }
/*
* Ignore all non primary paths.
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 994a7df84a7b..f93545e7dc54 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -54,10 +54,6 @@
#include <asm/rtc.h>
#include <asm/uv/uv.h>
-#define EFI_DEBUG
-
-struct efi_memory_map memmap;
-
static struct efi efi_phys __initdata;
static efi_system_table_t efi_systab __initdata;
@@ -119,11 +115,10 @@ void efi_get_time(struct timespec *now)
void __init efi_find_mirror(void)
{
- void *p;
+ efi_memory_desc_t *md;
u64 mirror_size = 0, total_size = 0;
- for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
- efi_memory_desc_t *md = p;
+ for_each_efi_memory_desc(md) {
unsigned long long start = md->phys_addr;
unsigned long long size = md->num_pages << EFI_PAGE_SHIFT;
@@ -146,10 +141,9 @@ void __init efi_find_mirror(void)
static void __init do_add_efi_memmap(void)
{
- void *p;
+ efi_memory_desc_t *md;
- for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
- efi_memory_desc_t *md = p;
+ for_each_efi_memory_desc(md) {
unsigned long long start = md->phys_addr;
unsigned long long size = md->num_pages << EFI_PAGE_SHIFT;
int e820_type;
@@ -209,47 +203,47 @@ int __init efi_memblock_x86_reserve_range(void)
#else
pmap = (e->efi_memmap | ((__u64)e->efi_memmap_hi << 32));
#endif
- memmap.phys_map = pmap;
- memmap.nr_map = e->efi_memmap_size /
+ efi.memmap.phys_map = pmap;
+ efi.memmap.nr_map = e->efi_memmap_size /
e->efi_memdesc_size;
- memmap.desc_size = e->efi_memdesc_size;
- memmap.desc_version = e->efi_memdesc_version;
+ efi.memmap.desc_size = e->efi_memdesc_size;
+ efi.memmap.desc_version = e->efi_memdesc_version;
- memblock_reserve(pmap, memmap.nr_map * memmap.desc_size);
+ WARN(efi.memmap.desc_version != 1,
+ "Unexpected EFI_MEMORY_DESCRIPTOR version %ld",
+ efi.memmap.desc_version);
- efi.memmap = &memmap;
+ memblock_reserve(pmap, efi.memmap.nr_map * efi.memmap.desc_size);
return 0;
}
void __init efi_print_memmap(void)
{
-#ifdef EFI_DEBUG
efi_memory_desc_t *md;
- void *p;
- int i;
+ int i = 0;
- for (p = memmap.map, i = 0;
- p < memmap.map_end;
- p += memmap.desc_size, i++) {
+ for_each_efi_memory_desc(md) {
char buf[64];
- md = p;
pr_info("mem%02u: %s range=[0x%016llx-0x%016llx] (%lluMB)\n",
- i, efi_md_typeattr_format(buf, sizeof(buf), md),
+ i++, efi_md_typeattr_format(buf, sizeof(buf), md),
md->phys_addr,
md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT) - 1,
(md->num_pages >> (20 - EFI_PAGE_SHIFT)));
}
-#endif /* EFI_DEBUG */
}
void __init efi_unmap_memmap(void)
{
+ unsigned long size;
+
clear_bit(EFI_MEMMAP, &efi.flags);
- if (memmap.map) {
- early_memunmap(memmap.map, memmap.nr_map * memmap.desc_size);
- memmap.map = NULL;
+
+ size = efi.memmap.nr_map * efi.memmap.desc_size;
+ if (efi.memmap.map) {
+ early_memunmap(efi.memmap.map, size);
+ efi.memmap.map = NULL;
}
}
@@ -352,8 +346,6 @@ static int __init efi_systab_init(void *phys)
efi.systab->hdr.revision >> 16,
efi.systab->hdr.revision & 0xffff);
- set_bit(EFI_SYSTEM_TABLES, &efi.flags);
-
return 0;
}
@@ -440,17 +432,22 @@ static int __init efi_runtime_init(void)
static int __init efi_memmap_init(void)
{
+ unsigned long addr, size;
+
if (efi_enabled(EFI_PARAVIRT))
return 0;
/* Map the EFI memory map */
- memmap.map = early_memremap((unsigned long)memmap.phys_map,
- memmap.nr_map * memmap.desc_size);
- if (memmap.map == NULL) {
+ size = efi.memmap.nr_map * efi.memmap.desc_size;
+ addr = (unsigned long)efi.memmap.phys_map;
+
+ efi.memmap.map = early_memremap(addr, size);
+ if (efi.memmap.map == NULL) {
pr_err("Could not map the memory map!\n");
return -ENOMEM;
}
- memmap.map_end = memmap.map + (memmap.nr_map * memmap.desc_size);
+
+ efi.memmap.map_end = efi.memmap.map + size;
if (add_efi_memmap)
do_add_efi_memmap();
@@ -552,12 +549,9 @@ void __init efi_set_executable(efi_memory_desc_t *md, bool executable)
void __init runtime_code_page_mkexec(void)
{
efi_memory_desc_t *md;
- void *p;
/* Make EFI runtime service code area executable */
- for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
- md = p;
-
+ for_each_efi_memory_desc(md) {
if (md->type != EFI_RUNTIME_SERVICES_CODE)
continue;
@@ -604,12 +598,10 @@ void __init old_map_region(efi_memory_desc_t *md)
/* Merge contiguous regions of the same type and attribute */
static void __init efi_merge_regions(void)
{
- void *p;
efi_memory_desc_t *md, *prev_md = NULL;
- for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
+ for_each_efi_memory_desc(md) {
u64 prev_size;
- md = p;
if (!prev_md) {
prev_md = md;
@@ -651,30 +643,31 @@ static void __init get_systab_virt_addr(efi_memory_desc_t *md)
static void __init save_runtime_map(void)
{
#ifdef CONFIG_KEXEC_CORE
+ unsigned long desc_size;
efi_memory_desc_t *md;
- void *tmp, *p, *q = NULL;
+ void *tmp, *q = NULL;
int count = 0;
if (efi_enabled(EFI_OLD_MEMMAP))
return;
- for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
- md = p;
+ desc_size = efi.memmap.desc_size;
+ for_each_efi_memory_desc(md) {
if (!(md->attribute & EFI_MEMORY_RUNTIME) ||
(md->type == EFI_BOOT_SERVICES_CODE) ||
(md->type == EFI_BOOT_SERVICES_DATA))
continue;
- tmp = krealloc(q, (count + 1) * memmap.desc_size, GFP_KERNEL);
+ tmp = krealloc(q, (count + 1) * desc_size, GFP_KERNEL);
if (!tmp)
goto out;
q = tmp;
- memcpy(q + count * memmap.desc_size, md, memmap.desc_size);
+ memcpy(q + count * desc_size, md, desc_size);
count++;
}
- efi_runtime_map_setup(q, count, memmap.desc_size);
+ efi_runtime_map_setup(q, count, desc_size);
return;
out:
@@ -714,10 +707,10 @@ static inline void *efi_map_next_entry_reverse(void *entry)
{
/* Initial call */
if (!entry)
- return memmap.map_end - memmap.desc_size;
+ return efi.memmap.map_end - efi.memmap.desc_size;
- entry -= memmap.desc_size;
- if (entry < memmap.map)
+ entry -= efi.memmap.desc_size;
+ if (entry < efi.memmap.map)
return NULL;
return entry;
@@ -759,10 +752,10 @@ static void *efi_map_next_entry(void *entry)
/* Initial call */
if (!entry)
- return memmap.map;
+ return efi.memmap.map;
- entry += memmap.desc_size;
- if (entry >= memmap.map_end)
+ entry += efi.memmap.desc_size;
+ if (entry >= efi.memmap.map_end)
return NULL;
return entry;
@@ -776,8 +769,11 @@ static void * __init efi_map_regions(int *count, int *pg_shift)
{
void *p, *new_memmap = NULL;
unsigned long left = 0;
+ unsigned long desc_size;
efi_memory_desc_t *md;
+ desc_size = efi.memmap.desc_size;
+
p = NULL;
while ((p = efi_map_next_entry(p))) {
md = p;
@@ -792,7 +788,7 @@ static void * __init efi_map_regions(int *count, int *pg_shift)
efi_map_region(md);
get_systab_virt_addr(md);
- if (left < memmap.desc_size) {
+ if (left < desc_size) {
new_memmap = realloc_pages(new_memmap, *pg_shift);
if (!new_memmap)
return NULL;
@@ -801,10 +797,9 @@ static void * __init efi_map_regions(int *count, int *pg_shift)
(*pg_shift)++;
}
- memcpy(new_memmap + (*count * memmap.desc_size), md,
- memmap.desc_size);
+ memcpy(new_memmap + (*count * desc_size), md, desc_size);
- left -= memmap.desc_size;
+ left -= desc_size;
(*count)++;
}
@@ -816,7 +811,6 @@ static void __init kexec_enter_virtual_mode(void)
#ifdef CONFIG_KEXEC_CORE
efi_memory_desc_t *md;
unsigned int num_pages;
- void *p;
efi.systab = NULL;
@@ -840,8 +834,7 @@ static void __init kexec_enter_virtual_mode(void)
* Map efi regions which were passed via setup_data. The virt_addr is a
* fixed addr which was used in first kernel of a kexec boot.
*/
- for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
- md = p;
+ for_each_efi_memory_desc(md) {
efi_map_region_fixed(md); /* FIXME: add error handling */
get_systab_virt_addr(md);
}
@@ -850,10 +843,10 @@ static void __init kexec_enter_virtual_mode(void)
BUG_ON(!efi.systab);
- num_pages = ALIGN(memmap.nr_map * memmap.desc_size, PAGE_SIZE);
+ num_pages = ALIGN(efi.memmap.nr_map * efi.memmap.desc_size, PAGE_SIZE);
num_pages >>= PAGE_SHIFT;
- if (efi_setup_page_tables(memmap.phys_map, num_pages)) {
+ if (efi_setup_page_tables(efi.memmap.phys_map, num_pages)) {
clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
return;
}
@@ -937,16 +930,16 @@ static void __init __efi_enter_virtual_mode(void)
if (efi_is_native()) {
status = phys_efi_set_virtual_address_map(
- memmap.desc_size * count,
- memmap.desc_size,
- memmap.desc_version,
+ efi.memmap.desc_size * count,
+ efi.memmap.desc_size,
+ efi.memmap.desc_version,
(efi_memory_desc_t *)__pa(new_memmap));
} else {
status = efi_thunk_set_virtual_address_map(
efi_phys.set_virtual_address_map,
- memmap.desc_size * count,
- memmap.desc_size,
- memmap.desc_version,
+ efi.memmap.desc_size * count,
+ efi.memmap.desc_size,
+ efi.memmap.desc_version,
(efi_memory_desc_t *)__pa(new_memmap));
}
@@ -1011,13 +1004,11 @@ void __init efi_enter_virtual_mode(void)
u32 efi_mem_type(unsigned long phys_addr)
{
efi_memory_desc_t *md;
- void *p;
if (!efi_enabled(EFI_MEMMAP))
return 0;
- for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
- md = p;
+ for_each_efi_memory_desc(md) {
if ((md->phys_addr <= phys_addr) &&
(phys_addr < (md->phys_addr +
(md->num_pages << EFI_PAGE_SHIFT))))
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index 49e4dd4a1f58..6e7242be1c87 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -55,14 +55,12 @@ struct efi_scratch efi_scratch;
static void __init early_code_mapping_set_exec(int executable)
{
efi_memory_desc_t *md;
- void *p;
if (!(__supported_pte_mask & _PAGE_NX))
return;
/* Make EFI service code area executable */
- for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
- md = p;
+ for_each_efi_memory_desc(md) {
if (md->type == EFI_RUNTIME_SERVICES_CODE ||
md->type == EFI_BOOT_SERVICES_CODE)
efi_set_executable(md, executable);
@@ -253,7 +251,7 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
* Map all of RAM so that we can access arguments in the 1:1
* mapping when making EFI runtime calls.
*/
- for_each_efi_memory_desc(&memmap, md) {
+ for_each_efi_memory_desc(md) {
if (md->type != EFI_CONVENTIONAL_MEMORY &&
md->type != EFI_LOADER_DATA &&
md->type != EFI_LOADER_CODE)
@@ -398,7 +396,6 @@ void __init efi_runtime_update_mappings(void)
unsigned long pfn;
pgd_t *pgd = efi_pgd;
efi_memory_desc_t *md;
- void *p;
if (efi_enabled(EFI_OLD_MEMMAP)) {
if (__supported_pte_mask & _PAGE_NX)
@@ -409,9 +406,8 @@ void __init efi_runtime_update_mappings(void)
if (!efi_enabled(EFI_NX_PE_DATA))
return;
- for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
+ for_each_efi_memory_desc(md) {
unsigned long pf = 0;
- md = p;
if (!(md->attribute & EFI_MEMORY_RUNTIME))
continue;
diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c
index ab50ada1d56e..097cb09d917b 100644
--- a/arch/x86/platform/efi/quirks.c
+++ b/arch/x86/platform/efi/quirks.c
@@ -195,10 +195,9 @@ static bool can_free_region(u64 start, u64 size)
*/
void __init efi_reserve_boot_services(void)
{
- void *p;
+ efi_memory_desc_t *md;
- for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
- efi_memory_desc_t *md = p;
+ for_each_efi_memory_desc(md) {
u64 start = md->phys_addr;
u64 size = md->num_pages << EFI_PAGE_SHIFT;
bool already_reserved;
@@ -250,10 +249,9 @@ void __init efi_reserve_boot_services(void)
void __init efi_free_boot_services(void)
{
- void *p;
+ efi_memory_desc_t *md;
- for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
- efi_memory_desc_t *md = p;
+ for_each_efi_memory_desc(md) {
unsigned long long start = md->phys_addr;
unsigned long long size = md->num_pages << EFI_PAGE_SHIFT;
diff --git a/arch/x86/ras/Kconfig b/arch/x86/ras/Kconfig
index df280da34825..d957d5f21a86 100644
--- a/arch/x86/ras/Kconfig
+++ b/arch/x86/ras/Kconfig
@@ -1,4 +1,4 @@
-config AMD_MCE_INJ
+config MCE_AMD_INJ
tristate "Simple MCE injection interface for AMD processors"
depends on RAS && EDAC_DECODE_MCE && DEBUG_FS && AMD_NB
default n
diff --git a/arch/x86/ras/Makefile b/arch/x86/ras/Makefile
index dd2c98b84037..5f94546db280 100644
--- a/arch/x86/ras/Makefile
+++ b/arch/x86/ras/Makefile
@@ -1,2 +1,2 @@
-obj-$(CONFIG_AMD_MCE_INJ) += mce_amd_inj.o
+obj-$(CONFIG_MCE_AMD_INJ) += mce_amd_inj.o
diff --git a/arch/x86/ras/mce_amd_inj.c b/arch/x86/ras/mce_amd_inj.c
index 9e02dcaef683..e69f4701a076 100644
--- a/arch/x86/ras/mce_amd_inj.c
+++ b/arch/x86/ras/mce_amd_inj.c
@@ -290,14 +290,33 @@ static void do_inject(void)
wrmsr_on_cpu(cpu, MSR_IA32_MCG_STATUS,
(u32)mcg_status, (u32)(mcg_status >> 32));
- wrmsr_on_cpu(cpu, MSR_IA32_MCx_STATUS(b),
- (u32)i_mce.status, (u32)(i_mce.status >> 32));
+ if (boot_cpu_has(X86_FEATURE_SMCA)) {
+ if (inj_type == DFR_INT_INJ) {
+ wrmsr_on_cpu(cpu, MSR_AMD64_SMCA_MCx_DESTAT(b),
+ (u32)i_mce.status, (u32)(i_mce.status >> 32));
+
+ wrmsr_on_cpu(cpu, MSR_AMD64_SMCA_MCx_DEADDR(b),
+ (u32)i_mce.addr, (u32)(i_mce.addr >> 32));
+ } else {
+ wrmsr_on_cpu(cpu, MSR_AMD64_SMCA_MCx_STATUS(b),
+ (u32)i_mce.status, (u32)(i_mce.status >> 32));
+
+ wrmsr_on_cpu(cpu, MSR_AMD64_SMCA_MCx_ADDR(b),
+ (u32)i_mce.addr, (u32)(i_mce.addr >> 32));
+ }
+
+ wrmsr_on_cpu(cpu, MSR_AMD64_SMCA_MCx_MISC(b),
+ (u32)i_mce.misc, (u32)(i_mce.misc >> 32));
+ } else {
+ wrmsr_on_cpu(cpu, MSR_IA32_MCx_STATUS(b),
+ (u32)i_mce.status, (u32)(i_mce.status >> 32));
- wrmsr_on_cpu(cpu, MSR_IA32_MCx_ADDR(b),
- (u32)i_mce.addr, (u32)(i_mce.addr >> 32));
+ wrmsr_on_cpu(cpu, MSR_IA32_MCx_ADDR(b),
+ (u32)i_mce.addr, (u32)(i_mce.addr >> 32));
- wrmsr_on_cpu(cpu, MSR_IA32_MCx_MISC(b),
- (u32)i_mce.misc, (u32)(i_mce.misc >> 32));
+ wrmsr_on_cpu(cpu, MSR_IA32_MCx_MISC(b),
+ (u32)i_mce.misc, (u32)(i_mce.misc >> 32));
+ }
toggle_hw_mce_inject(cpu, false);