From e535ec0899d1fe52ec3a84c9bc03457ac67ad6f7 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Tue, 20 Sep 2016 14:26:21 +0100 Subject: x86/mm/pat: Prevent hang during boot when mapping pages There's a mixture of signed 32-bit and unsigned 32-bit and 64-bit data types used for keeping track of how many pages have been mapped. This leads to hangs during boot when mapping large numbers of pages (multiple terabytes, as reported by Waiman) because those values are interpreted as being negative. commit 742563777e8d ("x86/mm/pat: Avoid truncation when converting cpa->numpages to address") fixed one of those bugs, but there is another lurking in __change_page_attr_set_clr(). Additionally, the return value type for the populate_*() functions can return negative values when a large number of pages have been mapped, triggering the error paths even though no error occurred. Consistently use 64-bit types on 64-bit platforms when counting pages. Even in the signed case this gives us room for regions 8PiB (pebibytes) in size whilst still allowing the usual negative value error checking idiom. Reported-by: Waiman Long Cc: Ard Biesheuvel Cc: Borislav Petkov Cc: Linus Torvalds CC: Theodore Ts'o Cc: Arnd Bergmann Cc: Greg Kroah-Hartman Cc: Scott J Norton Cc: Douglas Hatch Signed-off-by: Matt Fleming --- arch/x86/mm/pageattr.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 849dc09fa4f0..e3353c97d086 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -917,11 +917,11 @@ static void populate_pte(struct cpa_data *cpa, } } -static int populate_pmd(struct cpa_data *cpa, - unsigned long start, unsigned long end, - unsigned num_pages, pud_t *pud, pgprot_t pgprot) +static long populate_pmd(struct cpa_data *cpa, + unsigned long start, unsigned long end, + unsigned num_pages, pud_t *pud, pgprot_t pgprot) { - unsigned int cur_pages = 0; + long cur_pages = 0; pmd_t *pmd; pgprot_t pmd_pgprot; @@ -991,12 +991,12 @@ static int populate_pmd(struct cpa_data *cpa, return num_pages; } -static int populate_pud(struct cpa_data *cpa, unsigned long start, pgd_t *pgd, - pgprot_t pgprot) +static long populate_pud(struct cpa_data *cpa, unsigned long start, pgd_t *pgd, + pgprot_t pgprot) { pud_t *pud; unsigned long end; - int cur_pages = 0; + long cur_pages = 0; pgprot_t pud_pgprot; end = start + (cpa->numpages << PAGE_SHIFT); @@ -1052,7 +1052,7 @@ static int populate_pud(struct cpa_data *cpa, unsigned long start, pgd_t *pgd, /* Map trailing leftover */ if (start < end) { - int tmp; + long tmp; pud = pud_offset(pgd, start); if (pud_none(*pud)) @@ -1078,7 +1078,7 @@ static int populate_pgd(struct cpa_data *cpa, unsigned long addr) pgprot_t pgprot = __pgprot(_KERNPG_TABLE); pud_t *pud = NULL; /* shut up gcc */ pgd_t *pgd_entry; - int ret; + long ret; pgd_entry = cpa->pgd + pgd_index(addr); @@ -1327,7 +1327,8 @@ static int cpa_process_alias(struct cpa_data *cpa) static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias) { - int ret, numpages = cpa->numpages; + unsigned long numpages = cpa->numpages; + int ret; while (numpages) { /* -- cgit v1.2.3 From 1297667083d5442aafe3e337b9413bf02b114edb Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Mon, 19 Sep 2016 13:09:09 +0100 Subject: x86/efi: Only map RAM into EFI page tables if in mixed-mode Waiman reported that booting with CONFIG_EFI_MIXED enabled on his multi-terabyte HP machine results in boot crashes, because the EFI region mapping functions loop forever while trying to map those regions describing RAM. While this patch doesn't fix the underlying hang, there's really no reason to map EFI_CONVENTIONAL_MEMORY regions into the EFI page tables when mixed-mode is not in use at runtime. Reported-by: Waiman Long Cc: Ard Biesheuvel Cc: Borislav Petkov Cc: Linus Torvalds CC: Theodore Ts'o Cc: Arnd Bergmann Cc: Greg Kroah-Hartman Cc: Scott J Norton Cc: Douglas Hatch Cc: # v4.6+ Signed-off-by: Matt Fleming --- arch/x86/platform/efi/efi_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index 677e29e29473..8dd3784eb075 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -245,7 +245,7 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) * text and allocate a new stack because we can't rely on the * stack pointer being < 4GB. */ - if (!IS_ENABLED(CONFIG_EFI_MIXED)) + if (!IS_ENABLED(CONFIG_EFI_MIXED) || efi_is_native()) return 0; /* -- cgit v1.2.3 From f1e1c9e5e357c05253affb13be29285c5cb56bf0 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 20 Sep 2016 15:12:21 +0200 Subject: perf/x86/intel/bts: Make sure debug store is valid Since commit 4d4c47412464 ("perf/x86/intel/bts: Fix BTS PMI detection") my box goes boom on boot: | .... node #0, CPUs: #1 #2 #3 #4 #5 #6 #7 | BUG: unable to handle kernel NULL pointer dereference at 0000000000000018 | IP: [] intel_bts_interrupt+0x43/0x130 | Call Trace: | d [] intel_pmu_handle_irq+0x51/0x4b0 | [] perf_event_nmi_handler+0x27/0x40 This happens because the code introduced in this commit dereferences the debug store pointer unconditionally. The debug store is not guaranteed to be available, so a NULL pointer check as on other places is required. Fixes: 4d4c47412464 ("perf/x86/intel/bts: Fix BTS PMI detection") Signed-off-by: Sebastian Andrzej Siewior Reviewed-by: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: vince@deater.net Cc: eranian@google.com Link: http://lkml.kernel.org/r/20160920131220.xg5pbdjtznszuyzb@breakpoint.cc Signed-off-by: Thomas Gleixner --- arch/x86/events/intel/bts.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c index bdcd6510992c..6ff66efa0feb 100644 --- a/arch/x86/events/intel/bts.c +++ b/arch/x86/events/intel/bts.c @@ -455,7 +455,7 @@ int intel_bts_interrupt(void) * The only surefire way of knowing if this NMI is ours is by checking * the write ptr against the PMI threshold. */ - if (ds->bts_index >= ds->bts_interrupt_threshold) + if (ds && (ds->bts_index >= ds->bts_interrupt_threshold)) handled = 1; /* -- cgit v1.2.3 From 08b90f0655258411a1b41d856331e20e7ec8d55c Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Tue, 20 Sep 2016 18:48:10 +0300 Subject: perf/x86/intel/bts: Make it an exclusive PMU Just like intel_pt, intel_bts can only handle one event at a time, which is the reason we introduced PERF_PMU_CAP_EXCLUSIVE in the first place. However, at the moment one can have as many intel_bts events within the same context at the same time as one pleases. Only one of them, however, will get scheduled and receive the actual trace data. Fix this by making intel_bts an "exclusive" PMU. Signed-off-by: Alexander Shishkin Acked-by: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: vince@deater.net Link: http://lkml.kernel.org/r/20160920154811.3255-2-alexander.shishkin@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/bts.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c index 6ff66efa0feb..982c9e31daca 100644 --- a/arch/x86/events/intel/bts.c +++ b/arch/x86/events/intel/bts.c @@ -584,7 +584,8 @@ static __init int bts_init(void) if (!boot_cpu_has(X86_FEATURE_DTES64) || !x86_pmu.bts) return -ENODEV; - bts_pmu.capabilities = PERF_PMU_CAP_AUX_NO_SG | PERF_PMU_CAP_ITRACE; + bts_pmu.capabilities = PERF_PMU_CAP_AUX_NO_SG | PERF_PMU_CAP_ITRACE | + PERF_PMU_CAP_EXCLUSIVE; bts_pmu.task_ctx_nr = perf_sw_context; bts_pmu.event_init = bts_event_init; bts_pmu.add = bts_event_add; -- cgit v1.2.3