From 81ff2c37f9e5d77593928df0536d86443195fd64 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 18 Nov 2019 16:21:12 +0100 Subject: x86/stackframe/32: Repair 32-bit Xen PV Once again RPL checks have been introduced which don't account for a 32-bit kernel living in ring 1 when running in a PV Xen domain. The case in FIXUP_FRAME has been preventing boot. Adjust BUG_IF_WRONG_CR3 as well to guard against future uses of the macro on a code path reachable when running in PV mode under Xen; I have to admit that I stopped at a certain point trying to figure out whether there are present ones. Fixes: 3c88c692c287 ("x86/stackframe/32: Provide consistent pt_regs") Signed-off-by: Jan Beulich Signed-off-by: Thomas Gleixner Cc: Stable Team Link: https://lore.kernel.org/r/0fad341f-b7f5-f859-d55d-f0084ee7087e@suse.com --- arch/x86/include/asm/segment.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h index ac3892920419..6669164abadc 100644 --- a/arch/x86/include/asm/segment.h +++ b/arch/x86/include/asm/segment.h @@ -31,6 +31,18 @@ */ #define SEGMENT_RPL_MASK 0x3 +/* + * When running on Xen PV, the actual privilege level of the kernel is 1, + * not 0. Testing the Requested Privilege Level in a segment selector to + * determine whether the context is user mode or kernel mode with + * SEGMENT_RPL_MASK is wrong because the PV kernel's privilege level + * matches the 0x3 mask. + * + * Testing with USER_SEGMENT_RPL_MASK is valid for both native and Xen PV + * kernels because privilege level 2 is never used. + */ +#define USER_SEGMENT_RPL_MASK 0x2 + /* User mode is privilege level 3: */ #define USER_RPL 0x3 -- cgit v1.2.3 From 880a98c339961eaa074393e3a2117cbe9125b8bb Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 21 Nov 2019 00:40:24 +0100 Subject: x86/cpu_entry_area: Add guard page for entry stack on 32bit The entry stack in the cpu entry area is protected against overflow by the readonly GDT on 64-bit, but on 32-bit the GDT needs to be writeable and therefore does not trigger a fault on stack overflow. Add a guard page. Fixes: c482feefe1ae ("x86/entry/64: Make cpu_entry_area.tss read-only") Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Cc: stable@kernel.org --- arch/x86/include/asm/cpu_entry_area.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/cpu_entry_area.h b/arch/x86/include/asm/cpu_entry_area.h index 8348f7d69fd5..905d89c80d3f 100644 --- a/arch/x86/include/asm/cpu_entry_area.h +++ b/arch/x86/include/asm/cpu_entry_area.h @@ -78,8 +78,12 @@ struct cpu_entry_area { /* * The GDT is just below entry_stack and thus serves (on x86_64) as - * a a read-only guard page. + * a read-only guard page. On 32-bit the GDT must be writeable, so + * it needs an extra guard page. */ +#ifdef CONFIG_X86_32 + char guard_entry_stack[PAGE_SIZE]; +#endif struct entry_stack_page entry_stack_page; /* -- cgit v1.2.3 From 05b042a1944322844eaae7ea596d5f154166d68a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 24 Nov 2019 11:21:44 +0100 Subject: x86/pti/32: Calculate the various PTI cpu_entry_area sizes correctly, make the CPU_ENTRY_AREA_PAGES assert precise MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When two recent commits that increased the size of the 'struct cpu_entry_area' were merged in -tip, the 32-bit defconfig build started failing on the following build time assert: ./include/linux/compiler.h:391:38: error: call to ‘__compiletime_assert_189’ declared with attribute error: BUILD_BUG_ON failed: CPU_ENTRY_AREA_PAGES * PAGE_SIZE < CPU_ENTRY_AREA_MAP_SIZE arch/x86/mm/cpu_entry_area.c:189:2: note: in expansion of macro ‘BUILD_BUG_ON’ In function ‘setup_cpu_entry_area_ptes’, Which corresponds to the following build time assert: BUILD_BUG_ON(CPU_ENTRY_AREA_PAGES * PAGE_SIZE < CPU_ENTRY_AREA_MAP_SIZE); The purpose of this assert is to sanity check the fixed-value definition of CPU_ENTRY_AREA_PAGES arch/x86/include/asm/pgtable_32_types.h: #define CPU_ENTRY_AREA_PAGES (NR_CPUS * 41) The '41' is supposed to match sizeof(struct cpu_entry_area)/PAGE_SIZE, which value we didn't want to define in such a low level header, because it would cause dependency hell. Every time the size of cpu_entry_area is changed, we have to adjust CPU_ENTRY_AREA_PAGES accordingly - and this assert is checking that constraint. But the assert is both imprecise and buggy, primarily because it doesn't include the single readonly IDT page that is mapped at CPU_ENTRY_AREA_BASE (which begins at a PMD boundary). This bug was hidden by the fact that by accident CPU_ENTRY_AREA_PAGES is defined too large upstream (v5.4-rc8): #define CPU_ENTRY_AREA_PAGES (NR_CPUS * 40) While 'struct cpu_entry_area' is 155648 bytes, or 38 pages. So we had two extra pages, which hid the bug. The following commit (not yet upstream) increased the size to 40 pages: x86/iopl: ("Restrict iopl() permission scope") ... but increased CPU_ENTRY_AREA_PAGES only 41 - i.e. shortening the gap to just 1 extra page. Then another not-yet-upstream commit changed the size again: 880a98c33996: ("x86/cpu_entry_area: Add guard page for entry stack on 32bit") Which increased the cpu_entry_area size from 38 to 39 pages, but didn't change CPU_ENTRY_AREA_PAGES (kept it at 40). This worked fine, because we still had a page left from the accidental 'reserve'. But when these two commits were merged into the same tree, the combined size of cpu_entry_area grew from 38 to 40 pages, while CPU_ENTRY_AREA_PAGES finally caught up to 40 as well. Which is fine in terms of functionality, but the assert broke: BUILD_BUG_ON(CPU_ENTRY_AREA_PAGES * PAGE_SIZE < CPU_ENTRY_AREA_MAP_SIZE); because CPU_ENTRY_AREA_MAP_SIZE is the total size of the area, which is 1 page larger due to the IDT page. To fix all this, change the assert to two precise asserts: BUILD_BUG_ON((CPU_ENTRY_AREA_PAGES+1)*PAGE_SIZE != CPU_ENTRY_AREA_MAP_SIZE); BUILD_BUG_ON(CPU_ENTRY_AREA_TOTAL_SIZE != CPU_ENTRY_AREA_MAP_SIZE); This takes the IDT page into account, and also connects the size-based define of CPU_ENTRY_AREA_TOTAL_SIZE with the address-subtraction based define of CPU_ENTRY_AREA_MAP_SIZE. Also clean up some of the names which made it rather confusing: - 'CPU_ENTRY_AREA_TOT_SIZE' wasn't actually the 'total' size of the cpu-entry-area, but the per-cpu array size, so rename this to CPU_ENTRY_AREA_ARRAY_SIZE. - Introduce CPU_ENTRY_AREA_TOTAL_SIZE that _is_ the total mapping size, with the IDT included. - Add comments where '+1' denotes the IDT mapping - it wasn't obvious and took me about 3 hours to decode... Finally, because this particular commit is actually applied after this patch: 880a98c33996: ("x86/cpu_entry_area: Add guard page for entry stack on 32bit") Fix the CPU_ENTRY_AREA_PAGES value from 40 pages to the correct 39 pages. All future commits that change cpu_entry_area will have to adjust this value precisely. As a side note, we should probably attempt to remove CPU_ENTRY_AREA_PAGES and derive its value directly from the structure, without causing header hell - but that is an adventure for another day! :-) Fixes: 880a98c33996: ("x86/cpu_entry_area: Add guard page for entry stack on 32bit") Cc: Thomas Gleixner Cc: Borislav Petkov Cc: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Andy Lutomirski Cc: stable@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/cpu_entry_area.h | 12 +++++++----- arch/x86/include/asm/pgtable_32_types.h | 8 ++++---- arch/x86/mm/cpu_entry_area.c | 4 +++- 3 files changed, 14 insertions(+), 10 deletions(-) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/cpu_entry_area.h b/arch/x86/include/asm/cpu_entry_area.h index 905d89c80d3f..ea866c7bf31d 100644 --- a/arch/x86/include/asm/cpu_entry_area.h +++ b/arch/x86/include/asm/cpu_entry_area.h @@ -98,7 +98,6 @@ struct cpu_entry_area { */ struct cea_exception_stacks estacks; #endif -#ifdef CONFIG_CPU_SUP_INTEL /* * Per CPU debug store for Intel performance monitoring. Wastes a * full page at the moment. @@ -109,11 +108,13 @@ struct cpu_entry_area { * Reserve enough fixmap PTEs. */ struct debug_store_buffers cpu_debug_buffers; -#endif }; -#define CPU_ENTRY_AREA_SIZE (sizeof(struct cpu_entry_area)) -#define CPU_ENTRY_AREA_TOT_SIZE (CPU_ENTRY_AREA_SIZE * NR_CPUS) +#define CPU_ENTRY_AREA_SIZE (sizeof(struct cpu_entry_area)) +#define CPU_ENTRY_AREA_ARRAY_SIZE (CPU_ENTRY_AREA_SIZE * NR_CPUS) + +/* Total size includes the readonly IDT mapping page as well: */ +#define CPU_ENTRY_AREA_TOTAL_SIZE (CPU_ENTRY_AREA_ARRAY_SIZE + PAGE_SIZE) DECLARE_PER_CPU(struct cpu_entry_area *, cpu_entry_area); DECLARE_PER_CPU(struct cea_exception_stacks *, cea_exception_stacks); @@ -121,13 +122,14 @@ DECLARE_PER_CPU(struct cea_exception_stacks *, cea_exception_stacks); extern void setup_cpu_entry_areas(void); extern void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags); +/* Single page reserved for the readonly IDT mapping: */ #define CPU_ENTRY_AREA_RO_IDT CPU_ENTRY_AREA_BASE #define CPU_ENTRY_AREA_PER_CPU (CPU_ENTRY_AREA_RO_IDT + PAGE_SIZE) #define CPU_ENTRY_AREA_RO_IDT_VADDR ((void *)CPU_ENTRY_AREA_RO_IDT) #define CPU_ENTRY_AREA_MAP_SIZE \ - (CPU_ENTRY_AREA_PER_CPU + CPU_ENTRY_AREA_TOT_SIZE - CPU_ENTRY_AREA_BASE) + (CPU_ENTRY_AREA_PER_CPU + CPU_ENTRY_AREA_ARRAY_SIZE - CPU_ENTRY_AREA_BASE) extern struct cpu_entry_area *get_cpu_entry_area(int cpu); diff --git a/arch/x86/include/asm/pgtable_32_types.h b/arch/x86/include/asm/pgtable_32_types.h index b0bc0fff5f1f..1636eb8e5a5b 100644 --- a/arch/x86/include/asm/pgtable_32_types.h +++ b/arch/x86/include/asm/pgtable_32_types.h @@ -44,11 +44,11 @@ extern bool __vmalloc_start_set; /* set once high_memory is set */ * Define this here and validate with BUILD_BUG_ON() in pgtable_32.c * to avoid include recursion hell */ -#define CPU_ENTRY_AREA_PAGES (NR_CPUS * 40) +#define CPU_ENTRY_AREA_PAGES (NR_CPUS * 39) -#define CPU_ENTRY_AREA_BASE \ - ((FIXADDR_TOT_START - PAGE_SIZE * (CPU_ENTRY_AREA_PAGES + 1)) \ - & PMD_MASK) +/* The +1 is for the readonly IDT page: */ +#define CPU_ENTRY_AREA_BASE \ + ((FIXADDR_TOT_START - PAGE_SIZE*(CPU_ENTRY_AREA_PAGES+1)) & PMD_MASK) #define LDT_BASE_ADDR \ ((CPU_ENTRY_AREA_BASE - PAGE_SIZE) & PMD_MASK) diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c index 752ad11d6868..d9643647a9ce 100644 --- a/arch/x86/mm/cpu_entry_area.c +++ b/arch/x86/mm/cpu_entry_area.c @@ -178,7 +178,9 @@ static __init void setup_cpu_entry_area_ptes(void) #ifdef CONFIG_X86_32 unsigned long start, end; - BUILD_BUG_ON(CPU_ENTRY_AREA_PAGES * PAGE_SIZE < CPU_ENTRY_AREA_MAP_SIZE); + /* The +1 is for the readonly IDT: */ + BUILD_BUG_ON((CPU_ENTRY_AREA_PAGES+1)*PAGE_SIZE != CPU_ENTRY_AREA_MAP_SIZE); + BUILD_BUG_ON(CPU_ENTRY_AREA_TOTAL_SIZE != CPU_ENTRY_AREA_MAP_SIZE); BUG_ON(CPU_ENTRY_AREA_BASE & ~PMD_MASK); start = CPU_ENTRY_AREA_BASE; -- cgit v1.2.3