From 5e6e9852d6f76e01b2e6803c74258afa5b432bc5 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig
Date: Thu, 3 Sep 2020 16:22:35 +0200
Subject: uaccess: add infrastructure for kernel builds with set_fs()

Add a CONFIG_SET_FS option that is selected by architectures that
implement set_fs, which is all of them initially. If the option is not
set, stubs for the routines related to overriding the address space are
provided so that architectures can start to opt out of providing set_fs.

Signed-off-by: Christoph Hellwig
Reviewed-by: Kees Cook
Signed-off-by: Al Viro
---
 arch/x86/Kconfig | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch/x86')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 7101ac64bb20..f85c13355732 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -237,6 +237,7 @@ config X86
 	select HAVE_ARCH_KCSAN			if X86_64
 	select X86_FEATURE_NAMES		if PROC_FS
 	select PROC_PID_ARCH_STATUS		if PROC_FS
+	select SET_FS
 	imply IMA_SECURE_AND_OR_TRUSTED_BOOT	if EFI
 
 config INSTRUCTION_DECODER
-- cgit v1.2.3
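The "(limited to 'arch/x86')" filter above hides the generic half of this
patch. When an architecture does not select SET_FS, include/linux/uaccess.h
has to provide stand-ins so that common code keeps building. A minimal sketch
of what those stubs amount to (reconstructed for illustration, not quoted
from the patch; the exact set of stubs may differ):

	#ifndef CONFIG_SET_FS
	typedef struct {
		/* empty dummy */
	} mm_segment_t;

	#ifndef TASK_SIZE_MAX
	#define TASK_SIZE_MAX			TASK_SIZE
	#endif

	/* with no overridable limit, uaccess never targets kernel memory */
	#define uaccess_kernel()		(false)
	#define user_addr_max()			(TASK_SIZE_MAX)

	static inline mm_segment_t force_uaccess_begin(void)
	{
		return (mm_segment_t) { };	/* nothing to save */
	}

	static inline void force_uaccess_end(mm_segment_t oldfs)
	{
		/* nothing to restore */
	}
	#endif /* CONFIG_SET_FS */
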
From 999c83e8ffd90caa00ee6caf054e037cf8a27d0e Mon Sep 17 00:00:00 2001
From: Christoph Hellwig
Date: Thu, 3 Sep 2020 16:22:38 +0200
Subject: x86: move PAGE_OFFSET, TASK_SIZE & friends to page_{32,64}_types.h

At least for 64-bit this moves them closer to some of the defines they
are based on, and it prepares for using the TASK_SIZE_MAX definition
from assembly.

Signed-off-by: Christoph Hellwig
Reviewed-by: Kees Cook
Signed-off-by: Al Viro
---
 arch/x86/include/asm/page_32_types.h | 11 ++++++++
 arch/x86/include/asm/page_64_types.h | 38 ++++++++++++++++++++++++++++
 arch/x86/include/asm/processor.h     | 49 ------------------------------------
 3 files changed, 49 insertions(+), 49 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/page_32_types.h b/arch/x86/include/asm/page_32_types.h
index 565ad755c785..26236925fb2c 100644
--- a/arch/x86/include/asm/page_32_types.h
+++ b/arch/x86/include/asm/page_32_types.h
@@ -41,6 +41,17 @@
 #define __VIRTUAL_MASK_SHIFT	32
 #endif	/* CONFIG_X86_PAE */
 
+/*
+ * User space process size: 3GB (default).
+ */
+#define IA32_PAGE_OFFSET	PAGE_OFFSET
+#define TASK_SIZE		PAGE_OFFSET
+#define TASK_SIZE_LOW		TASK_SIZE
+#define TASK_SIZE_MAX		TASK_SIZE
+#define DEFAULT_MAP_WINDOW	TASK_SIZE
+#define STACK_TOP		TASK_SIZE
+#define STACK_TOP_MAX		STACK_TOP
+
 /*
  * Kernel image size is limited to 512 MB (see in arch/x86/kernel/head_32.S)
  */
diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h
index 288b065955b7..996595c9897e 100644
--- a/arch/x86/include/asm/page_64_types.h
+++ b/arch/x86/include/asm/page_64_types.h
@@ -58,6 +58,44 @@
 #define __VIRTUAL_MASK_SHIFT	47
 #endif
 
+/*
+ * User space process size. This is the first address outside the user range.
+ * There are a few constraints that determine this:
+ *
+ * On Intel CPUs, if a SYSCALL instruction is at the highest canonical
+ * address, then that syscall will enter the kernel with a
+ * non-canonical return address, and SYSRET will explode dangerously.
+ * We avoid this particular problem by preventing anything executable
+ * from being mapped at the maximum canonical address.
+ *
+ * On AMD CPUs in the Ryzen family, there's a nasty bug in which the
+ * CPUs malfunction if they execute code from the highest canonical page.
+ * They'll speculate right off the end of the canonical space, and
+ * bad things happen. This is worked around in the same way as the
+ * Intel problem.
+ *
+ * With page table isolation enabled, we map the LDT in ... [stay tuned]
+ */
+#define TASK_SIZE_MAX	((1UL << __VIRTUAL_MASK_SHIFT) - PAGE_SIZE)
+
+#define DEFAULT_MAP_WINDOW	((1UL << 47) - PAGE_SIZE)
+
+/* This decides where the kernel will search for a free chunk of vm
+ * space during mmap's.
+ */
+#define IA32_PAGE_OFFSET	((current->personality & ADDR_LIMIT_3GB) ? \
+					0xc0000000 : 0xFFFFe000)
+
+#define TASK_SIZE_LOW		(test_thread_flag(TIF_ADDR32) ? \
+					IA32_PAGE_OFFSET : DEFAULT_MAP_WINDOW)
+#define TASK_SIZE		(test_thread_flag(TIF_ADDR32) ? \
+					IA32_PAGE_OFFSET : TASK_SIZE_MAX)
+#define TASK_SIZE_OF(child)	((test_tsk_thread_flag(child, TIF_ADDR32)) ? \
+					IA32_PAGE_OFFSET : TASK_SIZE_MAX)
+
+#define STACK_TOP		TASK_SIZE_LOW
+#define STACK_TOP_MAX		TASK_SIZE_MAX
+
 /*
  * Maximum kernel image size is limited to 1 GiB, due to the fixmap living
  * in the next 1 GiB (see level2_kernel_pgt in arch/x86/kernel/head_64.S).
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 97143d87994c..1618eeb08361 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -782,17 +782,6 @@ static inline void spin_lock_prefetch(const void *x)
 })
 
 #ifdef CONFIG_X86_32
-/*
- * User space process size: 3GB (default).
- */
-#define IA32_PAGE_OFFSET	PAGE_OFFSET
-#define TASK_SIZE		PAGE_OFFSET
-#define TASK_SIZE_LOW		TASK_SIZE
-#define TASK_SIZE_MAX		TASK_SIZE
-#define DEFAULT_MAP_WINDOW	TASK_SIZE
-#define STACK_TOP		TASK_SIZE
-#define STACK_TOP_MAX		STACK_TOP
-
 #define INIT_THREAD  {							  \
 	.sp0			= TOP_OF_INIT_STACK,			  \
 	.sysenter_cs		= __KERNEL_CS,				  \
@@ -802,44 +791,6 @@ static inline void spin_lock_prefetch(const void *x)
 #define KSTK_ESP(task)		(task_pt_regs(task)->sp)
 
 #else
-/*
- * User space process size. This is the first address outside the user range.
- * There are a few constraints that determine this:
- *
- * On Intel CPUs, if a SYSCALL instruction is at the highest canonical
- * address, then that syscall will enter the kernel with a
- * non-canonical return address, and SYSRET will explode dangerously.
- * We avoid this particular problem by preventing anything executable
- * from being mapped at the maximum canonical address.
- *
- * On AMD CPUs in the Ryzen family, there's a nasty bug in which the
- * CPUs malfunction if they execute code from the highest canonical page.
- * They'll speculate right off the end of the canonical space, and
- * bad things happen. This is worked around in the same way as the
- * Intel problem.
- *
- * With page table isolation enabled, we map the LDT in ... [stay tuned]
- */
-#define TASK_SIZE_MAX	((1UL << __VIRTUAL_MASK_SHIFT) - PAGE_SIZE)
-
-#define DEFAULT_MAP_WINDOW	((1UL << 47) - PAGE_SIZE)
-
-/* This decides where the kernel will search for a free chunk of vm
- * space during mmap's.
- */
-#define IA32_PAGE_OFFSET	((current->personality & ADDR_LIMIT_3GB) ? \
-					0xc0000000 : 0xFFFFe000)
-
-#define TASK_SIZE_LOW		(test_thread_flag(TIF_ADDR32) ? \
-					IA32_PAGE_OFFSET : DEFAULT_MAP_WINDOW)
-#define TASK_SIZE		(test_thread_flag(TIF_ADDR32) ? \
-					IA32_PAGE_OFFSET : TASK_SIZE_MAX)
-#define TASK_SIZE_OF(child)	((test_tsk_thread_flag(child, TIF_ADDR32)) ? \
-					IA32_PAGE_OFFSET : TASK_SIZE_MAX)
-
-#define STACK_TOP		TASK_SIZE_LOW
-#define STACK_TOP_MAX		TASK_SIZE_MAX
-
 #define INIT_THREAD  { \
 	.addr_limit		= KERNEL_DS, \
 }
-- cgit v1.2.3
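One detail worth keeping in mind for the later patches: the
"#define __VIRTUAL_MASK_SHIFT 47" visible in the page_64_types.h context
above is only the !CONFIG_X86_5LEVEL branch. With CONFIG_X86_5LEVEL the
shift is chosen at boot, which is why TASK_SIZE_MAX stops being a
compile-time constant there. From the same header, roughly (quoted from
memory; the surrounding lines may differ):

	#ifdef CONFIG_X86_5LEVEL
	#define __VIRTUAL_MASK_SHIFT	(pgtable_l5_enabled() ? 56 : 47)
	#else
	#define __VIRTUAL_MASK_SHIFT	47
	#endif
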
From a1d826d475aafe63775b910e86ccd1bcf1e5a6e1 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig
Date: Thu, 3 Sep 2020 16:22:39 +0200
Subject: x86: make TASK_SIZE_MAX usable from assembly code

For 64-bit the only thing missing was a strategic _AC, and for 32-bit
we need to use __PAGE_OFFSET instead of PAGE_OFFSET in the TASK_SIZE
definition to escape the explicit unsigned long cast. This just works
because __PAGE_OFFSET is defined using _AC itself and thus never needs
the cast anyway.

Signed-off-by: Christoph Hellwig
Reviewed-by: Kees Cook
Signed-off-by: Al Viro
---
 arch/x86/include/asm/page_32_types.h | 4 ++--
 arch/x86/include/asm/page_64_types.h | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/page_32_types.h b/arch/x86/include/asm/page_32_types.h
index 26236925fb2c..f462895a33e4 100644
--- a/arch/x86/include/asm/page_32_types.h
+++ b/arch/x86/include/asm/page_32_types.h
@@ -44,8 +44,8 @@
 /*
  * User space process size: 3GB (default).
  */
-#define IA32_PAGE_OFFSET	PAGE_OFFSET
-#define TASK_SIZE		PAGE_OFFSET
+#define IA32_PAGE_OFFSET	__PAGE_OFFSET
+#define TASK_SIZE		__PAGE_OFFSET
 #define TASK_SIZE_LOW		TASK_SIZE
 #define TASK_SIZE_MAX		TASK_SIZE
 #define DEFAULT_MAP_WINDOW	TASK_SIZE
diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h
index 996595c9897e..838515daf87b 100644
--- a/arch/x86/include/asm/page_64_types.h
+++ b/arch/x86/include/asm/page_64_types.h
@@ -76,7 +76,7 @@
  *
  * With page table isolation enabled, we map the LDT in ... [stay tuned]
  */
-#define TASK_SIZE_MAX	((1UL << __VIRTUAL_MASK_SHIFT) - PAGE_SIZE)
+#define TASK_SIZE_MAX	((_AC(1,UL) << __VIRTUAL_MASK_SHIFT) - PAGE_SIZE)
 
 #define DEFAULT_MAP_WINDOW	((1UL << 47) - PAGE_SIZE)
-- cgit v1.2.3
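Why the _AC is "strategic": the assembler cannot digest C integer suffixes
such as the UL in 1UL. The _AC() macro from include/uapi/linux/const.h drops
the suffix when a header is pulled into assembly and pastes it back on for C,
roughly:

	#ifdef __ASSEMBLY__
	#define _AC(X,Y)	X		/* bare constant for the assembler */
	#else
	#define __AC(X,Y)	(X##Y)
	#define _AC(X,Y)	__AC(X,Y)	/* typed constant, e.g. 1UL, for C */
	#endif

So _AC(1,UL) << __VIRTUAL_MASK_SHIFT assembles as 1 << shift and compiles as
1UL << shift, which is exactly what the next patch needs.
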
From 47058bb54b57962b3958a936ddbc59355e4c5504 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig
Date: Thu, 3 Sep 2020 16:22:40 +0200
Subject: x86: remove address space overrides using set_fs()

Stop providing the possibility to override the address space using
set_fs() now that there is no need for that any more.

To properly handle the TASK_SIZE_MAX checking for 4 vs 5-level page
tables on x86, a new alternative is introduced, which just like the one
in entry_64.S has to use the hardcoded virtual address bits to escape
the fact that TASK_SIZE_MAX isn't actually a constant when 5-level page
tables are enabled.

Signed-off-by: Christoph Hellwig
Reviewed-by: Kees Cook
Signed-off-by: Al Viro
---
 arch/x86/Kconfig                   |  1 -
 arch/x86/ia32/ia32_aout.c          |  1 -
 arch/x86/include/asm/processor.h   | 11 +--------
 arch/x86/include/asm/thread_info.h |  2 --
 arch/x86/include/asm/uaccess.h     | 26 +--------------------
 arch/x86/kernel/asm-offsets.c      |  3 ---
 arch/x86/lib/getuser.S             | 47 +++++++++++++++++++-------------------
 arch/x86/lib/putuser.S             | 25 ++++++++++----------
 8 files changed, 39 insertions(+), 77 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index f85c13355732..7101ac64bb20 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -237,7 +237,6 @@ config X86
 	select HAVE_ARCH_KCSAN			if X86_64
 	select X86_FEATURE_NAMES		if PROC_FS
 	select PROC_PID_ARCH_STATUS		if PROC_FS
-	select SET_FS
 	imply IMA_SECURE_AND_OR_TRUSTED_BOOT	if EFI
 
 config INSTRUCTION_DECODER
diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c
index ca8a657edf59..a09fc37ead9d 100644
--- a/arch/x86/ia32/ia32_aout.c
+++ b/arch/x86/ia32/ia32_aout.c
@@ -239,7 +239,6 @@ beyond_if:
 	(regs)->ss = __USER32_DS;
 	regs->r8 = regs->r9 = regs->r10 = regs->r11 =
 	regs->r12 = regs->r13 = regs->r14 = regs->r15 = 0;
-	set_fs(USER_DS);
 	return 0;
 }
 
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 1618eeb08361..189573d95c3a 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -482,10 +482,6 @@ extern unsigned int fpu_user_xstate_size;
 
 struct perf_event;
 
-typedef struct {
-	unsigned long		seg;
-} mm_segment_t;
-
 struct thread_struct {
 	/* Cached TLS descriptors: */
 	struct desc_struct	tls_array[GDT_ENTRY_TLS_ENTRIES];
@@ -538,8 +534,6 @@ struct thread_struct {
 	 */
 	unsigned long		iopl_emul;
 
-	mm_segment_t		addr_limit;
-
 	unsigned int		sig_on_uaccess_err:1;
 
 	/* Floating point and extended processor state */
@@ -785,15 +779,12 @@ static inline void spin_lock_prefetch(const void *x)
 #define INIT_THREAD  {							  \
 	.sp0			= TOP_OF_INIT_STACK,			  \
 	.sysenter_cs		= __KERNEL_CS,				  \
-	.addr_limit		= KERNEL_DS,				  \
 }
 
 #define KSTK_ESP(task)		(task_pt_regs(task)->sp)
 
 #else
-#define INIT_THREAD { \
-	.addr_limit		= KERNEL_DS, \
-}
+#define INIT_THREAD { }
 
 extern unsigned long KSTK_ESP(struct task_struct *task);
 
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 267701ae3d86..44733a4bfc42 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -102,7 +102,6 @@ struct thread_info {
 #define TIF_SYSCALL_TRACEPOINT	28	/* syscall tracepoint instrumentation */
 #define TIF_ADDR32		29	/* 32-bit address space on 64 bits */
 #define TIF_X32			30	/* 32-bit native x86-64 binary */
-#define TIF_FSCHECK		31	/* Check FS is USER_DS on return */
 
 #define _TIF_SYSCALL_TRACE	(1 << TIF_SYSCALL_TRACE)
 #define _TIF_NOTIFY_RESUME	(1 << TIF_NOTIFY_RESUME)
@@ -131,7 +130,6 @@ struct thread_info {
 #define _TIF_SYSCALL_TRACEPOINT	(1 << TIF_SYSCALL_TRACEPOINT)
 #define _TIF_ADDR32		(1 << TIF_ADDR32)
 #define _TIF_X32		(1 << TIF_X32)
-#define _TIF_FSCHECK		(1 << TIF_FSCHECK)
 
 /* flags to check in __switch_to() */
 #define _TIF_WORK_CTXSW_BASE \
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index ecefaffd15d4..a4ceda0510ea 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -12,30 +12,6 @@
 #include
 #include
 
-/*
- * The fs value determines whether argument validity checking should be
- * performed or not. If get_fs() == USER_DS, checking is performed, with
- * get_fs() == KERNEL_DS, checking is bypassed.
- *
- * For historical reasons, these macros are grossly misnamed.
- */
-
-#define MAKE_MM_SEG(s)	((mm_segment_t) { (s) })
-
-#define KERNEL_DS	MAKE_MM_SEG(-1UL)
-#define USER_DS		MAKE_MM_SEG(TASK_SIZE_MAX)
-
-#define get_fs()	(current->thread.addr_limit)
-static inline void set_fs(mm_segment_t fs)
-{
-	current->thread.addr_limit = fs;
-	/* On user-mode return, check fs is correct */
-	set_thread_flag(TIF_FSCHECK);
-}
-
-#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg)
-#define user_addr_max() (current->thread.addr_limit.seg)
-
 /*
  * Test whether a block of memory is a valid user space address.
  * Returns 0 if the range is valid, nonzero otherwise.
@@ -93,7 +69,7 @@ static inline bool pagefault_disabled(void);
 #define access_ok(addr, size)				\
 ({							\
 	WARN_ON_IN_IRQ();				\
-	likely(!__range_not_ok(addr, size, user_addr_max()));	\
+	likely(!__range_not_ok(addr, size, TASK_SIZE_MAX));	\
 })
 
 /*
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index 3ca07ad552ae..70b7154f4bdd 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -37,9 +37,6 @@ static void __used common(void)
 	OFFSET(TASK_stack_canary, task_struct, stack_canary);
 #endif
 
-	BLANK();
-	OFFSET(TASK_addr_limit, task_struct, thread.addr_limit);
-
 	BLANK();
 	OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx);
 
diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S
index c8a85b512796..2f052bc96866 100644
--- a/arch/x86/lib/getuser.S
+++ b/arch/x86/lib/getuser.S
@@ -35,10 +35,19 @@
 #include
 #include
 
+#ifdef CONFIG_X86_5LEVEL
+#define LOAD_TASK_SIZE_MINUS_N(n) \
+	ALTERNATIVE __stringify(mov $((1 << 47) - 4096 - (n)),%rdx), \
+		    __stringify(mov $((1 << 56) - 4096 - (n)),%rdx), X86_FEATURE_LA57
+#else
+#define LOAD_TASK_SIZE_MINUS_N(n) \
+	mov $(TASK_SIZE_MAX - (n)),%_ASM_DX
+#endif
+
 	.text
 SYM_FUNC_START(__get_user_1)
-	mov PER_CPU_VAR(current_task), %_ASM_DX
-	cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX
+	LOAD_TASK_SIZE_MINUS_N(0)
+	cmp %_ASM_DX,%_ASM_AX
 	jae bad_get_user
 	sbb %_ASM_DX, %_ASM_DX		/* array_index_mask_nospec() */
 	and %_ASM_DX, %_ASM_AX
@@ -51,15 +60,13 @@ SYM_FUNC_END(__get_user_1)
 EXPORT_SYMBOL(__get_user_1)
 
 SYM_FUNC_START(__get_user_2)
-	add $1,%_ASM_AX
-	jc bad_get_user
-	mov PER_CPU_VAR(current_task), %_ASM_DX
-	cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX
+	LOAD_TASK_SIZE_MINUS_N(1)
+	cmp %_ASM_DX,%_ASM_AX
 	jae bad_get_user
 	sbb %_ASM_DX, %_ASM_DX		/* array_index_mask_nospec() */
 	and %_ASM_DX, %_ASM_AX
 	ASM_STAC
-2:	movzwl -1(%_ASM_AX),%edx
+2:	movzwl (%_ASM_AX),%edx
 	xor %eax,%eax
 	ASM_CLAC
 	ret
@@ -67,15 +74,13 @@ SYM_FUNC_END(__get_user_2)
 EXPORT_SYMBOL(__get_user_2)
 
 SYM_FUNC_START(__get_user_4)
-	add $3,%_ASM_AX
-	jc bad_get_user
-	mov PER_CPU_VAR(current_task), %_ASM_DX
-	cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX
+	LOAD_TASK_SIZE_MINUS_N(3)
+	cmp %_ASM_DX,%_ASM_AX
 	jae bad_get_user
 	sbb %_ASM_DX, %_ASM_DX		/* array_index_mask_nospec() */
 	and %_ASM_DX, %_ASM_AX
 	ASM_STAC
-3:	movl -3(%_ASM_AX),%edx
+3:	movl (%_ASM_AX),%edx
 	xor %eax,%eax
 	ASM_CLAC
 	ret
@@ -84,29 +89,25 @@ EXPORT_SYMBOL(__get_user_4)
 
 SYM_FUNC_START(__get_user_8)
 #ifdef CONFIG_X86_64
-	add $7,%_ASM_AX
-	jc bad_get_user
-	mov PER_CPU_VAR(current_task), %_ASM_DX
-	cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX
+	LOAD_TASK_SIZE_MINUS_N(7)
+	cmp %_ASM_DX,%_ASM_AX
 	jae bad_get_user
 	sbb %_ASM_DX, %_ASM_DX		/* array_index_mask_nospec() */
 	and %_ASM_DX, %_ASM_AX
 	ASM_STAC
-4:	movq -7(%_ASM_AX),%rdx
+4:	movq (%_ASM_AX),%rdx
 	xor %eax,%eax
 	ASM_CLAC
 	ret
 #else
-	add $7,%_ASM_AX
-	jc bad_get_user_8
-	mov PER_CPU_VAR(current_task), %_ASM_DX
-	cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX
+	LOAD_TASK_SIZE_MINUS_N(7)
+	cmp %_ASM_DX,%_ASM_AX
 	jae bad_get_user_8
 	sbb %_ASM_DX, %_ASM_DX		/* array_index_mask_nospec() */
 	and %_ASM_DX, %_ASM_AX
 	ASM_STAC
-4:	movl -7(%_ASM_AX),%edx
-5:	movl -3(%_ASM_AX),%ecx
+4:	movl (%_ASM_AX),%edx
+5:	movl 4(%_ASM_AX),%ecx
 	xor %eax,%eax
 	ASM_CLAC
 	ret
 
diff --git a/arch/x86/lib/putuser.S b/arch/x86/lib/putuser.S
index 7c7c92db8497..358239d77dff 100644
--- a/arch/x86/lib/putuser.S
+++ b/arch/x86/lib/putuser.S
@@ -31,12 +31,19 @@
  * as they get called from within inline assembly.
  */
 
-#define ENTER	mov PER_CPU_VAR(current_task), %_ASM_BX
+#ifdef CONFIG_X86_5LEVEL
+#define LOAD_TASK_SIZE_MINUS_N(n) \
+	ALTERNATIVE __stringify(mov $((1 << 47) - 4096 - (n)),%rbx), \
+		    __stringify(mov $((1 << 56) - 4096 - (n)),%rbx), X86_FEATURE_LA57
+#else
+#define LOAD_TASK_SIZE_MINUS_N(n) \
+	mov $(TASK_SIZE_MAX - (n)),%_ASM_BX
+#endif
 
 .text
 SYM_FUNC_START(__put_user_1)
-	ENTER
-	cmp TASK_addr_limit(%_ASM_BX),%_ASM_CX
+	LOAD_TASK_SIZE_MINUS_N(0)
+	cmp %_ASM_BX,%_ASM_CX
 	jae .Lbad_put_user
 	ASM_STAC
 1:	movb %al,(%_ASM_CX)
@@ -47,9 +54,7 @@ SYM_FUNC_END(__put_user_1)
 EXPORT_SYMBOL(__put_user_1)
 
 SYM_FUNC_START(__put_user_2)
-	ENTER
-	mov TASK_addr_limit(%_ASM_BX),%_ASM_BX
-	sub $1,%_ASM_BX
+	LOAD_TASK_SIZE_MINUS_N(1)
 	cmp %_ASM_BX,%_ASM_CX
 	jae .Lbad_put_user
 	ASM_STAC
@@ -61,9 +66,7 @@ SYM_FUNC_END(__put_user_2)
 EXPORT_SYMBOL(__put_user_2)
 
 SYM_FUNC_START(__put_user_4)
-	ENTER
-	mov TASK_addr_limit(%_ASM_BX),%_ASM_BX
-	sub $3,%_ASM_BX
+	LOAD_TASK_SIZE_MINUS_N(3)
 	cmp %_ASM_BX,%_ASM_CX
 	jae .Lbad_put_user
 	ASM_STAC
@@ -75,9 +78,7 @@ SYM_FUNC_END(__put_user_4)
 EXPORT_SYMBOL(__put_user_4)
 
 SYM_FUNC_START(__put_user_8)
-	ENTER
-	mov TASK_addr_limit(%_ASM_BX),%_ASM_BX
-	sub $7,%_ASM_BX
+	LOAD_TASK_SIZE_MINUS_N(7)
 	cmp %_ASM_BX,%_ASM_CX
 	jae .Lbad_put_user
 	ASM_STAC
-- cgit v1.2.3
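
For context on what this final patch kills off: before the series, kernel code
that wanted to reuse __user-typed copy helpers on kernel buffers did the
classic set_fs() dance. A sketch of the idiom (illustrative only; copy_helper()
is a hypothetical stand-in name, not code from these patches):

	mm_segment_t old_fs = get_fs();

	set_fs(KERNEL_DS);	/* lift addr_limit so access_ok() passes for kernel pointers */
	err = copy_helper((void __user *)kernel_buf, len);
	set_fs(old_fs);		/* restore; TIF_FSCHECK used to catch a missing restore */

With thread.addr_limit gone, access_ok() compares against the fixed
TASK_SIZE_MAX limit, get_user()/put_user() load that limit as an immediate
(patched via the LA57 alternative) instead of reading it from the task struct,
and kernel-buffer I/O has to go through dedicated in-kernel interfaces rather
than temporarily widening the user address range.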