From 0bf5f9492389aa8df5c8e38fcb4488802d24504d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 16 Jul 2019 16:26:27 -0700 Subject: mm: fix the MAP_UNINITIALIZED flag We can't expose UAPI symbols differently based on CONFIG_ symbols, as userspace won't have them available. Instead always define the flag, but only respect it based on the config option. Link: http://lkml.kernel.org/r/20190703122359.18200-2-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: Vladimir Murzin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/xtensa/include/uapi/asm/mman.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/xtensa/include/uapi/asm/mman.h b/arch/xtensa/include/uapi/asm/mman.h index be726062412b..ebbb48842190 100644 --- a/arch/xtensa/include/uapi/asm/mman.h +++ b/arch/xtensa/include/uapi/asm/mman.h @@ -56,12 +56,8 @@ #define MAP_STACK 0x40000 /* give out an address that is best suited for process/thread stacks */ #define MAP_HUGETLB 0x80000 /* create a huge page mapping */ #define MAP_FIXED_NOREPLACE 0x100000 /* MAP_FIXED which doesn't unmap underlying mapping */ -#ifdef CONFIG_MMAP_ALLOW_UNINITIALIZED -# define MAP_UNINITIALIZED 0x4000000 /* For anonymous mmap, memory could be +#define MAP_UNINITIALIZED 0x4000000 /* For anonymous mmap, memory could be * uninitialized */ -#else -# define MAP_UNINITIALIZED 0x0 /* Don't support this flag */ -#endif /* * Flags for msync -- cgit v1.2.3 From fe6ba88b251aa76a94be2cb441d2e6b7c623b989 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 16 Jul 2019 16:27:01 -0700 Subject: arch: replace _BITUL() in kernel-space headers with BIT() Now that BIT() can be used from assembly code, we can safely replace _BITUL() with equivalent BIT(). UAPI headers are still required to use _BITUL(), but there is no more reason to use it in kernel headers. BIT() is shorter. Link: http://lkml.kernel.org/r/20190609153941.17249-2-yamada.masahiro@socionext.com Signed-off-by: Masahiro Yamada Cc: Heiko Carstens Cc: Vasily Gorbik Cc: Christian Borntraeger Cc: Vineet Gupta Cc: Catalin Marinas Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arc/include/asm/pgtable.h | 8 ++-- arch/arc/plat-eznps/include/plat/ctop.h | 15 +++--- arch/arm64/include/asm/sysreg.h | 82 ++++++++++++++++----------------- arch/s390/include/asm/ctl_reg.h | 42 ++++++++--------- arch/s390/include/asm/nmi.h | 20 ++++---- arch/s390/include/asm/processor.h | 20 ++++---- arch/s390/include/asm/ptrace.h | 10 ++-- arch/s390/include/asm/setup.h | 40 ++++++++-------- arch/s390/include/asm/thread_info.h | 34 +++++++------- 9 files changed, 136 insertions(+), 135 deletions(-) (limited to 'arch') diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h index da446180f17b..1d87c18a2976 100644 --- a/arch/arc/include/asm/pgtable.h +++ b/arch/arc/include/asm/pgtable.h @@ -32,7 +32,7 @@ #ifndef _ASM_ARC_PGTABLE_H #define _ASM_ARC_PGTABLE_H -#include +#include #define __ARCH_USE_5LEVEL_HACK #include #include @@ -215,11 +215,11 @@ #define BITS_FOR_PTE (PGDIR_SHIFT - PAGE_SHIFT) #define BITS_FOR_PGD (32 - PGDIR_SHIFT) -#define PGDIR_SIZE _BITUL(PGDIR_SHIFT) /* vaddr span, not PDG sz */ +#define PGDIR_SIZE BIT(PGDIR_SHIFT) /* vaddr span, not PDG sz */ #define PGDIR_MASK (~(PGDIR_SIZE-1)) -#define PTRS_PER_PTE _BITUL(BITS_FOR_PTE) -#define PTRS_PER_PGD _BITUL(BITS_FOR_PGD) +#define PTRS_PER_PTE BIT(BITS_FOR_PTE) +#define PTRS_PER_PGD BIT(BITS_FOR_PGD) /* * Number of entries a user land program use. diff --git a/arch/arc/plat-eznps/include/plat/ctop.h b/arch/arc/plat-eznps/include/plat/ctop.h index 309a994f64f0..a4a61531c7fb 100644 --- a/arch/arc/plat-eznps/include/plat/ctop.h +++ b/arch/arc/plat-eznps/include/plat/ctop.h @@ -10,6 +10,7 @@ #error "Incorrect ctop.h include" #endif +#include #include #include @@ -51,19 +52,19 @@ #define CTOP_INST_AXOR_DI_R2_R2_R3 0x4A664C06 /* Do not use D$ for address in 2G-3G */ -#define HW_COMPLY_KRN_NOT_D_CACHED _BITUL(28) +#define HW_COMPLY_KRN_NOT_D_CACHED BIT(28) #define NPS_MSU_EN_CFG 0x80 #define NPS_CRG_BLKID 0x480 -#define NPS_CRG_SYNC_BIT _BITUL(0) +#define NPS_CRG_SYNC_BIT BIT(0) #define NPS_GIM_BLKID 0x5C0 /* GIM registers and fields*/ -#define NPS_GIM_UART_LINE _BITUL(7) -#define NPS_GIM_DBG_LAN_EAST_TX_DONE_LINE _BITUL(10) -#define NPS_GIM_DBG_LAN_EAST_RX_RDY_LINE _BITUL(11) -#define NPS_GIM_DBG_LAN_WEST_TX_DONE_LINE _BITUL(25) -#define NPS_GIM_DBG_LAN_WEST_RX_RDY_LINE _BITUL(26) +#define NPS_GIM_UART_LINE BIT(7) +#define NPS_GIM_DBG_LAN_EAST_TX_DONE_LINE BIT(10) +#define NPS_GIM_DBG_LAN_EAST_RX_RDY_LINE BIT(11) +#define NPS_GIM_DBG_LAN_WEST_TX_DONE_LINE BIT(25) +#define NPS_GIM_DBG_LAN_WEST_RX_RDY_LINE BIT(26) #ifndef __ASSEMBLY__ /* Functional registers definition */ diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index a7522fca1105..06ebcfef73df 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -9,7 +9,7 @@ #ifndef __ASM_SYSREG_H #define __ASM_SYSREG_H -#include +#include #include /* @@ -478,31 +478,31 @@ #define SYS_CNTV_CVAL_EL02 sys_reg(3, 5, 14, 3, 2) /* Common SCTLR_ELx flags. */ -#define SCTLR_ELx_DSSBS (_BITUL(44)) -#define SCTLR_ELx_ENIA (_BITUL(31)) -#define SCTLR_ELx_ENIB (_BITUL(30)) -#define SCTLR_ELx_ENDA (_BITUL(27)) -#define SCTLR_ELx_EE (_BITUL(25)) -#define SCTLR_ELx_IESB (_BITUL(21)) -#define SCTLR_ELx_WXN (_BITUL(19)) -#define SCTLR_ELx_ENDB (_BITUL(13)) -#define SCTLR_ELx_I (_BITUL(12)) -#define SCTLR_ELx_SA (_BITUL(3)) -#define SCTLR_ELx_C (_BITUL(2)) -#define SCTLR_ELx_A (_BITUL(1)) -#define SCTLR_ELx_M (_BITUL(0)) +#define SCTLR_ELx_DSSBS (BIT(44)) +#define SCTLR_ELx_ENIA (BIT(31)) +#define SCTLR_ELx_ENIB (BIT(30)) +#define SCTLR_ELx_ENDA (BIT(27)) +#define SCTLR_ELx_EE (BIT(25)) +#define SCTLR_ELx_IESB (BIT(21)) +#define SCTLR_ELx_WXN (BIT(19)) +#define SCTLR_ELx_ENDB (BIT(13)) +#define SCTLR_ELx_I (BIT(12)) +#define SCTLR_ELx_SA (BIT(3)) +#define SCTLR_ELx_C (BIT(2)) +#define SCTLR_ELx_A (BIT(1)) +#define SCTLR_ELx_M (BIT(0)) #define SCTLR_ELx_FLAGS (SCTLR_ELx_M | SCTLR_ELx_A | SCTLR_ELx_C | \ SCTLR_ELx_SA | SCTLR_ELx_I | SCTLR_ELx_IESB) /* SCTLR_EL2 specific flags. */ -#define SCTLR_EL2_RES1 ((_BITUL(4)) | (_BITUL(5)) | (_BITUL(11)) | (_BITUL(16)) | \ - (_BITUL(18)) | (_BITUL(22)) | (_BITUL(23)) | (_BITUL(28)) | \ - (_BITUL(29))) -#define SCTLR_EL2_RES0 ((_BITUL(6)) | (_BITUL(7)) | (_BITUL(8)) | (_BITUL(9)) | \ - (_BITUL(10)) | (_BITUL(13)) | (_BITUL(14)) | (_BITUL(15)) | \ - (_BITUL(17)) | (_BITUL(20)) | (_BITUL(24)) | (_BITUL(26)) | \ - (_BITUL(27)) | (_BITUL(30)) | (_BITUL(31)) | \ +#define SCTLR_EL2_RES1 ((BIT(4)) | (BIT(5)) | (BIT(11)) | (BIT(16)) | \ + (BIT(18)) | (BIT(22)) | (BIT(23)) | (BIT(28)) | \ + (BIT(29))) +#define SCTLR_EL2_RES0 ((BIT(6)) | (BIT(7)) | (BIT(8)) | (BIT(9)) | \ + (BIT(10)) | (BIT(13)) | (BIT(14)) | (BIT(15)) | \ + (BIT(17)) | (BIT(20)) | (BIT(24)) | (BIT(26)) | \ + (BIT(27)) | (BIT(30)) | (BIT(31)) | \ (0xffffefffUL << 32)) #ifdef CONFIG_CPU_BIG_ENDIAN @@ -524,23 +524,23 @@ #endif /* SCTLR_EL1 specific flags. */ -#define SCTLR_EL1_UCI (_BITUL(26)) -#define SCTLR_EL1_E0E (_BITUL(24)) -#define SCTLR_EL1_SPAN (_BITUL(23)) -#define SCTLR_EL1_NTWE (_BITUL(18)) -#define SCTLR_EL1_NTWI (_BITUL(16)) -#define SCTLR_EL1_UCT (_BITUL(15)) -#define SCTLR_EL1_DZE (_BITUL(14)) -#define SCTLR_EL1_UMA (_BITUL(9)) -#define SCTLR_EL1_SED (_BITUL(8)) -#define SCTLR_EL1_ITD (_BITUL(7)) -#define SCTLR_EL1_CP15BEN (_BITUL(5)) -#define SCTLR_EL1_SA0 (_BITUL(4)) - -#define SCTLR_EL1_RES1 ((_BITUL(11)) | (_BITUL(20)) | (_BITUL(22)) | (_BITUL(28)) | \ - (_BITUL(29))) -#define SCTLR_EL1_RES0 ((_BITUL(6)) | (_BITUL(10)) | (_BITUL(13)) | (_BITUL(17)) | \ - (_BITUL(27)) | (_BITUL(30)) | (_BITUL(31)) | \ +#define SCTLR_EL1_UCI (BIT(26)) +#define SCTLR_EL1_E0E (BIT(24)) +#define SCTLR_EL1_SPAN (BIT(23)) +#define SCTLR_EL1_NTWE (BIT(18)) +#define SCTLR_EL1_NTWI (BIT(16)) +#define SCTLR_EL1_UCT (BIT(15)) +#define SCTLR_EL1_DZE (BIT(14)) +#define SCTLR_EL1_UMA (BIT(9)) +#define SCTLR_EL1_SED (BIT(8)) +#define SCTLR_EL1_ITD (BIT(7)) +#define SCTLR_EL1_CP15BEN (BIT(5)) +#define SCTLR_EL1_SA0 (BIT(4)) + +#define SCTLR_EL1_RES1 ((BIT(11)) | (BIT(20)) | (BIT(22)) | (BIT(28)) | \ + (BIT(29))) +#define SCTLR_EL1_RES0 ((BIT(6)) | (BIT(10)) | (BIT(13)) | (BIT(17)) | \ + (BIT(27)) | (BIT(30)) | (BIT(31)) | \ (0xffffefffUL << 32)) #ifdef CONFIG_CPU_BIG_ENDIAN @@ -756,13 +756,13 @@ #define ZCR_ELx_LEN_SIZE 9 #define ZCR_ELx_LEN_MASK 0x1ff -#define CPACR_EL1_ZEN_EL1EN (_BITUL(16)) /* enable EL1 access */ -#define CPACR_EL1_ZEN_EL0EN (_BITUL(17)) /* enable EL0 access, if EL1EN set */ +#define CPACR_EL1_ZEN_EL1EN (BIT(16)) /* enable EL1 access */ +#define CPACR_EL1_ZEN_EL0EN (BIT(17)) /* enable EL0 access, if EL1EN set */ #define CPACR_EL1_ZEN (CPACR_EL1_ZEN_EL1EN | CPACR_EL1_ZEN_EL0EN) /* Safe value for MPIDR_EL1: Bit31:RES1, Bit30:U:0, Bit24:MT:0 */ -#define SYS_MPIDR_SAFE_VAL (_BITUL(31)) +#define SYS_MPIDR_SAFE_VAL (BIT(31)) #ifdef __ASSEMBLY__ diff --git a/arch/s390/include/asm/ctl_reg.h b/arch/s390/include/asm/ctl_reg.h index 0cf6b53587db..60f907516335 100644 --- a/arch/s390/include/asm/ctl_reg.h +++ b/arch/s390/include/asm/ctl_reg.h @@ -8,27 +8,27 @@ #ifndef __ASM_CTL_REG_H #define __ASM_CTL_REG_H -#include - -#define CR0_CLOCK_COMPARATOR_SIGN _BITUL(63 - 10) -#define CR0_EMERGENCY_SIGNAL_SUBMASK _BITUL(63 - 49) -#define CR0_EXTERNAL_CALL_SUBMASK _BITUL(63 - 50) -#define CR0_CLOCK_COMPARATOR_SUBMASK _BITUL(63 - 52) -#define CR0_CPU_TIMER_SUBMASK _BITUL(63 - 53) -#define CR0_SERVICE_SIGNAL_SUBMASK _BITUL(63 - 54) -#define CR0_UNUSED_56 _BITUL(63 - 56) -#define CR0_INTERRUPT_KEY_SUBMASK _BITUL(63 - 57) -#define CR0_MEASUREMENT_ALERT_SUBMASK _BITUL(63 - 58) - -#define CR2_GUARDED_STORAGE _BITUL(63 - 59) - -#define CR14_UNUSED_32 _BITUL(63 - 32) -#define CR14_UNUSED_33 _BITUL(63 - 33) -#define CR14_CHANNEL_REPORT_SUBMASK _BITUL(63 - 35) -#define CR14_RECOVERY_SUBMASK _BITUL(63 - 36) -#define CR14_DEGRADATION_SUBMASK _BITUL(63 - 37) -#define CR14_EXTERNAL_DAMAGE_SUBMASK _BITUL(63 - 38) -#define CR14_WARNING_SUBMASK _BITUL(63 - 39) +#include + +#define CR0_CLOCK_COMPARATOR_SIGN BIT(63 - 10) +#define CR0_EMERGENCY_SIGNAL_SUBMASK BIT(63 - 49) +#define CR0_EXTERNAL_CALL_SUBMASK BIT(63 - 50) +#define CR0_CLOCK_COMPARATOR_SUBMASK BIT(63 - 52) +#define CR0_CPU_TIMER_SUBMASK BIT(63 - 53) +#define CR0_SERVICE_SIGNAL_SUBMASK BIT(63 - 54) +#define CR0_UNUSED_56 BIT(63 - 56) +#define CR0_INTERRUPT_KEY_SUBMASK BIT(63 - 57) +#define CR0_MEASUREMENT_ALERT_SUBMASK BIT(63 - 58) + +#define CR2_GUARDED_STORAGE BIT(63 - 59) + +#define CR14_UNUSED_32 BIT(63 - 32) +#define CR14_UNUSED_33 BIT(63 - 33) +#define CR14_CHANNEL_REPORT_SUBMASK BIT(63 - 35) +#define CR14_RECOVERY_SUBMASK BIT(63 - 36) +#define CR14_DEGRADATION_SUBMASK BIT(63 - 37) +#define CR14_EXTERNAL_DAMAGE_SUBMASK BIT(63 - 38) +#define CR14_WARNING_SUBMASK BIT(63 - 39) #ifndef __ASSEMBLY__ diff --git a/arch/s390/include/asm/nmi.h b/arch/s390/include/asm/nmi.h index 1e5dc4537bf2..b160da8fa14b 100644 --- a/arch/s390/include/asm/nmi.h +++ b/arch/s390/include/asm/nmi.h @@ -12,7 +12,7 @@ #ifndef _ASM_S390_NMI_H #define _ASM_S390_NMI_H -#include +#include #include #define MCIC_SUBCLASS_MASK (1ULL<<63 | 1ULL<<62 | 1ULL<<61 | \ @@ -20,15 +20,15 @@ 1ULL<<55 | 1ULL<<54 | 1ULL<<53 | \ 1ULL<<52 | 1ULL<<47 | 1ULL<<46 | \ 1ULL<<45 | 1ULL<<44) -#define MCCK_CODE_SYSTEM_DAMAGE _BITUL(63) -#define MCCK_CODE_EXT_DAMAGE _BITUL(63 - 5) -#define MCCK_CODE_CP _BITUL(63 - 9) -#define MCCK_CODE_CPU_TIMER_VALID _BITUL(63 - 46) -#define MCCK_CODE_PSW_MWP_VALID _BITUL(63 - 20) -#define MCCK_CODE_PSW_IA_VALID _BITUL(63 - 23) -#define MCCK_CODE_CR_VALID _BITUL(63 - 29) -#define MCCK_CODE_GS_VALID _BITUL(63 - 36) -#define MCCK_CODE_FC_VALID _BITUL(63 - 43) +#define MCCK_CODE_SYSTEM_DAMAGE BIT(63) +#define MCCK_CODE_EXT_DAMAGE BIT(63 - 5) +#define MCCK_CODE_CP BIT(63 - 9) +#define MCCK_CODE_CPU_TIMER_VALID BIT(63 - 46) +#define MCCK_CODE_PSW_MWP_VALID BIT(63 - 20) +#define MCCK_CODE_PSW_IA_VALID BIT(63 - 23) +#define MCCK_CODE_CR_VALID BIT(63 - 29) +#define MCCK_CODE_GS_VALID BIT(63 - 36) +#define MCCK_CODE_FC_VALID BIT(63 - 43) #ifndef __ASSEMBLY__ diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 14883b1562e0..d56c519bc696 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -12,7 +12,7 @@ #ifndef __ASM_S390_PROCESSOR_H #define __ASM_S390_PROCESSOR_H -#include +#include #define CIF_MCCK_PENDING 0 /* machine check handling is pending */ #define CIF_ASCE_PRIMARY 1 /* primary asce needs fixup / uaccess */ @@ -24,15 +24,15 @@ #define CIF_MCCK_GUEST 7 /* machine check happening in guest */ #define CIF_DEDICATED_CPU 8 /* this CPU is dedicated */ -#define _CIF_MCCK_PENDING _BITUL(CIF_MCCK_PENDING) -#define _CIF_ASCE_PRIMARY _BITUL(CIF_ASCE_PRIMARY) -#define _CIF_ASCE_SECONDARY _BITUL(CIF_ASCE_SECONDARY) -#define _CIF_NOHZ_DELAY _BITUL(CIF_NOHZ_DELAY) -#define _CIF_FPU _BITUL(CIF_FPU) -#define _CIF_IGNORE_IRQ _BITUL(CIF_IGNORE_IRQ) -#define _CIF_ENABLED_WAIT _BITUL(CIF_ENABLED_WAIT) -#define _CIF_MCCK_GUEST _BITUL(CIF_MCCK_GUEST) -#define _CIF_DEDICATED_CPU _BITUL(CIF_DEDICATED_CPU) +#define _CIF_MCCK_PENDING BIT(CIF_MCCK_PENDING) +#define _CIF_ASCE_PRIMARY BIT(CIF_ASCE_PRIMARY) +#define _CIF_ASCE_SECONDARY BIT(CIF_ASCE_SECONDARY) +#define _CIF_NOHZ_DELAY BIT(CIF_NOHZ_DELAY) +#define _CIF_FPU BIT(CIF_FPU) +#define _CIF_IGNORE_IRQ BIT(CIF_IGNORE_IRQ) +#define _CIF_ENABLED_WAIT BIT(CIF_ENABLED_WAIT) +#define _CIF_MCCK_GUEST BIT(CIF_MCCK_GUEST) +#define _CIF_DEDICATED_CPU BIT(CIF_DEDICATED_CPU) #ifndef __ASSEMBLY__ diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h index 6f70d81c40f2..f009a13afe71 100644 --- a/arch/s390/include/asm/ptrace.h +++ b/arch/s390/include/asm/ptrace.h @@ -7,7 +7,7 @@ #ifndef _S390_PTRACE_H #define _S390_PTRACE_H -#include +#include #include #define PIF_SYSCALL 0 /* inside a system call */ @@ -15,10 +15,10 @@ #define PIF_SYSCALL_RESTART 2 /* restart the current system call */ #define PIF_GUEST_FAULT 3 /* indicates program check in sie64a */ -#define _PIF_SYSCALL _BITUL(PIF_SYSCALL) -#define _PIF_PER_TRAP _BITUL(PIF_PER_TRAP) -#define _PIF_SYSCALL_RESTART _BITUL(PIF_SYSCALL_RESTART) -#define _PIF_GUEST_FAULT _BITUL(PIF_GUEST_FAULT) +#define _PIF_SYSCALL BIT(PIF_SYSCALL) +#define _PIF_PER_TRAP BIT(PIF_PER_TRAP) +#define _PIF_SYSCALL_RESTART BIT(PIF_SYSCALL_RESTART) +#define _PIF_GUEST_FAULT BIT(PIF_GUEST_FAULT) #ifndef __ASSEMBLY__ diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index 925889d360c1..82deb8fc8319 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -6,7 +6,7 @@ #ifndef _ASM_S390_SETUP_H #define _ASM_S390_SETUP_H -#include +#include #include #define EP_OFFSET 0x10008 @@ -21,25 +21,25 @@ * Machine features detected in early.c */ -#define MACHINE_FLAG_VM _BITUL(0) -#define MACHINE_FLAG_KVM _BITUL(1) -#define MACHINE_FLAG_LPAR _BITUL(2) -#define MACHINE_FLAG_DIAG9C _BITUL(3) -#define MACHINE_FLAG_ESOP _BITUL(4) -#define MACHINE_FLAG_IDTE _BITUL(5) -#define MACHINE_FLAG_DIAG44 _BITUL(6) -#define MACHINE_FLAG_EDAT1 _BITUL(7) -#define MACHINE_FLAG_EDAT2 _BITUL(8) -#define MACHINE_FLAG_TOPOLOGY _BITUL(10) -#define MACHINE_FLAG_TE _BITUL(11) -#define MACHINE_FLAG_TLB_LC _BITUL(12) -#define MACHINE_FLAG_VX _BITUL(13) -#define MACHINE_FLAG_TLB_GUEST _BITUL(14) -#define MACHINE_FLAG_NX _BITUL(15) -#define MACHINE_FLAG_GS _BITUL(16) -#define MACHINE_FLAG_SCC _BITUL(17) - -#define LPP_MAGIC _BITUL(31) +#define MACHINE_FLAG_VM BIT(0) +#define MACHINE_FLAG_KVM BIT(1) +#define MACHINE_FLAG_LPAR BIT(2) +#define MACHINE_FLAG_DIAG9C BIT(3) +#define MACHINE_FLAG_ESOP BIT(4) +#define MACHINE_FLAG_IDTE BIT(5) +#define MACHINE_FLAG_DIAG44 BIT(6) +#define MACHINE_FLAG_EDAT1 BIT(7) +#define MACHINE_FLAG_EDAT2 BIT(8) +#define MACHINE_FLAG_TOPOLOGY BIT(10) +#define MACHINE_FLAG_TE BIT(11) +#define MACHINE_FLAG_TLB_LC BIT(12) +#define MACHINE_FLAG_VX BIT(13) +#define MACHINE_FLAG_TLB_GUEST BIT(14) +#define MACHINE_FLAG_NX BIT(15) +#define MACHINE_FLAG_GS BIT(16) +#define MACHINE_FLAG_SCC BIT(17) + +#define LPP_MAGIC BIT(31) #define LPP_PID_MASK _AC(0xffffffff, UL) /* Offsets to entry points in kernel/head.S */ diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h index ce4e17c9aad6..e582fbe59e20 100644 --- a/arch/s390/include/asm/thread_info.h +++ b/arch/s390/include/asm/thread_info.h @@ -8,7 +8,7 @@ #ifndef _ASM_THREAD_INFO_H #define _ASM_THREAD_INFO_H -#include +#include /* * General size of kernel stacks @@ -82,21 +82,21 @@ void arch_setup_new_exec(void); #define TIF_SECCOMP 26 /* secure computing */ #define TIF_SYSCALL_TRACEPOINT 27 /* syscall tracepoint instrumentation */ -#define _TIF_NOTIFY_RESUME _BITUL(TIF_NOTIFY_RESUME) -#define _TIF_SIGPENDING _BITUL(TIF_SIGPENDING) -#define _TIF_NEED_RESCHED _BITUL(TIF_NEED_RESCHED) -#define _TIF_UPROBE _BITUL(TIF_UPROBE) -#define _TIF_GUARDED_STORAGE _BITUL(TIF_GUARDED_STORAGE) -#define _TIF_PATCH_PENDING _BITUL(TIF_PATCH_PENDING) -#define _TIF_ISOLATE_BP _BITUL(TIF_ISOLATE_BP) -#define _TIF_ISOLATE_BP_GUEST _BITUL(TIF_ISOLATE_BP_GUEST) - -#define _TIF_31BIT _BITUL(TIF_31BIT) -#define _TIF_SINGLE_STEP _BITUL(TIF_SINGLE_STEP) - -#define _TIF_SYSCALL_TRACE _BITUL(TIF_SYSCALL_TRACE) -#define _TIF_SYSCALL_AUDIT _BITUL(TIF_SYSCALL_AUDIT) -#define _TIF_SECCOMP _BITUL(TIF_SECCOMP) -#define _TIF_SYSCALL_TRACEPOINT _BITUL(TIF_SYSCALL_TRACEPOINT) +#define _TIF_NOTIFY_RESUME BIT(TIF_NOTIFY_RESUME) +#define _TIF_SIGPENDING BIT(TIF_SIGPENDING) +#define _TIF_NEED_RESCHED BIT(TIF_NEED_RESCHED) +#define _TIF_UPROBE BIT(TIF_UPROBE) +#define _TIF_GUARDED_STORAGE BIT(TIF_GUARDED_STORAGE) +#define _TIF_PATCH_PENDING BIT(TIF_PATCH_PENDING) +#define _TIF_ISOLATE_BP BIT(TIF_ISOLATE_BP) +#define _TIF_ISOLATE_BP_GUEST BIT(TIF_ISOLATE_BP_GUEST) + +#define _TIF_31BIT BIT(TIF_31BIT) +#define _TIF_SINGLE_STEP BIT(TIF_SINGLE_STEP) + +#define _TIF_SYSCALL_TRACE BIT(TIF_SYSCALL_TRACE) +#define _TIF_SYSCALL_AUDIT BIT(TIF_SYSCALL_AUDIT) +#define _TIF_SECCOMP BIT(TIF_SECCOMP) +#define _TIF_SYSCALL_TRACEPOINT BIT(TIF_SYSCALL_TRACEPOINT) #endif /* _ASM_THREAD_INFO_H */ -- cgit v1.2.3 From 3a7f0adfe7c27cdaf6dc3456226a430398732e2c Mon Sep 17 00:00:00 2001 From: Stephen Kitt Date: Tue, 16 Jul 2019 16:27:04 -0700 Subject: arch/*: remove unused isa_page_to_bus() isa_page_to_bus() is deprecated and is no longer used anywhere. Remove it entirely. Link: http://lkml.kernel.org/r/20190613161155.16946-1-steve@sk2.org Signed-off-by: Stephen Kitt Acked-by: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/alpha/include/asm/io.h | 5 ----- arch/arm/include/asm/io.h | 1 - arch/mips/include/asm/io.h | 2 -- arch/x86/include/asm/io.h | 1 - 4 files changed, 9 deletions(-) (limited to 'arch') diff --git a/arch/alpha/include/asm/io.h b/arch/alpha/include/asm/io.h index ccf9d65166bb..af2c0063dc75 100644 --- a/arch/alpha/include/asm/io.h +++ b/arch/alpha/include/asm/io.h @@ -93,11 +93,6 @@ static inline void * phys_to_virt(unsigned long address) #define page_to_phys(page) page_to_pa(page) -static inline dma_addr_t __deprecated isa_page_to_bus(struct page *page) -{ - return page_to_phys(page); -} - /* Maximum PIO space address supported? */ #define IO_SPACE_LIMIT 0xffff diff --git a/arch/arm/include/asm/io.h b/arch/arm/include/asm/io.h index f11c35cf0b74..7a0596fcb2e7 100644 --- a/arch/arm/include/asm/io.h +++ b/arch/arm/include/asm/io.h @@ -30,7 +30,6 @@ * ISA I/O bus memory addresses are 1:1 with the physical address. */ #define isa_virt_to_bus virt_to_phys -#define isa_page_to_bus page_to_phys #define isa_bus_to_virt phys_to_virt /* diff --git a/arch/mips/include/asm/io.h b/arch/mips/include/asm/io.h index 29997e42480e..1790274c27eb 100644 --- a/arch/mips/include/asm/io.h +++ b/arch/mips/include/asm/io.h @@ -149,8 +149,6 @@ static inline void *isa_bus_to_virt(unsigned long address) return phys_to_virt(address); } -#define isa_page_to_bus page_to_phys - /* * However PCI ones are not necessarily 1:1 and therefore these interfaces * are forbidden in portable PCI drivers. diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h index a06a9f8294ea..6bed97ff6db2 100644 --- a/arch/x86/include/asm/io.h +++ b/arch/x86/include/asm/io.h @@ -165,7 +165,6 @@ static inline unsigned int isa_virt_to_bus(volatile void *address) { return (unsigned int)virt_to_phys(address); } -#define isa_page_to_bus(page) ((unsigned int)page_to_phys(page)) #define isa_bus_to_virt phys_to_virt /* -- cgit v1.2.3 From 0f472d04f59ff89d15b2a1c4eafde7317ddd67a2 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Tue, 16 Jul 2019 16:27:33 -0700 Subject: mm/ioremap: probe platform for p4d huge map support Finish up what commit c2febafc6773 ("mm: convert generic code to 5-level paging") started while levelling up P4D huge mapping support at par with PUD and PMD. A new arch call back arch_ioremap_p4d_supported() is added which just maintains status quo (P4D huge map not supported) on x86, arm64 and powerpc. When HAVE_ARCH_HUGE_VMAP is enabled its just a simple check from the arch about the support, hence runtime effects are minimal. Link: http://lkml.kernel.org/r/1561699231-20991-1-git-send-email-anshuman.khandual@arm.com Signed-off-by: Anshuman Khandual Acked-by: Thomas Gleixner Acked-by: Michael Ellerman (powerpc) Cc: Catalin Marinas Cc: Will Deacon Cc: Dave Hansen Cc: Andy Lutomirski Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Kirill A. Shutemov Cc: Michal Hocko Cc: Stephen Rothwell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm64/mm/mmu.c | 5 +++++ arch/powerpc/mm/book3s64/radix_pgtable.c | 5 +++++ arch/x86/mm/ioremap.c | 5 +++++ include/linux/io.h | 1 + lib/ioremap.c | 2 ++ 5 files changed, 18 insertions(+) (limited to 'arch') diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 1b49c08dfa2b..e661469cabdd 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -942,6 +942,11 @@ void *__init fixmap_remap_fdt(phys_addr_t dt_phys) return dt_virt; } +int __init arch_ioremap_p4d_supported(void) +{ + return 0; +} + int __init arch_ioremap_pud_supported(void) { /* diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c index 65c2ba1e1783..b4ca9e95e678 100644 --- a/arch/powerpc/mm/book3s64/radix_pgtable.c +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c @@ -1237,3 +1237,8 @@ int radix__ioremap_range(unsigned long ea, phys_addr_t pa, unsigned long size, return 0; } } + +int __init arch_ioremap_p4d_supported(void) +{ + return 0; +} diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index e500f1df1140..63e99f15d7cf 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -459,6 +459,11 @@ void iounmap(volatile void __iomem *addr) } EXPORT_SYMBOL(iounmap); +int __init arch_ioremap_p4d_supported(void) +{ + return 0; +} + int __init arch_ioremap_pud_supported(void) { #ifdef CONFIG_X86_64 diff --git a/include/linux/io.h b/include/linux/io.h index 9876e5801a9d..accac822336a 100644 --- a/include/linux/io.h +++ b/include/linux/io.h @@ -33,6 +33,7 @@ static inline int ioremap_page_range(unsigned long addr, unsigned long end, #ifdef CONFIG_HAVE_ARCH_HUGE_VMAP void __init ioremap_huge_init(void); +int arch_ioremap_p4d_supported(void); int arch_ioremap_pud_supported(void); int arch_ioremap_pmd_supported(void); #else diff --git a/lib/ioremap.c b/lib/ioremap.c index a95161d9c883..0a2ffadc6d71 100644 --- a/lib/ioremap.c +++ b/lib/ioremap.c @@ -30,6 +30,8 @@ early_param("nohugeiomap", set_nohugeiomap); void __init ioremap_huge_init(void) { if (!ioremap_huge_disabled) { + if (arch_ioremap_p4d_supported()) + ioremap_p4d_capable = 1; if (arch_ioremap_pud_supported()) ioremap_pud_capable = 1; if (arch_ioremap_pmd_supported()) -- cgit v1.2.3 From b98cca444d287a63dd96df04af7fb9793567599e Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Tue, 16 Jul 2019 16:28:00 -0700 Subject: mm, kprobes: generalize and rename notify_page_fault() as kprobe_page_fault() Architectures which support kprobes have very similar boilerplate around calling kprobe_fault_handler(). Use a helper function in kprobes.h to unify them, based on the x86 code. This changes the behaviour for other architectures when preemption is enabled. Previously, they would have disabled preemption while calling the kprobe handler. However, preemption would be disabled if this fault was due to a kprobe, so we know the fault was not due to a kprobe handler and can simply return failure. This behaviour was introduced in commit a980c0ef9f6d ("x86/kprobes: Refactor kprobes_fault() like kprobe_exceptions_notify()") [anshuman.khandual@arm.com: export kprobe_fault_handler()] Link: http://lkml.kernel.org/r/1561133358-8876-1-git-send-email-anshuman.khandual@arm.com Link: http://lkml.kernel.org/r/1560420444-25737-1-git-send-email-anshuman.khandual@arm.com Signed-off-by: Anshuman Khandual Reviewed-by: Dave Hansen Cc: Michal Hocko Cc: Matthew Wilcox Cc: Mark Rutland Cc: Christophe Leroy Cc: Stephen Rothwell Cc: Andrey Konovalov Cc: Michael Ellerman Cc: Paul Mackerras Cc: Russell King Cc: Catalin Marinas Cc: Will Deacon Cc: Tony Luck Cc: Fenghua Yu Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Yoshinori Sato Cc: "David S. Miller" Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Andy Lutomirski Cc: Vineet Gupta Cc: James Hogan Cc: Paul Burton Cc: Ralf Baechle Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/mm/fault.c | 24 +----------------------- arch/arm64/mm/fault.c | 24 +----------------------- arch/ia64/mm/fault.c | 24 +----------------------- arch/mips/include/asm/kprobes.h | 1 + arch/mips/kernel/kprobes.c | 2 +- arch/powerpc/mm/fault.c | 23 ++--------------------- arch/s390/mm/fault.c | 16 +--------------- arch/sh/mm/fault.c | 18 ++---------------- arch/sparc/mm/fault_64.c | 16 +--------------- arch/x86/mm/fault.c | 21 ++------------------- include/linux/kprobes.h | 19 +++++++++++++++++++ 11 files changed, 32 insertions(+), 156 deletions(-) (limited to 'arch') diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c index 0e417233dad7..890eeaac3cbb 100644 --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c @@ -27,28 +27,6 @@ #ifdef CONFIG_MMU -#ifdef CONFIG_KPROBES -static inline int notify_page_fault(struct pt_regs *regs, unsigned int fsr) -{ - int ret = 0; - - if (!user_mode(regs)) { - /* kprobe_running() needs smp_processor_id() */ - preempt_disable(); - if (kprobe_running() && kprobe_fault_handler(regs, fsr)) - ret = 1; - preempt_enable(); - } - - return ret; -} -#else -static inline int notify_page_fault(struct pt_regs *regs, unsigned int fsr) -{ - return 0; -} -#endif - /* * This is useful to dump out the page tables associated with * 'addr' in mm 'mm'. @@ -265,7 +243,7 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) vm_fault_t fault; unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; - if (notify_page_fault(regs, fsr)) + if (kprobe_page_fault(regs, fsr)) return 0; tsk = current; diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index c8c61b1eb479..9568c116ac7f 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -59,28 +59,6 @@ static inline const struct fault_info *esr_to_debug_fault_info(unsigned int esr) return debug_fault_info + DBG_ESR_EVT(esr); } -#ifdef CONFIG_KPROBES -static inline int notify_page_fault(struct pt_regs *regs, unsigned int esr) -{ - int ret = 0; - - /* kprobe_running() needs smp_processor_id() */ - if (!user_mode(regs)) { - preempt_disable(); - if (kprobe_running() && kprobe_fault_handler(regs, esr)) - ret = 1; - preempt_enable(); - } - - return ret; -} -#else -static inline int notify_page_fault(struct pt_regs *regs, unsigned int esr) -{ - return 0; -} -#endif - static void data_abort_decode(unsigned int esr) { pr_alert("Data abort info:\n"); @@ -434,7 +412,7 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, unsigned long vm_flags = VM_READ | VM_WRITE; unsigned int mm_flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; - if (notify_page_fault(regs, esr)) + if (kprobe_page_fault(regs, esr)) return 0; /* diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c index 3c3a283d3172..c2f299fe9e04 100644 --- a/arch/ia64/mm/fault.c +++ b/arch/ia64/mm/fault.c @@ -21,28 +21,6 @@ extern int die(char *, struct pt_regs *, long); -#ifdef CONFIG_KPROBES -static inline int notify_page_fault(struct pt_regs *regs, int trap) -{ - int ret = 0; - - if (!user_mode(regs)) { - /* kprobe_running() needs smp_processor_id() */ - preempt_disable(); - if (kprobe_running() && kprobe_fault_handler(regs, trap)) - ret = 1; - preempt_enable(); - } - - return ret; -} -#else -static inline int notify_page_fault(struct pt_regs *regs, int trap) -{ - return 0; -} -#endif - /* * Return TRUE if ADDRESS points at a page in the kernel's mapped segment * (inside region 5, on ia64) and that page is present. @@ -116,7 +94,7 @@ ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *re /* * This is to handle the kprobes on user space access instructions */ - if (notify_page_fault(regs, TRAP_BRKPT)) + if (kprobe_page_fault(regs, TRAP_BRKPT)) return; if (user_mode(regs)) diff --git a/arch/mips/include/asm/kprobes.h b/arch/mips/include/asm/kprobes.h index 3cf8e4d5fa28..68b1e5d458cf 100644 --- a/arch/mips/include/asm/kprobes.h +++ b/arch/mips/include/asm/kprobes.h @@ -41,6 +41,7 @@ do { \ #define kretprobe_blacklist_size 0 void arch_remove_kprobe(struct kprobe *p); +int kprobe_fault_handler(struct pt_regs *regs, int trapnr); /* Architecture specific copy of original instruction*/ struct arch_specific_insn { diff --git a/arch/mips/kernel/kprobes.c b/arch/mips/kernel/kprobes.c index 81ba1d3c367c..6cfae2411c04 100644 --- a/arch/mips/kernel/kprobes.c +++ b/arch/mips/kernel/kprobes.c @@ -398,7 +398,7 @@ out: return 1; } -static inline int kprobe_fault_handler(struct pt_regs *regs, int trapnr) +int kprobe_fault_handler(struct pt_regs *regs, int trapnr) { struct kprobe *cur = kprobe_running(); struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index d989592b6fc8..8432c281de92 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -42,26 +42,6 @@ #include #include -static inline bool notify_page_fault(struct pt_regs *regs) -{ - bool ret = false; - -#ifdef CONFIG_KPROBES - /* kprobe_running() needs smp_processor_id() */ - if (!user_mode(regs)) { - preempt_disable(); - if (kprobe_running() && kprobe_fault_handler(regs, 11)) - ret = true; - preempt_enable(); - } -#endif /* CONFIG_KPROBES */ - - if (unlikely(debugger_fault_handler(regs))) - ret = true; - - return ret; -} - /* * Check whether the instruction inst is a store using * an update addressing form which will update r1. @@ -461,8 +441,9 @@ static int __do_page_fault(struct pt_regs *regs, unsigned long address, int is_write = page_fault_is_write(error_code); vm_fault_t fault, major = 0; bool must_retry = false; + bool kprobe_fault = kprobe_page_fault(regs, 11); - if (notify_page_fault(regs)) + if (unlikely(debugger_fault_handler(regs) || kprobe_fault)) return 0; if (unlikely(page_fault_is_bad(error_code))) { diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 0ba174f779da..63507662828f 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -67,20 +67,6 @@ static int __init fault_init(void) } early_initcall(fault_init); -static inline int notify_page_fault(struct pt_regs *regs) -{ - int ret = 0; - - /* kprobe_running() needs smp_processor_id() */ - if (kprobes_built_in() && !user_mode(regs)) { - preempt_disable(); - if (kprobe_running() && kprobe_fault_handler(regs, 14)) - ret = 1; - preempt_enable(); - } - return ret; -} - /* * Find out which address space caused the exception. */ @@ -412,7 +398,7 @@ static inline vm_fault_t do_exception(struct pt_regs *regs, int access) */ clear_pt_regs_flag(regs, PIF_PER_TRAP); - if (notify_page_fault(regs)) + if (kprobe_page_fault(regs, 14)) return 0; mm = tsk->mm; diff --git a/arch/sh/mm/fault.c b/arch/sh/mm/fault.c index 3093bc372138..5f51456f4fc7 100644 --- a/arch/sh/mm/fault.c +++ b/arch/sh/mm/fault.c @@ -24,20 +24,6 @@ #include #include -static inline int notify_page_fault(struct pt_regs *regs, int trap) -{ - int ret = 0; - - if (kprobes_built_in() && !user_mode(regs)) { - preempt_disable(); - if (kprobe_running() && kprobe_fault_handler(regs, trap)) - ret = 1; - preempt_enable(); - } - - return ret; -} - static void force_sig_info_fault(int si_signo, int si_code, unsigned long address) { @@ -412,14 +398,14 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, if (unlikely(fault_in_kernel_space(address))) { if (vmalloc_fault(address) >= 0) return; - if (notify_page_fault(regs, vec)) + if (kprobe_page_fault(regs, vec)) return; bad_area_nosemaphore(regs, error_code, address); return; } - if (unlikely(notify_page_fault(regs, vec))) + if (unlikely(kprobe_page_fault(regs, vec))) return; /* Only enable interrupts if they were on before the fault */ diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c index 83fda4d9c3b2..2371fb6b97e4 100644 --- a/arch/sparc/mm/fault_64.c +++ b/arch/sparc/mm/fault_64.c @@ -38,20 +38,6 @@ int show_unhandled_signals = 1; -static inline __kprobes int notify_page_fault(struct pt_regs *regs) -{ - int ret = 0; - - /* kprobe_running() needs smp_processor_id() */ - if (kprobes_built_in() && !user_mode(regs)) { - preempt_disable(); - if (kprobe_running() && kprobe_fault_handler(regs, 0)) - ret = 1; - preempt_enable(); - } - return ret; -} - static void __kprobes unhandled_fault(unsigned long address, struct task_struct *tsk, struct pt_regs *regs) @@ -285,7 +271,7 @@ asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs) fault_code = get_thread_fault_code(); - if (notify_page_fault(regs)) + if (kprobe_page_fault(regs, 0)) goto exit_exception; si_code = SEGV_MAPERR; diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 794f364cb882..d1634c59ed56 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -46,23 +46,6 @@ kmmio_fault(struct pt_regs *regs, unsigned long addr) return 0; } -static nokprobe_inline int kprobes_fault(struct pt_regs *regs) -{ - if (!kprobes_built_in()) - return 0; - if (user_mode(regs)) - return 0; - /* - * To be potentially processing a kprobe fault and to be allowed to call - * kprobe_running(), we have to be non-preemptible. - */ - if (preemptible()) - return 0; - if (!kprobe_running()) - return 0; - return kprobe_fault_handler(regs, X86_TRAP_PF); -} - /* * Prefetch quirks: * @@ -1282,7 +1265,7 @@ do_kern_addr_fault(struct pt_regs *regs, unsigned long hw_error_code, return; /* kprobes don't want to hook the spurious faults: */ - if (kprobes_fault(regs)) + if (kprobe_page_fault(regs, X86_TRAP_PF)) return; /* @@ -1313,7 +1296,7 @@ void do_user_addr_fault(struct pt_regs *regs, mm = tsk->mm; /* kprobes don't want to hook the spurious faults: */ - if (unlikely(kprobes_fault(regs))) + if (unlikely(kprobe_page_fault(regs, X86_TRAP_PF))) return; /* diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 443d9800ca3f..04bdaf01112c 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -458,4 +458,23 @@ static inline bool is_kprobe_optinsn_slot(unsigned long addr) } #endif +/* Returns true if kprobes handled the fault */ +static nokprobe_inline bool kprobe_page_fault(struct pt_regs *regs, + unsigned int trap) +{ + if (!kprobes_built_in()) + return false; + if (user_mode(regs)) + return false; + /* + * To be potentially processing a kprobe fault and to be allowed + * to call kprobe_running(), we have to be non-preemptible. + */ + if (preemptible()) + return false; + if (!kprobe_running()) + return false; + return kprobe_fault_handler(regs, trap); +} + #endif /* _LINUX_KPROBES_H */ -- cgit v1.2.3 From 33644b95eb342201511fc951d8fcd10362bd435b Mon Sep 17 00:00:00 2001 From: "Dmitry V. Levin" Date: Tue, 16 Jul 2019 16:29:24 -0700 Subject: nds32: fix asm/syscall.h PTRACE_GET_SYSCALL_INFO is a generic ptrace API that lets ptracer obtain details of the syscall the tracee is blocked in. There are two reasons for a special syscall-related ptrace request. Firstly, with the current ptrace API there are cases when ptracer cannot retrieve necessary information about syscalls. Some examples include: * The notorious int-0x80-from-64-bit-task issue. See [1] for details. In short, if a 64-bit task performs a syscall through int 0x80, its tracer has no reliable means to find out that the syscall was, in fact, a compat syscall, and misidentifies it. * Syscall-enter-stop and syscall-exit-stop look the same for the tracer. Common practice is to keep track of the sequence of ptrace-stops in order not to mix the two syscall-stops up. But it is not as simple as it looks; for example, strace had a (just recently fixed) long-standing bug where attaching strace to a tracee that is performing the execve system call led to the tracer identifying the following syscall-exit-stop as syscall-enter-stop, which messed up all the state tracking. * Since the introduction of commit 84d77d3f06e7 ("ptrace: Don't allow accessing an undumpable mm"), both PTRACE_PEEKDATA and process_vm_readv become unavailable when the process dumpable flag is cleared. On such architectures as ia64 this results in all syscall arguments being unavailable for the tracer. Secondly, ptracers also have to support a lot of arch-specific code for obtaining information about the tracee. For some architectures, this requires a ptrace(PTRACE_PEEKUSER, ...) invocation for every syscall argument and return value. PTRACE_GET_SYSCALL_INFO returns the following structure: struct ptrace_syscall_info { __u8 op; /* PTRACE_SYSCALL_INFO_* */ __u32 arch __attribute__((__aligned__(sizeof(__u32)))); __u64 instruction_pointer; __u64 stack_pointer; union { struct { __u64 nr; __u64 args[6]; } entry; struct { __s64 rval; __u8 is_error; } exit; struct { __u64 nr; __u64 args[6]; __u32 ret_data; } seccomp; }; }; The structure was chosen according to [2], except for the following changes: * seccomp substructure was added as a superset of entry substructure * the type of nr field was changed from int to __u64 because syscall numbers are, as a practical matter, 64 bits * stack_pointer field was added along with instruction_pointer field since it is readily available and can save the tracer from extra PTRACE_GETREGS/PTRACE_GETREGSET calls * arch is always initialized to aid with tracing system calls such as execve() * instruction_pointer and stack_pointer are always initialized so they could be easily obtained for non-syscall stops * a boolean is_error field was added along with rval field, this way the tracer can more reliably distinguish a return value from an error value strace has been ported to PTRACE_GET_SYSCALL_INFO. Starting with release 4.26, strace uses PTRACE_GET_SYSCALL_INFO API as the preferred mechanism of obtaining syscall information. [1] https://lore.kernel.org/lkml/CA+55aFzcSVmdDj9Lh_gdbz1OzHyEm6ZrGPBDAJnywm2LF_eVyg@mail.gmail.com/ [2] https://lore.kernel.org/lkml/CAObL_7GM0n80N7J_DFw_eQyfLyzq+sf4y2AvsCCV88Tb3AwEHA@mail.gmail.com/ This patch (of 7): All syscall_get_*() and syscall_set_*() functions must be defined as static inline as on all other architectures, otherwise asm/syscall.h cannot be included in more than one compilation unit. This bug has to be fixed in order to extend the generic ptrace API with PTRACE_GET_SYSCALL_INFO request. Link: http://lkml.kernel.org/r/20190510152749.GA28558@altlinux.org Fixes: 1932fbe36e02 ("nds32: System calls handling") Signed-off-by: Dmitry V. Levin Reported-by: kbuild test robot Acked-by: Greentime Hu Cc: Vincent Chen Cc: Elvira Khabirova Cc: Eugene Syromyatnikov Cc: Oleg Nesterov Cc: Andy Lutomirski Cc: Benjamin Herrenschmidt Cc: Helge Deller [parisc] Cc: James E.J. Bottomley Cc: James Hogan Cc: Kees Cook Cc: Michael Ellerman Cc: Paul Burton Cc: Paul Mackerras Cc: Ralf Baechle Cc: Richard Kuo Cc: Shuah Khan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/nds32/include/asm/syscall.h | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/nds32/include/asm/syscall.h b/arch/nds32/include/asm/syscall.h index 899b2fb4b52f..7b5180d78e20 100644 --- a/arch/nds32/include/asm/syscall.h +++ b/arch/nds32/include/asm/syscall.h @@ -26,7 +26,8 @@ struct pt_regs; * * It's only valid to call this when @task is known to be blocked. */ -int syscall_get_nr(struct task_struct *task, struct pt_regs *regs) +static inline int +syscall_get_nr(struct task_struct *task, struct pt_regs *regs) { return regs->syscallno; } @@ -47,7 +48,8 @@ int syscall_get_nr(struct task_struct *task, struct pt_regs *regs) * system call instruction. This may not be the same as what the * register state looked like at system call entry tracing. */ -void syscall_rollback(struct task_struct *task, struct pt_regs *regs) +static inline void +syscall_rollback(struct task_struct *task, struct pt_regs *regs) { regs->uregs[0] = regs->orig_r0; } @@ -62,7 +64,8 @@ void syscall_rollback(struct task_struct *task, struct pt_regs *regs) * It's only valid to call this when @task is stopped for tracing on exit * from a system call, due to %TIF_SYSCALL_TRACE or %TIF_SYSCALL_AUDIT. */ -long syscall_get_error(struct task_struct *task, struct pt_regs *regs) +static inline long +syscall_get_error(struct task_struct *task, struct pt_regs *regs) { unsigned long error = regs->uregs[0]; return IS_ERR_VALUE(error) ? error : 0; @@ -79,7 +82,8 @@ long syscall_get_error(struct task_struct *task, struct pt_regs *regs) * It's only valid to call this when @task is stopped for tracing on exit * from a system call, due to %TIF_SYSCALL_TRACE or %TIF_SYSCALL_AUDIT. */ -long syscall_get_return_value(struct task_struct *task, struct pt_regs *regs) +static inline long +syscall_get_return_value(struct task_struct *task, struct pt_regs *regs) { return regs->uregs[0]; } @@ -99,8 +103,9 @@ long syscall_get_return_value(struct task_struct *task, struct pt_regs *regs) * It's only valid to call this when @task is stopped for tracing on exit * from a system call, due to %TIF_SYSCALL_TRACE or %TIF_SYSCALL_AUDIT. */ -void syscall_set_return_value(struct task_struct *task, struct pt_regs *regs, - int error, long val) +static inline void +syscall_set_return_value(struct task_struct *task, struct pt_regs *regs, + int error, long val) { regs->uregs[0] = (long)error ? error : val; } @@ -118,8 +123,9 @@ void syscall_set_return_value(struct task_struct *task, struct pt_regs *regs, * entry to a system call, due to %TIF_SYSCALL_TRACE or %TIF_SYSCALL_AUDIT. */ #define SYSCALL_MAX_ARGS 6 -void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned long *args) +static inline void +syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, + unsigned long *args) { args[0] = regs->orig_r0; args++; @@ -138,8 +144,9 @@ void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, * It's only valid to call this when @task is stopped for tracing on * entry to a system call, due to %TIF_SYSCALL_TRACE or %TIF_SYSCALL_AUDIT. */ -void syscall_set_arguments(struct task_struct *task, struct pt_regs *regs, - const unsigned long *args) +static inline void +syscall_set_arguments(struct task_struct *task, struct pt_regs *regs, + const unsigned long *args) { regs->orig_r0 = args[0]; args++; -- cgit v1.2.3 From 6c132dd6d4020ab37a842be93125d3f96432d01d Mon Sep 17 00:00:00 2001 From: "Dmitry V. Levin" Date: Tue, 16 Jul 2019 16:29:28 -0700 Subject: hexagon: define syscall_get_error() and syscall_get_return_value() syscall_get_* functions are required to be implemented on all architectures in order to extend the generic ptrace API with PTRACE_GET_SYSCALL_INFO request. This adds remaining 2 syscall_get_* functions as documented in asm-generic/syscall.h: syscall_get_error and syscall_get_return_value. Link: http://lkml.kernel.org/r/20190510152756.GB28558@altlinux.org Signed-off-by: Dmitry V. Levin Cc: Richard Kuo Cc: Elvira Khabirova Cc: Eugene Syromyatnikov Cc: Oleg Nesterov Cc: Andy Lutomirski Cc: Benjamin Herrenschmidt Cc: Greentime Hu Cc: Helge Deller [parisc] Cc: James E.J. Bottomley Cc: James Hogan Cc: kbuild test robot Cc: Kees Cook Cc: Michael Ellerman Cc: Paul Burton Cc: Paul Mackerras Cc: Ralf Baechle Cc: Shuah Khan Cc: Vincent Chen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/hexagon/include/asm/syscall.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'arch') diff --git a/arch/hexagon/include/asm/syscall.h b/arch/hexagon/include/asm/syscall.h index 4f054b1ddef5..f6e454f18038 100644 --- a/arch/hexagon/include/asm/syscall.h +++ b/arch/hexagon/include/asm/syscall.h @@ -9,6 +9,8 @@ #define _ASM_HEXAGON_SYSCALL_H #include +#include +#include typedef long (*syscall_fn)(unsigned long, unsigned long, unsigned long, unsigned long, @@ -31,6 +33,18 @@ static inline void syscall_get_arguments(struct task_struct *task, memcpy(args, &(®s->r00)[0], 6 * sizeof(args[0])); } +static inline long syscall_get_error(struct task_struct *task, + struct pt_regs *regs) +{ + return IS_ERR_VALUE(regs->r00) ? regs->r00 : 0; +} + +static inline long syscall_get_return_value(struct task_struct *task, + struct pt_regs *regs) +{ + return regs->r00; +} + static inline int syscall_get_arch(struct task_struct *task) { return AUDIT_ARCH_HEXAGON; -- cgit v1.2.3 From ba849160a0fa634eaad34183632f84ac82506f14 Mon Sep 17 00:00:00 2001 From: "Dmitry V. Levin" Date: Tue, 16 Jul 2019 16:29:32 -0700 Subject: mips: define syscall_get_error() syscall_get_error() is required to be implemented on all architectures in addition to already implemented syscall_get_nr(), syscall_get_arguments(), syscall_get_return_value(), and syscall_get_arch() functions in order to extend the generic ptrace API with PTRACE_GET_SYSCALL_INFO request. Link: http://lkml.kernel.org/r/20190510152803.GC28558@altlinux.org Signed-off-by: Dmitry V. Levin Acked-by: Paul Burton Cc: Elvira Khabirova Cc: Eugene Syromyatnikov Cc: Ralf Baechle Cc: James Hogan Cc: Oleg Nesterov Cc: Andy Lutomirski Cc: Benjamin Herrenschmidt Cc: Greentime Hu Cc: Helge Deller [parisc] Cc: James E.J. Bottomley Cc: kbuild test robot Cc: Kees Cook Cc: Michael Ellerman Cc: Paul Mackerras Cc: Richard Kuo Cc: Shuah Khan Cc: Vincent Chen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/mips/include/asm/syscall.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch') diff --git a/arch/mips/include/asm/syscall.h b/arch/mips/include/asm/syscall.h index acf80ae0a430..83bb439597d8 100644 --- a/arch/mips/include/asm/syscall.h +++ b/arch/mips/include/asm/syscall.h @@ -89,6 +89,12 @@ static inline unsigned long mips_get_syscall_arg(unsigned long *arg, unreachable(); } +static inline long syscall_get_error(struct task_struct *task, + struct pt_regs *regs) +{ + return regs->regs[7] ? -regs->regs[2] : 0; +} + static inline long syscall_get_return_value(struct task_struct *task, struct pt_regs *regs) { -- cgit v1.2.3 From 2938c1f8faa0b3b95581eba9738cd24f7b791c80 Mon Sep 17 00:00:00 2001 From: "Dmitry V. Levin" Date: Tue, 16 Jul 2019 16:29:35 -0700 Subject: parisc: define syscall_get_error() syscall_get_error() is required to be implemented on all architectures in addition to already implemented syscall_get_nr(), syscall_get_arguments(), syscall_get_return_value(), and syscall_get_arch() functions in order to extend the generic ptrace API with PTRACE_GET_SYSCALL_INFO request. Link: http://lkml.kernel.org/r/20190510152812.GD28558@altlinux.org Signed-off-by: Dmitry V. Levin Acked-by: Helge Deller [parisc] Cc: James E.J. Bottomley Cc: Elvira Khabirova Cc: Eugene Syromyatnikov Cc: Oleg Nesterov Cc: Andy Lutomirski Cc: Benjamin Herrenschmidt Cc: Greentime Hu Cc: James Hogan Cc: kbuild test robot Cc: Kees Cook Cc: Michael Ellerman Cc: Paul Burton Cc: Paul Mackerras Cc: Ralf Baechle Cc: Richard Kuo Cc: Shuah Khan Cc: Vincent Chen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/parisc/include/asm/syscall.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch') diff --git a/arch/parisc/include/asm/syscall.h b/arch/parisc/include/asm/syscall.h index 80757e43cf2c..00b127a5e09b 100644 --- a/arch/parisc/include/asm/syscall.h +++ b/arch/parisc/include/asm/syscall.h @@ -29,6 +29,13 @@ static inline void syscall_get_arguments(struct task_struct *tsk, args[0] = regs->gr[26]; } +static inline long syscall_get_error(struct task_struct *task, + struct pt_regs *regs) +{ + unsigned long error = regs->gr[28]; + return IS_ERR_VALUE(error) ? error : 0; +} + static inline long syscall_get_return_value(struct task_struct *task, struct pt_regs *regs) { -- cgit v1.2.3 From f296f1df6e0e5b17654709c05b1821a1b58d329f Mon Sep 17 00:00:00 2001 From: "Dmitry V. Levin" Date: Tue, 16 Jul 2019 16:29:39 -0700 Subject: powerpc: define syscall_get_error() syscall_get_error() is required to be implemented on this architecture in addition to already implemented syscall_get_nr(), syscall_get_arguments(), syscall_get_return_value(), and syscall_get_arch() functions in order to extend the generic ptrace API with PTRACE_GET_SYSCALL_INFO request. Link: http://lkml.kernel.org/r/20190510152824.GE28558@altlinux.org Signed-off-by: Dmitry V. Levin Acked-by: Michael Ellerman Cc: Elvira Khabirova Cc: Eugene Syromyatnikov Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Oleg Nesterov Cc: Andy Lutomirski Cc: Greentime Hu Cc: Helge Deller [parisc] Cc: James E.J. Bottomley Cc: James Hogan Cc: kbuild test robot Cc: Kees Cook Cc: Paul Burton Cc: Ralf Baechle Cc: Richard Kuo Cc: Shuah Khan Cc: Vincent Chen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/include/asm/syscall.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/include/asm/syscall.h b/arch/powerpc/include/asm/syscall.h index 81abcf6a737b..38d62acfdce7 100644 --- a/arch/powerpc/include/asm/syscall.h +++ b/arch/powerpc/include/asm/syscall.h @@ -35,6 +35,16 @@ static inline void syscall_rollback(struct task_struct *task, regs->gpr[3] = regs->orig_gpr3; } +static inline long syscall_get_error(struct task_struct *task, + struct pt_regs *regs) +{ + /* + * If the system call failed, + * regs->gpr[3] contains a positive ERRORCODE. + */ + return (regs->ccr & 0x10000000UL) ? -regs->gpr[3] : 0; +} + static inline long syscall_get_return_value(struct task_struct *task, struct pt_regs *regs) { -- cgit v1.2.3 From 8aa3c927ec10d1230c3ace8357f624479665f701 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Tue, 16 Jul 2019 16:30:41 -0700 Subject: mm/mmap: move common defines to mman-common.h Two architecture that use arch specific MMAP flags are powerpc and sparc. We still have few flag values common across them and other architectures. Consolidate this in mman-common.h. Also update the comment to indicate where to find HugeTLB specific reserved values Link: http://lkml.kernel.org/r/20190604090950.31417-1-aneesh.kumar@linux.ibm.com Signed-off-by: Aneesh Kumar K.V Reviewed-by: Andrew Morton Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/include/uapi/asm/mman.h | 6 +----- arch/sparc/include/uapi/asm/mman.h | 6 ------ include/uapi/asm-generic/mman-common.h | 6 +++++- include/uapi/asm-generic/mman.h | 9 ++++----- 4 files changed, 10 insertions(+), 17 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/uapi/asm/mman.h b/arch/powerpc/include/uapi/asm/mman.h index 65065ce32814..c0c737215b00 100644 --- a/arch/powerpc/include/uapi/asm/mman.h +++ b/arch/powerpc/include/uapi/asm/mman.h @@ -21,15 +21,11 @@ #define MAP_DENYWRITE 0x0800 /* ETXTBSY */ #define MAP_EXECUTABLE 0x1000 /* mark it as an executable */ + #define MCL_CURRENT 0x2000 /* lock all currently mapped pages */ #define MCL_FUTURE 0x4000 /* lock all additions to address space */ #define MCL_ONFAULT 0x8000 /* lock all pages that are faulted in */ -#define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */ -#define MAP_NONBLOCK 0x10000 /* do not block on IO */ -#define MAP_STACK 0x20000 /* give out an address that is best suited for process/thread stacks */ -#define MAP_HUGETLB 0x40000 /* create a huge page mapping */ - /* Override any generic PKEY permission defines */ #define PKEY_DISABLE_EXECUTE 0x4 #undef PKEY_ACCESS_MASK diff --git a/arch/sparc/include/uapi/asm/mman.h b/arch/sparc/include/uapi/asm/mman.h index f6f99ec65bb3..cec9f4109687 100644 --- a/arch/sparc/include/uapi/asm/mman.h +++ b/arch/sparc/include/uapi/asm/mman.h @@ -22,10 +22,4 @@ #define MCL_FUTURE 0x4000 /* lock all additions to address space */ #define MCL_ONFAULT 0x8000 /* lock all pages that are faulted in */ -#define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */ -#define MAP_NONBLOCK 0x10000 /* do not block on IO */ -#define MAP_STACK 0x20000 /* give out an address that is best suited for process/thread stacks */ -#define MAP_HUGETLB 0x40000 /* create a huge page mapping */ - - #endif /* _UAPI__SPARC_MMAN_H__ */ diff --git a/include/uapi/asm-generic/mman-common.h b/include/uapi/asm-generic/mman-common.h index 93a8b420ce1e..63b1f506ea67 100644 --- a/include/uapi/asm-generic/mman-common.h +++ b/include/uapi/asm-generic/mman-common.h @@ -20,7 +20,11 @@ #define MAP_FIXED 0x10 /* Interpret addr exactly */ #define MAP_ANONYMOUS 0x20 /* don't use a file */ -/* 0x0100 - 0x40000 flags are defined in asm-generic/mman.h */ +/* 0x0100 - 0x4000 flags are defined in asm-generic/mman.h */ +#define MAP_POPULATE 0x008000 /* populate (prefault) pagetables */ +#define MAP_NONBLOCK 0x010000 /* do not block on IO */ +#define MAP_STACK 0x020000 /* give out an address that is best suited for process/thread stacks */ +#define MAP_HUGETLB 0x040000 /* create a huge page mapping */ #define MAP_SYNC 0x080000 /* perform synchronous page faults for the mapping */ #define MAP_FIXED_NOREPLACE 0x100000 /* MAP_FIXED which doesn't unmap underlying mapping */ diff --git a/include/uapi/asm-generic/mman.h b/include/uapi/asm-generic/mman.h index 2dffcbf705b3..57e8195d0b53 100644 --- a/include/uapi/asm-generic/mman.h +++ b/include/uapi/asm-generic/mman.h @@ -9,12 +9,11 @@ #define MAP_EXECUTABLE 0x1000 /* mark it as an executable */ #define MAP_LOCKED 0x2000 /* pages are locked */ #define MAP_NORESERVE 0x4000 /* don't check for reservations */ -#define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */ -#define MAP_NONBLOCK 0x10000 /* do not block on IO */ -#define MAP_STACK 0x20000 /* give out an address that is best suited for process/thread stacks */ -#define MAP_HUGETLB 0x40000 /* create a huge page mapping */ -/* Bits [26:31] are reserved, see mman-common.h for MAP_HUGETLB usage */ +/* + * Bits [26:31] are reserved, see asm-generic/hugetlb_encode.h + * for MAP_HUGETLB usage + */ #define MCL_CURRENT 1 /* lock all current mappings */ #define MCL_FUTURE 2 /* lock all future mappings */ -- cgit v1.2.3 From 175967318c3018d01931ac950c82adab5deb47ca Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 16 Jul 2019 16:30:47 -0700 Subject: mm: introduce ARCH_HAS_PTE_DEVMAP ARCH_HAS_ZONE_DEVICE is somewhat meaningless in itself, and combined with the long-out-of-date comment can lead to the impression than an architecture may just enable it (since __add_pages() now "comprehends device memory" for itself) and expect things to work. In practice, however, ZONE_DEVICE users have little chance of functioning correctly without __HAVE_ARCH_PTE_DEVMAP, so let's clean that up the same way as ARCH_HAS_PTE_SPECIAL and make it the proper dependency so the real situation is clearer. Link: http://lkml.kernel.org/r/87554aa78478a02a63f2c4cf60a847279ae3eb3b.1558547956.git.robin.murphy@arm.com Signed-off-by: Robin Murphy Acked-by: Dan Williams Reviewed-by: Ira Weiny Acked-by: Oliver O'Halloran Reviewed-by: Anshuman Khandual Cc: Michael Ellerman Cc: Catalin Marinas Cc: David Hildenbrand Cc: Jerome Glisse Cc: Michal Hocko Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/Kconfig | 2 +- arch/powerpc/include/asm/book3s/64/pgtable.h | 1 - arch/x86/Kconfig | 2 +- arch/x86/include/asm/pgtable.h | 4 ++-- arch/x86/include/asm/pgtable_types.h | 1 - include/linux/mm.h | 4 ++-- include/linux/pfn_t.h | 4 ++-- mm/Kconfig | 5 ++--- mm/gup.c | 2 +- 9 files changed, 11 insertions(+), 14 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index f516796dd819..d8dcd8820369 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -129,6 +129,7 @@ config PPC select ARCH_HAS_MMIOWB if PPC64 select ARCH_HAS_PHYS_TO_DMA select ARCH_HAS_PMEM_API if PPC64 + select ARCH_HAS_PTE_DEVMAP if PPC_BOOK3S_64 select ARCH_HAS_PTE_SPECIAL select ARCH_HAS_MEMBARRIER_CALLBACKS select ARCH_HAS_SCALED_CPUTIME if VIRT_CPU_ACCOUNTING_NATIVE && PPC64 @@ -136,7 +137,6 @@ config PPC select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_HAS_UACCESS_FLUSHCACHE if PPC64 select ARCH_HAS_UBSAN_SANITIZE_ALL - select ARCH_HAS_ZONE_DEVICE if PPC_BOOK3S_64 select ARCH_HAVE_NMI_SAFE_CMPXCHG select ARCH_KEEP_MEMBLOCK select ARCH_MIGHT_HAVE_PC_PARPORT diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 62e6ea0a7650..8308f32e9782 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -90,7 +90,6 @@ #define _PAGE_SOFT_DIRTY _RPAGE_SW3 /* software: software dirty tracking */ #define _PAGE_SPECIAL _RPAGE_SW2 /* software: special page */ #define _PAGE_DEVMAP _RPAGE_SW1 /* software: ZONE_DEVICE page */ -#define __HAVE_ARCH_PTE_DEVMAP /* * Drivers request for cache inhibited pte mapping using _PAGE_NO_CACHE diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 879741336771..4a55bd01e918 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -70,6 +70,7 @@ config X86 select ARCH_HAS_KCOV if X86_64 select ARCH_HAS_MEMBARRIER_SYNC_CORE select ARCH_HAS_PMEM_API if X86_64 + select ARCH_HAS_PTE_DEVMAP if X86_64 select ARCH_HAS_PTE_SPECIAL select ARCH_HAS_REFCOUNT select ARCH_HAS_UACCESS_FLUSHCACHE if X86_64 @@ -80,7 +81,6 @@ config X86 select ARCH_HAS_STRICT_MODULE_RWX select ARCH_HAS_SYNC_CORE_BEFORE_USERMODE select ARCH_HAS_UBSAN_SANITIZE_ALL - select ARCH_HAS_ZONE_DEVICE if X86_64 select ARCH_HAVE_NMI_SAFE_CMPXCHG select ARCH_MIGHT_HAVE_ACPI_PDC if ACPI select ARCH_MIGHT_HAVE_PC_PARPORT diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 5e0509b41986..0bc530c4eb13 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -271,7 +271,7 @@ static inline int has_transparent_hugepage(void) return boot_cpu_has(X86_FEATURE_PSE); } -#ifdef __HAVE_ARCH_PTE_DEVMAP +#ifdef CONFIG_ARCH_HAS_PTE_DEVMAP static inline int pmd_devmap(pmd_t pmd) { return !!(pmd_val(pmd) & _PAGE_DEVMAP); @@ -732,7 +732,7 @@ static inline int pte_present(pte_t a) return pte_flags(a) & (_PAGE_PRESENT | _PAGE_PROTNONE); } -#ifdef __HAVE_ARCH_PTE_DEVMAP +#ifdef CONFIG_ARCH_HAS_PTE_DEVMAP static inline int pte_devmap(pte_t a) { return (pte_flags(a) & _PAGE_DEVMAP) == _PAGE_DEVMAP; diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index d6ff0bbdb394..b5e49e6bac63 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -103,7 +103,6 @@ #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) #define _PAGE_NX (_AT(pteval_t, 1) << _PAGE_BIT_NX) #define _PAGE_DEVMAP (_AT(u64, 1) << _PAGE_BIT_DEVMAP) -#define __HAVE_ARCH_PTE_DEVMAP #else #define _PAGE_NX (_AT(pteval_t, 0)) #define _PAGE_DEVMAP (_AT(pteval_t, 0)) diff --git a/include/linux/mm.h b/include/linux/mm.h index baa8b8761d8c..f43f4de4de68 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -547,7 +547,7 @@ static inline void vma_set_anonymous(struct vm_area_struct *vma) struct mmu_gather; struct inode; -#if !defined(__HAVE_ARCH_PTE_DEVMAP) || !defined(CONFIG_TRANSPARENT_HUGEPAGE) +#if !defined(CONFIG_ARCH_HAS_PTE_DEVMAP) || !defined(CONFIG_TRANSPARENT_HUGEPAGE) static inline int pmd_devmap(pmd_t pmd) { return 0; @@ -1750,7 +1750,7 @@ static inline void sync_mm_rss(struct mm_struct *mm) } #endif -#ifndef __HAVE_ARCH_PTE_DEVMAP +#ifndef CONFIG_ARCH_HAS_PTE_DEVMAP static inline int pte_devmap(pte_t pte) { return 0; diff --git a/include/linux/pfn_t.h b/include/linux/pfn_t.h index 01e8037023f7..2d9148221e9a 100644 --- a/include/linux/pfn_t.h +++ b/include/linux/pfn_t.h @@ -97,7 +97,7 @@ static inline pud_t pfn_t_pud(pfn_t pfn, pgprot_t pgprot) #endif #endif -#ifdef __HAVE_ARCH_PTE_DEVMAP +#ifdef CONFIG_ARCH_HAS_PTE_DEVMAP static inline bool pfn_t_devmap(pfn_t pfn) { const u64 flags = PFN_DEV|PFN_MAP; @@ -115,7 +115,7 @@ pmd_t pmd_mkdevmap(pmd_t pmd); defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD) pud_t pud_mkdevmap(pud_t pud); #endif -#endif /* __HAVE_ARCH_PTE_DEVMAP */ +#endif /* CONFIG_ARCH_HAS_PTE_DEVMAP */ #ifdef CONFIG_ARCH_HAS_PTE_SPECIAL static inline bool pfn_t_special(pfn_t pfn) diff --git a/mm/Kconfig b/mm/Kconfig index 495d7368ced8..56cec636a1fc 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -649,8 +649,7 @@ config IDLE_PAGE_TRACKING See Documentation/admin-guide/mm/idle_page_tracking.rst for more details. -# arch_add_memory() comprehends device memory -config ARCH_HAS_ZONE_DEVICE +config ARCH_HAS_PTE_DEVMAP bool config ZONE_DEVICE @@ -658,7 +657,7 @@ config ZONE_DEVICE depends on MEMORY_HOTPLUG depends on MEMORY_HOTREMOVE depends on SPARSEMEM_VMEMMAP - depends on ARCH_HAS_ZONE_DEVICE + depends on ARCH_HAS_PTE_DEVMAP select XARRAY_MULTI help diff --git a/mm/gup.c b/mm/gup.c index 8bbaa5523116..98f13ab37bac 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -1895,7 +1895,7 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end, } #endif /* CONFIG_ARCH_HAS_PTE_SPECIAL */ -#if defined(__HAVE_ARCH_PTE_DEVMAP) && defined(CONFIG_TRANSPARENT_HUGEPAGE) +#if defined(CONFIG_ARCH_HAS_PTE_DEVMAP) && defined(CONFIG_TRANSPARENT_HUGEPAGE) static int __gup_device_huge(unsigned long pfn, unsigned long addr, unsigned long end, struct page **pages, int *nr) { -- cgit v1.2.3 From 73b20c84d42de14673a987816dd4d132c7b1f801 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 16 Jul 2019 16:30:51 -0700 Subject: arm64: mm: implement pte_devmap support In order for things like get_user_pages() to work on ZONE_DEVICE memory, we need a software PTE bit to identify device-backed PFNs. Hook this up along with the relevant helpers to join in with ARCH_HAS_PTE_DEVMAP. [robin.murphy@arm.com: build fixes] Link: http://lkml.kernel.org/r/13026c4e64abc17133bbfa07d7731ec6691c0bcd.1559050949.git.robin.murphy@arm.com Link: http://lkml.kernel.org/r/817d92886fc3b33bcbf6e105ee83a74babb3a5aa.1558547956.git.robin.murphy@arm.com Signed-off-by: Robin Murphy Acked-by: Will Deacon Cc: Anshuman Khandual Cc: Catalin Marinas Cc: Dan Williams Cc: David Hildenbrand Cc: Ira Weiny Cc: Jerome Glisse Cc: Michael Ellerman Cc: Michal Hocko Cc: Oliver O'Halloran Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm64/Kconfig | 1 + arch/arm64/include/asm/pgtable-prot.h | 1 + arch/arm64/include/asm/pgtable.h | 21 +++++++++++++++++++++ 3 files changed, 23 insertions(+) (limited to 'arch') diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index a36ff61321ce..0758d89524d0 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -24,6 +24,7 @@ config ARM64 select ARCH_HAS_KCOV select ARCH_HAS_KEEPINITRD select ARCH_HAS_MEMBARRIER_SYNC_CORE + select ARCH_HAS_PTE_DEVMAP select ARCH_HAS_PTE_SPECIAL select ARCH_HAS_SETUP_DMA_OPS select ARCH_HAS_SET_DIRECT_MAP diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h index f318258a14be..92d2e9f28f28 100644 --- a/arch/arm64/include/asm/pgtable-prot.h +++ b/arch/arm64/include/asm/pgtable-prot.h @@ -16,6 +16,7 @@ #define PTE_WRITE (PTE_DBM) /* same as DBM (51) */ #define PTE_DIRTY (_AT(pteval_t, 1) << 55) #define PTE_SPECIAL (_AT(pteval_t, 1) << 56) +#define PTE_DEVMAP (_AT(pteval_t, 1) << 57) #define PTE_PROT_NONE (_AT(pteval_t, 1) << 58) /* only when !PTE_VALID */ #ifndef __ASSEMBLY__ diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 3052381baaeb..87a4b2ddc1a1 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -79,6 +79,7 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; #define pte_write(pte) (!!(pte_val(pte) & PTE_WRITE)) #define pte_user_exec(pte) (!(pte_val(pte) & PTE_UXN)) #define pte_cont(pte) (!!(pte_val(pte) & PTE_CONT)) +#define pte_devmap(pte) (!!(pte_val(pte) & PTE_DEVMAP)) #define pte_cont_addr_end(addr, end) \ ({ unsigned long __boundary = ((addr) + CONT_PTE_SIZE) & CONT_PTE_MASK; \ @@ -206,6 +207,11 @@ static inline pmd_t pmd_mkcont(pmd_t pmd) return __pmd(pmd_val(pmd) | PMD_SECT_CONT); } +static inline pte_t pte_mkdevmap(pte_t pte) +{ + return set_pte_bit(pte, __pgprot(PTE_DEVMAP)); +} + static inline void set_pte(pte_t *ptep, pte_t pte) { WRITE_ONCE(*ptep, pte); @@ -388,6 +394,11 @@ static inline int pmd_protnone(pmd_t pmd) #define pmd_mkhuge(pmd) (__pmd(pmd_val(pmd) & ~PMD_TABLE_BIT)) +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +#define pmd_devmap(pmd) pte_devmap(pmd_pte(pmd)) +#endif +#define pmd_mkdevmap(pmd) pte_pmd(pte_mkdevmap(pmd_pte(pmd))) + #define __pmd_to_phys(pmd) __pte_to_phys(pmd_pte(pmd)) #define __phys_to_pmd_val(phys) __phys_to_pte_val(phys) #define pmd_pfn(pmd) ((__pmd_to_phys(pmd) & PMD_MASK) >> PAGE_SHIFT) @@ -673,6 +684,16 @@ static inline int pmdp_set_access_flags(struct vm_area_struct *vma, { return ptep_set_access_flags(vma, address, (pte_t *)pmdp, pmd_pte(entry), dirty); } + +static inline int pud_devmap(pud_t pud) +{ + return 0; +} + +static inline int pgd_devmap(pgd_t pgd) +{ + return 0; +} #endif /* -- cgit v1.2.3 From 79eb597cba06c435b72f220e9d426ae413fc2579 Mon Sep 17 00:00:00 2001 From: Daniel Jordan Date: Tue, 16 Jul 2019 16:30:54 -0700 Subject: mm: add account_locked_vm utility function locked_vm accounting is done roughly the same way in five places, so unify them in a helper. Include the helper's caller in the debug print to distinguish between callsites. Error codes stay the same, so user-visible behavior does too. The one exception is that the -EPERM case in tce_account_locked_vm is removed because Alexey has never seen it triggered. [daniel.m.jordan@oracle.com: v3] Link: http://lkml.kernel.org/r/20190529205019.20927-1-daniel.m.jordan@oracle.com [sfr@canb.auug.org.au: fix mm/util.c] Link: http://lkml.kernel.org/r/20190524175045.26897-1-daniel.m.jordan@oracle.com Signed-off-by: Daniel Jordan Signed-off-by: Stephen Rothwell Tested-by: Alexey Kardashevskiy Acked-by: Alex Williamson Cc: Alan Tull Cc: Alex Williamson Cc: Benjamin Herrenschmidt Cc: Christoph Lameter Cc: Christophe Leroy Cc: Davidlohr Bueso Cc: Jason Gunthorpe Cc: Mark Rutland Cc: Michael Ellerman Cc: Moritz Fischer Cc: Paul Mackerras Cc: Steve Sistare Cc: Wu Hao Cc: Ira Weiny Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/kvm/book3s_64_vio.c | 44 ++------------------- arch/powerpc/mm/book3s64/iommu_api.c | 41 ++------------------ drivers/fpga/dfl-afu-dma-region.c | 53 ++----------------------- drivers/vfio/vfio_iommu_spapr_tce.c | 54 +++----------------------- drivers/vfio/vfio_iommu_type1.c | 17 +------- include/linux/mm.h | 4 ++ mm/util.c | 75 ++++++++++++++++++++++++++++++++++++ 7 files changed, 98 insertions(+), 190 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c index 5bf05cc774e2..e99a14798ab0 100644 --- a/arch/powerpc/kvm/book3s_64_vio.c +++ b/arch/powerpc/kvm/book3s_64_vio.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -45,43 +46,6 @@ static unsigned long kvmppc_stt_pages(unsigned long tce_pages) return tce_pages + ALIGN(stt_bytes, PAGE_SIZE) / PAGE_SIZE; } -static long kvmppc_account_memlimit(unsigned long stt_pages, bool inc) -{ - long ret = 0; - - if (!current || !current->mm) - return ret; /* process exited */ - - down_write(¤t->mm->mmap_sem); - - if (inc) { - unsigned long locked, lock_limit; - - locked = current->mm->locked_vm + stt_pages; - lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; - if (locked > lock_limit && !capable(CAP_IPC_LOCK)) - ret = -ENOMEM; - else - current->mm->locked_vm += stt_pages; - } else { - if (WARN_ON_ONCE(stt_pages > current->mm->locked_vm)) - stt_pages = current->mm->locked_vm; - - current->mm->locked_vm -= stt_pages; - } - - pr_debug("[%d] RLIMIT_MEMLOCK KVM %c%ld %ld/%ld%s\n", current->pid, - inc ? '+' : '-', - stt_pages << PAGE_SHIFT, - current->mm->locked_vm << PAGE_SHIFT, - rlimit(RLIMIT_MEMLOCK), - ret ? " - exceeded" : ""); - - up_write(¤t->mm->mmap_sem); - - return ret; -} - static void kvm_spapr_tce_iommu_table_free(struct rcu_head *head) { struct kvmppc_spapr_tce_iommu_table *stit = container_of(head, @@ -291,7 +255,7 @@ static int kvm_spapr_tce_release(struct inode *inode, struct file *filp) kvm_put_kvm(stt->kvm); - kvmppc_account_memlimit( + account_locked_vm(current->mm, kvmppc_stt_pages(kvmppc_tce_pages(stt->size)), false); call_rcu(&stt->rcu, release_spapr_tce_table); @@ -316,7 +280,7 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, return -EINVAL; npages = kvmppc_tce_pages(size); - ret = kvmppc_account_memlimit(kvmppc_stt_pages(npages), true); + ret = account_locked_vm(current->mm, kvmppc_stt_pages(npages), true); if (ret) return ret; @@ -362,7 +326,7 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, kfree(stt); fail_acct: - kvmppc_account_memlimit(kvmppc_stt_pages(npages), false); + account_locked_vm(current->mm, kvmppc_stt_pages(npages), false); return ret; } diff --git a/arch/powerpc/mm/book3s64/iommu_api.c b/arch/powerpc/mm/book3s64/iommu_api.c index 90ee3a89722c..b056cae3388b 100644 --- a/arch/powerpc/mm/book3s64/iommu_api.c +++ b/arch/powerpc/mm/book3s64/iommu_api.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -46,40 +47,6 @@ struct mm_iommu_table_group_mem_t { u64 dev_hpa; /* Device memory base address */ }; -static long mm_iommu_adjust_locked_vm(struct mm_struct *mm, - unsigned long npages, bool incr) -{ - long ret = 0, locked, lock_limit; - - if (!npages) - return 0; - - down_write(&mm->mmap_sem); - - if (incr) { - locked = mm->locked_vm + npages; - lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; - if (locked > lock_limit && !capable(CAP_IPC_LOCK)) - ret = -ENOMEM; - else - mm->locked_vm += npages; - } else { - if (WARN_ON_ONCE(npages > mm->locked_vm)) - npages = mm->locked_vm; - mm->locked_vm -= npages; - } - - pr_debug("[%d] RLIMIT_MEMLOCK HASH64 %c%ld %ld/%ld\n", - current ? current->pid : 0, - incr ? '+' : '-', - npages << PAGE_SHIFT, - mm->locked_vm << PAGE_SHIFT, - rlimit(RLIMIT_MEMLOCK)); - up_write(&mm->mmap_sem); - - return ret; -} - bool mm_iommu_preregistered(struct mm_struct *mm) { return !list_empty(&mm->context.iommu_group_mem_list); @@ -96,7 +63,7 @@ static long mm_iommu_do_alloc(struct mm_struct *mm, unsigned long ua, unsigned long entry, chunk; if (dev_hpa == MM_IOMMU_TABLE_INVALID_HPA) { - ret = mm_iommu_adjust_locked_vm(mm, entries, true); + ret = account_locked_vm(mm, entries, true); if (ret) return ret; @@ -211,7 +178,7 @@ free_exit: kfree(mem); unlock_exit: - mm_iommu_adjust_locked_vm(mm, locked_entries, false); + account_locked_vm(mm, locked_entries, false); return ret; } @@ -311,7 +278,7 @@ long mm_iommu_put(struct mm_struct *mm, struct mm_iommu_table_group_mem_t *mem) unlock_exit: mutex_unlock(&mem_list_mutex); - mm_iommu_adjust_locked_vm(mm, unlock_entries, false); + account_locked_vm(mm, unlock_entries, false); return ret; } diff --git a/drivers/fpga/dfl-afu-dma-region.c b/drivers/fpga/dfl-afu-dma-region.c index dcd80b088c7b..62f924489db5 100644 --- a/drivers/fpga/dfl-afu-dma-region.c +++ b/drivers/fpga/dfl-afu-dma-region.c @@ -12,6 +12,7 @@ #include #include #include +#include #include "dfl-afu.h" @@ -31,52 +32,6 @@ void afu_dma_region_init(struct dfl_feature_platform_data *pdata) afu->dma_regions = RB_ROOT; } -/** - * afu_dma_adjust_locked_vm - adjust locked memory - * @dev: port device - * @npages: number of pages - * @incr: increase or decrease locked memory - * - * Increase or decrease the locked memory size with npages input. - * - * Return 0 on success. - * Return -ENOMEM if locked memory size is over the limit and no CAP_IPC_LOCK. - */ -static int afu_dma_adjust_locked_vm(struct device *dev, long npages, bool incr) -{ - unsigned long locked, lock_limit; - int ret = 0; - - /* the task is exiting. */ - if (!current->mm) - return 0; - - down_write(¤t->mm->mmap_sem); - - if (incr) { - locked = current->mm->locked_vm + npages; - lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; - - if (locked > lock_limit && !capable(CAP_IPC_LOCK)) - ret = -ENOMEM; - else - current->mm->locked_vm += npages; - } else { - if (WARN_ON_ONCE(npages > current->mm->locked_vm)) - npages = current->mm->locked_vm; - current->mm->locked_vm -= npages; - } - - dev_dbg(dev, "[%d] RLIMIT_MEMLOCK %c%ld %ld/%ld%s\n", current->pid, - incr ? '+' : '-', npages << PAGE_SHIFT, - current->mm->locked_vm << PAGE_SHIFT, rlimit(RLIMIT_MEMLOCK), - ret ? "- exceeded" : ""); - - up_write(¤t->mm->mmap_sem); - - return ret; -} - /** * afu_dma_pin_pages - pin pages of given dma memory region * @pdata: feature device platform data @@ -92,7 +47,7 @@ static int afu_dma_pin_pages(struct dfl_feature_platform_data *pdata, struct device *dev = &pdata->dev->dev; int ret, pinned; - ret = afu_dma_adjust_locked_vm(dev, npages, true); + ret = account_locked_vm(current->mm, npages, true); if (ret) return ret; @@ -121,7 +76,7 @@ put_pages: free_pages: kfree(region->pages); unlock_vm: - afu_dma_adjust_locked_vm(dev, npages, false); + account_locked_vm(current->mm, npages, false); return ret; } @@ -141,7 +96,7 @@ static void afu_dma_unpin_pages(struct dfl_feature_platform_data *pdata, put_all_pages(region->pages, npages); kfree(region->pages); - afu_dma_adjust_locked_vm(dev, npages, false); + account_locked_vm(current->mm, npages, false); dev_dbg(dev, "%ld pages unpinned\n", npages); } diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c index 7048c9198c21..8ce9ad21129f 100644 --- a/drivers/vfio/vfio_iommu_spapr_tce.c +++ b/drivers/vfio/vfio_iommu_spapr_tce.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -31,51 +32,6 @@ static void tce_iommu_detach_group(void *iommu_data, struct iommu_group *iommu_group); -static long try_increment_locked_vm(struct mm_struct *mm, long npages) -{ - long ret = 0, locked, lock_limit; - - if (WARN_ON_ONCE(!mm)) - return -EPERM; - - if (!npages) - return 0; - - down_write(&mm->mmap_sem); - locked = mm->locked_vm + npages; - lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; - if (locked > lock_limit && !capable(CAP_IPC_LOCK)) - ret = -ENOMEM; - else - mm->locked_vm += npages; - - pr_debug("[%d] RLIMIT_MEMLOCK +%ld %ld/%ld%s\n", current->pid, - npages << PAGE_SHIFT, - mm->locked_vm << PAGE_SHIFT, - rlimit(RLIMIT_MEMLOCK), - ret ? " - exceeded" : ""); - - up_write(&mm->mmap_sem); - - return ret; -} - -static void decrement_locked_vm(struct mm_struct *mm, long npages) -{ - if (!mm || !npages) - return; - - down_write(&mm->mmap_sem); - if (WARN_ON_ONCE(npages > mm->locked_vm)) - npages = mm->locked_vm; - mm->locked_vm -= npages; - pr_debug("[%d] RLIMIT_MEMLOCK -%ld %ld/%ld\n", current->pid, - npages << PAGE_SHIFT, - mm->locked_vm << PAGE_SHIFT, - rlimit(RLIMIT_MEMLOCK)); - up_write(&mm->mmap_sem); -} - /* * VFIO IOMMU fd for SPAPR_TCE IOMMU implementation * @@ -333,7 +289,7 @@ static int tce_iommu_enable(struct tce_container *container) return ret; locked = table_group->tce32_size >> PAGE_SHIFT; - ret = try_increment_locked_vm(container->mm, locked); + ret = account_locked_vm(container->mm, locked, true); if (ret) return ret; @@ -352,7 +308,7 @@ static void tce_iommu_disable(struct tce_container *container) container->enabled = false; BUG_ON(!container->mm); - decrement_locked_vm(container->mm, container->locked_pages); + account_locked_vm(container->mm, container->locked_pages, false); } static void *tce_iommu_open(unsigned long arg) @@ -656,7 +612,7 @@ static long tce_iommu_create_table(struct tce_container *container, if (!table_size) return -EINVAL; - ret = try_increment_locked_vm(container->mm, table_size >> PAGE_SHIFT); + ret = account_locked_vm(container->mm, table_size >> PAGE_SHIFT, true); if (ret) return ret; @@ -675,7 +631,7 @@ static void tce_iommu_free_table(struct tce_container *container, unsigned long pages = tbl->it_allocated_size >> PAGE_SHIFT; iommu_tce_table_put(tbl); - decrement_locked_vm(container->mm, pages); + account_locked_vm(container->mm, pages, false); } static long tce_iommu_create_window(struct tce_container *container, diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index add34adfadc7..054391f30fa8 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -272,21 +272,8 @@ static int vfio_lock_acct(struct vfio_dma *dma, long npage, bool async) ret = down_write_killable(&mm->mmap_sem); if (!ret) { - if (npage > 0) { - if (!dma->lock_cap) { - unsigned long limit; - - limit = task_rlimit(dma->task, - RLIMIT_MEMLOCK) >> PAGE_SHIFT; - - if (mm->locked_vm + npage > limit) - ret = -ENOMEM; - } - } - - if (!ret) - mm->locked_vm += npage; - + ret = __account_locked_vm(mm, abs(npage), npage > 0, dma->task, + dma->lock_cap); up_write(&mm->mmap_sem); } diff --git a/include/linux/mm.h b/include/linux/mm.h index f43f4de4de68..bd6512559bed 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1543,6 +1543,10 @@ long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages, int get_user_pages_fast(unsigned long start, int nr_pages, unsigned int gup_flags, struct page **pages); +int account_locked_vm(struct mm_struct *mm, unsigned long pages, bool inc); +int __account_locked_vm(struct mm_struct *mm, unsigned long pages, bool inc, + struct task_struct *task, bool bypass_rlim); + /* Container for pinned pfns / pages */ struct frame_vector { unsigned int nr_allocated; /* Number of frames we have space for */ diff --git a/mm/util.c b/mm/util.c index 68575a315dc5..e6351a80f248 100644 --- a/mm/util.c +++ b/mm/util.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -300,6 +301,80 @@ void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack) } #endif +/** + * __account_locked_vm - account locked pages to an mm's locked_vm + * @mm: mm to account against + * @pages: number of pages to account + * @inc: %true if @pages should be considered positive, %false if not + * @task: task used to check RLIMIT_MEMLOCK + * @bypass_rlim: %true if checking RLIMIT_MEMLOCK should be skipped + * + * Assumes @task and @mm are valid (i.e. at least one reference on each), and + * that mmap_sem is held as writer. + * + * Return: + * * 0 on success + * * -ENOMEM if RLIMIT_MEMLOCK would be exceeded. + */ +int __account_locked_vm(struct mm_struct *mm, unsigned long pages, bool inc, + struct task_struct *task, bool bypass_rlim) +{ + unsigned long locked_vm, limit; + int ret = 0; + + lockdep_assert_held_write(&mm->mmap_sem); + + locked_vm = mm->locked_vm; + if (inc) { + if (!bypass_rlim) { + limit = task_rlimit(task, RLIMIT_MEMLOCK) >> PAGE_SHIFT; + if (locked_vm + pages > limit) + ret = -ENOMEM; + } + if (!ret) + mm->locked_vm = locked_vm + pages; + } else { + WARN_ON_ONCE(pages > locked_vm); + mm->locked_vm = locked_vm - pages; + } + + pr_debug("%s: [%d] caller %ps %c%lu %lu/%lu%s\n", __func__, task->pid, + (void *)_RET_IP_, (inc) ? '+' : '-', pages << PAGE_SHIFT, + locked_vm << PAGE_SHIFT, task_rlimit(task, RLIMIT_MEMLOCK), + ret ? " - exceeded" : ""); + + return ret; +} +EXPORT_SYMBOL_GPL(__account_locked_vm); + +/** + * account_locked_vm - account locked pages to an mm's locked_vm + * @mm: mm to account against, may be NULL + * @pages: number of pages to account + * @inc: %true if @pages should be considered positive, %false if not + * + * Assumes a non-NULL @mm is valid (i.e. at least one reference on it). + * + * Return: + * * 0 on success, or if mm is NULL + * * -ENOMEM if RLIMIT_MEMLOCK would be exceeded. + */ +int account_locked_vm(struct mm_struct *mm, unsigned long pages, bool inc) +{ + int ret; + + if (pages == 0 || !mm) + return 0; + + down_write(&mm->mmap_sem); + ret = __account_locked_vm(mm, pages, inc, current, + capable(CAP_IPC_LOCK)); + up_write(&mm->mmap_sem); + + return ret; +} +EXPORT_SYMBOL_GPL(account_locked_vm); + unsigned long vm_mmap_pgoff(struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flag, unsigned long pgoff) -- cgit v1.2.3