diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2020-08-12 11:24:12 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2020-08-12 11:24:12 -0700 |
commit | 9ad57f6dfc2345ed5d3a8bf4dabac0a34069c54c (patch) | |
tree | 9e12a809a2020178eab234395b0f3e1149cb3c0d /arch | |
parent | 24fb33d40d60bd7d196400e7d5b26ff566fd98b7 (diff) | |
parent | 64019a2e467a288a16b65ab55ddcbf58c1b00187 (diff) | |
download | linux-9ad57f6dfc2345ed5d3a8bf4dabac0a34069c54c.tar.bz2 |
Merge branch 'akpm' (patches from Andrew)
Merge more updates from Andrew Morton:
- most of the rest of MM (memcg, hugetlb, vmscan, proc, compaction,
mempolicy, oom-kill, hugetlbfs, migration, thp, cma, util,
memory-hotplug, cleanups, uaccess, migration, gup, pagemap),
- various other subsystems (alpha, misc, sparse, bitmap, lib, bitops,
checkpatch, autofs, minix, nilfs, ufs, fat, signals, kmod, coredump,
exec, kdump, rapidio, panic, kcov, kgdb, ipc).
* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (164 commits)
mm/gup: remove task_struct pointer for all gup code
mm: clean up the last pieces of page fault accountings
mm/xtensa: use general page fault accounting
mm/x86: use general page fault accounting
mm/sparc64: use general page fault accounting
mm/sparc32: use general page fault accounting
mm/sh: use general page fault accounting
mm/s390: use general page fault accounting
mm/riscv: use general page fault accounting
mm/powerpc: use general page fault accounting
mm/parisc: use general page fault accounting
mm/openrisc: use general page fault accounting
mm/nios2: use general page fault accounting
mm/nds32: use general page fault accounting
mm/mips: use general page fault accounting
mm/microblaze: use general page fault accounting
mm/m68k: use general page fault accounting
mm/ia64: use general page fault accounting
mm/hexagon: use general page fault accounting
mm/csky: use general page fault accounting
...
Diffstat (limited to 'arch')
66 files changed, 146 insertions, 348 deletions
diff --git a/arch/alpha/include/asm/io.h b/arch/alpha/include/asm/io.h index a4d0c19f1e79..640e1a2f57b4 100644 --- a/arch/alpha/include/asm/io.h +++ b/arch/alpha/include/asm/io.h @@ -489,10 +489,10 @@ extern inline void writeq(u64 b, volatile void __iomem *addr) } #endif -#define ioread16be(p) be16_to_cpu(ioread16(p)) -#define ioread32be(p) be32_to_cpu(ioread32(p)) -#define iowrite16be(v,p) iowrite16(cpu_to_be16(v), (p)) -#define iowrite32be(v,p) iowrite32(cpu_to_be32(v), (p)) +#define ioread16be(p) swab16(ioread16(p)) +#define ioread32be(p) swab32(ioread32(p)) +#define iowrite16be(v,p) iowrite16(swab16(v), (p)) +#define iowrite32be(v,p) iowrite32(swab32(v), (p)) #define inb_p inb #define inw_p inw diff --git a/arch/alpha/include/asm/uaccess.h b/arch/alpha/include/asm/uaccess.h index 1fe2b56cb861..1b6f25efa247 100644 --- a/arch/alpha/include/asm/uaccess.h +++ b/arch/alpha/include/asm/uaccess.h @@ -20,7 +20,7 @@ #define get_fs() (current_thread_info()->addr_limit) #define set_fs(x) (current_thread_info()->addr_limit = (x)) -#define segment_eq(a, b) ((a).seg == (b).seg) +#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg) /* * Is a address valid? This does a straightforward calculation rather diff --git a/arch/alpha/mm/fault.c b/arch/alpha/mm/fault.c index c2303a8c2b9f..09172f017efc 100644 --- a/arch/alpha/mm/fault.c +++ b/arch/alpha/mm/fault.c @@ -25,6 +25,7 @@ #include <linux/interrupt.h> #include <linux/extable.h> #include <linux/uaccess.h> +#include <linux/perf_event.h> extern void die_if_kernel(char *,struct pt_regs *,long, unsigned long *); @@ -116,6 +117,7 @@ do_page_fault(unsigned long address, unsigned long mmcsr, #endif if (user_mode(regs)) flags |= FAULT_FLAG_USER; + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); retry: mmap_read_lock(mm); vma = find_vma(mm, address); @@ -148,7 +150,7 @@ retry: /* If for any reason at all we couldn't handle the fault, make sure we exit gracefully rather than endlessly redo the fault. */ - fault = handle_mm_fault(vma, address, flags); + fault = handle_mm_fault(vma, address, flags, regs); if (fault_signal_pending(fault, regs)) return; @@ -164,10 +166,6 @@ retry: } if (flags & FAULT_FLAG_ALLOW_RETRY) { - if (fault & VM_FAULT_MAJOR) - current->maj_flt++; - else - current->min_flt++; if (fault & VM_FAULT_RETRY) { flags |= FAULT_FLAG_TRIED; diff --git a/arch/arc/include/asm/segment.h b/arch/arc/include/asm/segment.h index 6a2a5be5026d..871f8ab11bfd 100644 --- a/arch/arc/include/asm/segment.h +++ b/arch/arc/include/asm/segment.h @@ -14,8 +14,7 @@ typedef unsigned long mm_segment_t; #define KERNEL_DS MAKE_MM_SEG(0) #define USER_DS MAKE_MM_SEG(TASK_SIZE) - -#define segment_eq(a, b) ((a) == (b)) +#define uaccess_kernel() (get_fs() == KERNEL_DS) #endif /* __ASSEMBLY__ */ #endif /* __ASMARC_SEGMENT_H */ diff --git a/arch/arc/kernel/process.c b/arch/arc/kernel/process.c index e12c80d71b78..efeba1fe7252 100644 --- a/arch/arc/kernel/process.c +++ b/arch/arc/kernel/process.c @@ -91,7 +91,7 @@ fault: goto fail; mmap_read_lock(current->mm); - ret = fixup_user_fault(current, current->mm, (unsigned long) uaddr, + ret = fixup_user_fault(current->mm, (unsigned long) uaddr, FAULT_FLAG_WRITE, NULL); mmap_read_unlock(current->mm); diff --git a/arch/arc/mm/fault.c b/arch/arc/mm/fault.c index 7287c793d1c9..f5657cb68e4f 100644 --- a/arch/arc/mm/fault.c +++ b/arch/arc/mm/fault.c @@ -105,6 +105,7 @@ void do_page_fault(unsigned long address, struct pt_regs *regs) if (write) flags |= FAULT_FLAG_WRITE; + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); retry: mmap_read_lock(mm); @@ -130,7 +131,7 @@ retry: goto bad_area; } - fault = handle_mm_fault(vma, address, flags); + fault = handle_mm_fault(vma, address, flags, regs); /* Quick path to respond to signals */ if (fault_signal_pending(fault, regs)) { @@ -155,22 +156,9 @@ bad_area: * Major/minor page fault accounting * (in case of retry we only land here once) */ - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); - - if (likely(!(fault & VM_FAULT_ERROR))) { - if (fault & VM_FAULT_MAJOR) { - tsk->maj_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, - regs, address); - } else { - tsk->min_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, - regs, address); - } - + if (likely(!(fault & VM_FAULT_ERROR))) /* Normal return path: fault Handled Gracefully */ return; - } if (!user_mode(regs)) goto no_context; diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h index b5fdd30252f8..a13d90206472 100644 --- a/arch/arm/include/asm/uaccess.h +++ b/arch/arm/include/asm/uaccess.h @@ -76,7 +76,7 @@ static inline void set_fs(mm_segment_t fs) modify_domain(DOMAIN_KERNEL, fs ? DOMAIN_CLIENT : DOMAIN_MANAGER); } -#define segment_eq(a, b) ((a) == (b)) +#define uaccess_kernel() (get_fs() == KERNEL_DS) /* * We use 33-bit arithmetic here. Success returns zero, failure returns @@ -267,7 +267,7 @@ extern int __put_user_8(void *, unsigned long long); */ #define USER_DS KERNEL_DS -#define segment_eq(a, b) (1) +#define uaccess_kernel() (true) #define __addr_ok(addr) ((void)(addr), 1) #define __range_ok(addr, size) ((void)(addr), 0) #define get_fs() (KERNEL_DS) diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c index ab2568996ddb..c9dc912b83f0 100644 --- a/arch/arm/kernel/signal.c +++ b/arch/arm/kernel/signal.c @@ -713,7 +713,9 @@ struct page *get_signal_page(void) /* Defer to generic check */ asmlinkage void addr_limit_check_failed(void) { +#ifdef CONFIG_MMU addr_limit_user_check(); +#endif } #ifdef CONFIG_DEBUG_RSEQ diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c index c6550eddfce1..efa402025031 100644 --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c @@ -202,7 +202,8 @@ static inline bool access_error(unsigned int fsr, struct vm_area_struct *vma) static vm_fault_t __kprobes __do_page_fault(struct mm_struct *mm, unsigned long addr, unsigned int fsr, - unsigned int flags, struct task_struct *tsk) + unsigned int flags, struct task_struct *tsk, + struct pt_regs *regs) { struct vm_area_struct *vma; vm_fault_t fault; @@ -224,7 +225,7 @@ good_area: goto out; } - return handle_mm_fault(vma, addr & PAGE_MASK, flags); + return handle_mm_fault(vma, addr & PAGE_MASK, flags, regs); check_stack: /* Don't allow expansion below FIRST_USER_ADDRESS */ @@ -266,6 +267,8 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) if ((fsr & FSR_WRITE) && !(fsr & FSR_CM)) flags |= FAULT_FLAG_WRITE; + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr); + /* * As per x86, we may deadlock here. However, since the kernel only * validly references user space from well defined areas of the code, @@ -290,7 +293,7 @@ retry: #endif } - fault = __do_page_fault(mm, addr, fsr, flags, tsk); + fault = __do_page_fault(mm, addr, fsr, flags, tsk, regs); /* If we need to retry but a fatal signal is pending, handle the * signal first. We do not need to release the mmap_lock because @@ -302,23 +305,7 @@ retry: return 0; } - /* - * Major/minor page fault accounting is only done on the - * initial attempt. If we go through a retry, it is extremely - * likely that the page will be found in page cache at that point. - */ - - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr); if (!(fault & VM_FAULT_ERROR) && flags & FAULT_FLAG_ALLOW_RETRY) { - if (fault & VM_FAULT_MAJOR) { - tsk->maj_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, - regs, addr); - } else { - tsk->min_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, - regs, addr); - } if (fault & VM_FAULT_RETRY) { flags |= FAULT_FLAG_TRIED; goto retry; diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index 8d7c466f809b..991dd5f031e4 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -50,7 +50,7 @@ static inline void set_fs(mm_segment_t fs) CONFIG_ARM64_UAO)); } -#define segment_eq(a, b) ((a) == (b)) +#define uaccess_kernel() (get_fs() == KERNEL_DS) /* * Test whether a block of memory is a valid user space address. diff --git a/arch/arm64/kernel/sdei.c b/arch/arm64/kernel/sdei.c index dab88260b137..7689f2031c0c 100644 --- a/arch/arm64/kernel/sdei.c +++ b/arch/arm64/kernel/sdei.c @@ -180,7 +180,7 @@ static __kprobes unsigned long _sdei_handler(struct pt_regs *regs, /* * We didn't take an exception to get here, set PAN. UAO will be cleared - * by sdei_event_handler()s set_fs(USER_DS) call. + * by sdei_event_handler()s force_uaccess_begin() call. */ __uaccess_enable_hw_pan(); diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 8afb238ff335..f07333e86c2f 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -404,7 +404,8 @@ static void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *re #define VM_FAULT_BADACCESS 0x020000 static vm_fault_t __do_page_fault(struct mm_struct *mm, unsigned long addr, - unsigned int mm_flags, unsigned long vm_flags) + unsigned int mm_flags, unsigned long vm_flags, + struct pt_regs *regs) { struct vm_area_struct *vma = find_vma(mm, addr); @@ -428,7 +429,7 @@ static vm_fault_t __do_page_fault(struct mm_struct *mm, unsigned long addr, */ if (!(vma->vm_flags & vm_flags)) return VM_FAULT_BADACCESS; - return handle_mm_fault(vma, addr & PAGE_MASK, mm_flags); + return handle_mm_fault(vma, addr & PAGE_MASK, mm_flags, regs); } static bool is_el0_instruction_abort(unsigned int esr) @@ -450,7 +451,7 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, { const struct fault_info *inf; struct mm_struct *mm = current->mm; - vm_fault_t fault, major = 0; + vm_fault_t fault; unsigned long vm_flags = VM_ACCESS_FLAGS; unsigned int mm_flags = FAULT_FLAG_DEFAULT; @@ -516,8 +517,7 @@ retry: #endif } - fault = __do_page_fault(mm, addr, mm_flags, vm_flags); - major |= fault & VM_FAULT_MAJOR; + fault = __do_page_fault(mm, addr, mm_flags, vm_flags, regs); /* Quick path to respond to signals */ if (fault_signal_pending(fault, regs)) { @@ -538,25 +538,8 @@ retry: * Handle the "normal" (no error) case first. */ if (likely(!(fault & (VM_FAULT_ERROR | VM_FAULT_BADMAP | - VM_FAULT_BADACCESS)))) { - /* - * Major/minor page fault accounting is only done - * once. If we go through a retry, it is extremely - * likely that the page will be found in page cache at - * that point. - */ - if (major) { - current->maj_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, - addr); - } else { - current->min_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, - addr); - } - + VM_FAULT_BADACCESS)))) return 0; - } /* * If we are in kernel mode at this point, we have no context to diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c index aafcee3e3f7e..73f8b49d485c 100644 --- a/arch/arm64/mm/numa.c +++ b/arch/arm64/mm/numa.c @@ -461,13 +461,3 @@ void __init arm64_numa_init(void) numa_init(dummy_numa_init); } - -/* - * We hope that we will be hotplugging memory on nodes we already know about, - * such that acpi_get_node() succeeds and we never fall back to this... - */ -int memory_add_physaddr_to_nid(u64 addr) -{ - pr_warn("Unknown node for memory at 0x%llx, assuming node 0\n", addr); - return 0; -} diff --git a/arch/csky/include/asm/segment.h b/arch/csky/include/asm/segment.h index db2640d5f575..79ede9b1a646 100644 --- a/arch/csky/include/asm/segment.h +++ b/arch/csky/include/asm/segment.h @@ -13,6 +13,6 @@ typedef struct { #define USER_DS ((mm_segment_t) { 0x80000000UL }) #define get_fs() (current_thread_info()->addr_limit) #define set_fs(x) (current_thread_info()->addr_limit = (x)) -#define segment_eq(a, b) ((a).seg == (b).seg) +#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg) #endif /* __ASM_CSKY_SEGMENT_H */ diff --git a/arch/csky/mm/fault.c b/arch/csky/mm/fault.c index b1dce9f2f04d..081b178b41b1 100644 --- a/arch/csky/mm/fault.c +++ b/arch/csky/mm/fault.c @@ -150,7 +150,8 @@ good_area: * make sure we exit gracefully rather than endlessly redo * the fault. */ - fault = handle_mm_fault(vma, address, write ? FAULT_FLAG_WRITE : 0); + fault = handle_mm_fault(vma, address, write ? FAULT_FLAG_WRITE : 0, + regs); if (unlikely(fault & VM_FAULT_ERROR)) { if (fault & VM_FAULT_OOM) goto out_of_memory; @@ -160,16 +161,6 @@ good_area: goto bad_area; BUG(); } - if (fault & VM_FAULT_MAJOR) { - tsk->maj_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, - address); - } else { - tsk->min_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, - address); - } - mmap_read_unlock(mm); return; diff --git a/arch/h8300/include/asm/segment.h b/arch/h8300/include/asm/segment.h index a407978f9f9f..37950725d9b9 100644 --- a/arch/h8300/include/asm/segment.h +++ b/arch/h8300/include/asm/segment.h @@ -33,7 +33,7 @@ static inline mm_segment_t get_fs(void) return USER_DS; } -#define segment_eq(a, b) ((a).seg == (b).seg) +#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg) #endif /* __ASSEMBLY__ */ diff --git a/arch/hexagon/mm/vm_fault.c b/arch/hexagon/mm/vm_fault.c index cd3808f96b93..ef32c5a84ff3 100644 --- a/arch/hexagon/mm/vm_fault.c +++ b/arch/hexagon/mm/vm_fault.c @@ -18,6 +18,7 @@ #include <linux/signal.h> #include <linux/extable.h> #include <linux/hardirq.h> +#include <linux/perf_event.h> /* * Decode of hardware exception sends us to one of several @@ -53,6 +54,8 @@ void do_page_fault(unsigned long address, long cause, struct pt_regs *regs) if (user_mode(regs)) flags |= FAULT_FLAG_USER; + + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); retry: mmap_read_lock(mm); vma = find_vma(mm, address); @@ -88,7 +91,7 @@ good_area: break; } - fault = handle_mm_fault(vma, address, flags); + fault = handle_mm_fault(vma, address, flags, regs); if (fault_signal_pending(fault, regs)) return; @@ -96,10 +99,6 @@ good_area: /* The most common case -- we are done. */ if (likely(!(fault & VM_FAULT_ERROR))) { if (flags & FAULT_FLAG_ALLOW_RETRY) { - if (fault & VM_FAULT_MAJOR) - current->maj_flt++; - else - current->min_flt++; if (fault & VM_FAULT_RETRY) { flags |= FAULT_FLAG_TRIED; goto retry; diff --git a/arch/ia64/include/asm/uaccess.h b/arch/ia64/include/asm/uaccess.h index 8aa473a4b0f4..179243c3dfc7 100644 --- a/arch/ia64/include/asm/uaccess.h +++ b/arch/ia64/include/asm/uaccess.h @@ -50,7 +50,7 @@ #define get_fs() (current_thread_info()->addr_limit) #define set_fs(x) (current_thread_info()->addr_limit = (x)) -#define segment_eq(a, b) ((a).seg == (b).seg) +#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg) /* * When accessing user memory, we need to make sure the entire area really is in diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c index 3a4dec334cc5..cd9766d2b6e0 100644 --- a/arch/ia64/mm/fault.c +++ b/arch/ia64/mm/fault.c @@ -14,6 +14,7 @@ #include <linux/kdebug.h> #include <linux/prefetch.h> #include <linux/uaccess.h> +#include <linux/perf_event.h> #include <asm/processor.h> #include <asm/exception.h> @@ -105,6 +106,8 @@ ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *re flags |= FAULT_FLAG_USER; if (mask & VM_WRITE) flags |= FAULT_FLAG_WRITE; + + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); retry: mmap_read_lock(mm); @@ -143,7 +146,7 @@ retry: * sure we exit gracefully rather than endlessly redo the * fault. */ - fault = handle_mm_fault(vma, address, flags); + fault = handle_mm_fault(vma, address, flags, regs); if (fault_signal_pending(fault, regs)) return; @@ -166,10 +169,6 @@ retry: } if (flags & FAULT_FLAG_ALLOW_RETRY) { - if (fault & VM_FAULT_MAJOR) - current->maj_flt++; - else - current->min_flt++; if (fault & VM_FAULT_RETRY) { flags |= FAULT_FLAG_TRIED; diff --git a/arch/ia64/mm/numa.c b/arch/ia64/mm/numa.c index 5e1015eb6d0d..f34964271101 100644 --- a/arch/ia64/mm/numa.c +++ b/arch/ia64/mm/numa.c @@ -106,7 +106,5 @@ int memory_add_physaddr_to_nid(u64 addr) return 0; return nid; } - -EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); #endif #endif diff --git a/arch/m68k/include/asm/segment.h b/arch/m68k/include/asm/segment.h index c6686559e9b7..2b5e68a71ef7 100644 --- a/arch/m68k/include/asm/segment.h +++ b/arch/m68k/include/asm/segment.h @@ -52,7 +52,7 @@ static inline void set_fs(mm_segment_t val) #define set_fs(x) (current_thread_info()->addr_limit = (x)) #endif -#define segment_eq(a, b) ((a).seg == (b).seg) +#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg) #endif /* __ASSEMBLY__ */ diff --git a/arch/m68k/include/asm/tlbflush.h b/arch/m68k/include/asm/tlbflush.h index 191e75a6bb24..5337bc2c262f 100644 --- a/arch/m68k/include/asm/tlbflush.h +++ b/arch/m68k/include/asm/tlbflush.h @@ -85,10 +85,10 @@ static inline void flush_tlb_mm(struct mm_struct *mm) static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr) { if (vma->vm_mm == current->active_mm) { - mm_segment_t old_fs = get_fs(); - set_fs(USER_DS); + mm_segment_t old_fs = force_uaccess_begin(); + __flush_tlb_one(addr); - set_fs(old_fs); + force_uaccess_end(old_fs); } } diff --git a/arch/m68k/mm/fault.c b/arch/m68k/mm/fault.c index 508abb63da67..795f483b1050 100644 --- a/arch/m68k/mm/fault.c +++ b/arch/m68k/mm/fault.c @@ -12,6 +12,7 @@ #include <linux/interrupt.h> #include <linux/module.h> #include <linux/uaccess.h> +#include <linux/perf_event.h> #include <asm/setup.h> #include <asm/traps.h> @@ -84,6 +85,8 @@ int do_page_fault(struct pt_regs *regs, unsigned long address, if (user_mode(regs)) flags |= FAULT_FLAG_USER; + + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); retry: mmap_read_lock(mm); @@ -134,7 +137,7 @@ good_area: * the fault. */ - fault = handle_mm_fault(vma, address, flags); + fault = handle_mm_fault(vma, address, flags, regs); pr_debug("handle_mm_fault returns %x\n", fault); if (fault_signal_pending(fault, regs)) @@ -150,16 +153,7 @@ good_area: BUG(); } - /* - * Major/minor page fault accounting is only done on the - * initial attempt. If we go through a retry, it is extremely - * likely that the page will be found in page cache at that point. - */ if (flags & FAULT_FLAG_ALLOW_RETRY) { - if (fault & VM_FAULT_MAJOR) - current->maj_flt++; - else - current->min_flt++; if (fault & VM_FAULT_RETRY) { flags |= FAULT_FLAG_TRIED; diff --git a/arch/microblaze/include/asm/uaccess.h b/arch/microblaze/include/asm/uaccess.h index 6723c56ec378..304b04ffea2f 100644 --- a/arch/microblaze/include/asm/uaccess.h +++ b/arch/microblaze/include/asm/uaccess.h @@ -41,7 +41,7 @@ # define get_fs() (current_thread_info()->addr_limit) # define set_fs(val) (current_thread_info()->addr_limit = (val)) -# define segment_eq(a, b) ((a).seg == (b).seg) +# define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg) #ifndef CONFIG_MMU diff --git a/arch/microblaze/mm/fault.c b/arch/microblaze/mm/fault.c index a2bfe587b491..b3fed2cecf84 100644 --- a/arch/microblaze/mm/fault.c +++ b/arch/microblaze/mm/fault.c @@ -28,6 +28,7 @@ #include <linux/mman.h> #include <linux/mm.h> #include <linux/interrupt.h> +#include <linux/perf_event.h> #include <asm/page.h> #include <asm/mmu.h> @@ -121,6 +122,8 @@ void do_page_fault(struct pt_regs *regs, unsigned long address, if (user_mode(regs)) flags |= FAULT_FLAG_USER; + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); + /* When running in the kernel we expect faults to occur only to * addresses in user space. All other faults represent errors in the * kernel and should generate an OOPS. Unfortunately, in the case of an @@ -214,7 +217,7 @@ good_area: * make sure we exit gracefully rather than endlessly redo * the fault. */ - fault = handle_mm_fault(vma, address, flags); + fault = handle_mm_fault(vma, address, flags, regs); if (fault_signal_pending(fault, regs)) return; @@ -230,10 +233,6 @@ good_area: } if (flags & FAULT_FLAG_ALLOW_RETRY) { - if (unlikely(fault & VM_FAULT_MAJOR)) - current->maj_flt++; - else - current->min_flt++; if (fault & VM_FAULT_RETRY) { flags |= FAULT_FLAG_TRIED; diff --git a/arch/mips/include/asm/uaccess.h b/arch/mips/include/asm/uaccess.h index 62b298c50905..61fc01f177a6 100644 --- a/arch/mips/include/asm/uaccess.h +++ b/arch/mips/include/asm/uaccess.h @@ -72,7 +72,7 @@ extern u64 __ua_limit; #define get_fs() (current_thread_info()->addr_limit) #define set_fs(x) (current_thread_info()->addr_limit = (x)) -#define segment_eq(a, b) ((a).seg == (b).seg) +#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg) /* * eva_kernel_access() - determine whether kernel memory access on an EVA system diff --git a/arch/mips/kernel/unaligned.c b/arch/mips/kernel/unaligned.c index 0adce604fa44..126a5f3f4e4c 100644 --- a/arch/mips/kernel/unaligned.c +++ b/arch/mips/kernel/unaligned.c @@ -191,17 +191,16 @@ static void emulate_load_store_insn(struct pt_regs *regs, * memory, so we need to "switch" the address limit to * user space, so that address check can work properly. */ - seg = get_fs(); - set_fs(USER_DS); + seg = force_uaccess_begin(); switch (insn.spec3_format.func) { case lhe_op: if (!access_ok(addr, 2)) { - set_fs(seg); + force_uaccess_end(seg); goto sigbus; } LoadHWE(addr, value, res); if (res) { - set_fs(seg); + force_uaccess_end(seg); goto fault; } compute_return_epc(regs); @@ -209,12 +208,12 @@ static void emulate_load_store_insn(struct pt_regs *regs, break; case lwe_op: if (!access_ok(addr, 4)) { - set_fs(seg); + force_uaccess_end(seg); goto sigbus; } LoadWE(addr, value, res); if (res) { - set_fs(seg); + force_uaccess_end(seg); goto fault; } compute_return_epc(regs); @@ -222,12 +221,12 @@ static void emulate_load_store_insn(struct pt_regs *regs, break; case lhue_op: if (!access_ok(addr, 2)) { - set_fs(seg); + force_uaccess_end(seg); goto sigbus; } LoadHWUE(addr, value, res); if (res) { - set_fs(seg); + force_uaccess_end(seg); goto fault; } compute_return_epc(regs); @@ -235,35 +234,35 @@ static void emulate_load_store_insn(struct pt_regs *regs, break; case she_op: if (!access_ok(addr, 2)) { - set_fs(seg); + force_uaccess_end(seg); goto sigbus; } compute_return_epc(regs); value = regs->regs[insn.spec3_format.rt]; StoreHWE(addr, value, res); if (res) { - set_fs(seg); + force_uaccess_end(seg); goto fault; } break; case swe_op: if (!access_ok(addr, 4)) { - set_fs(seg); + force_uaccess_end(seg); goto sigbus; } compute_return_epc(regs); value = regs->regs[insn.spec3_format.rt]; StoreWE(addr, value, res); if (res) { - set_fs(seg); + force_uaccess_end(seg); goto fault; } break; default: - set_fs(seg); + force_uaccess_end(seg); goto sigill; } - set_fs(seg); + force_uaccess_end(seg); } #endif break; diff --git a/arch/mips/mm/fault.c b/arch/mips/mm/fault.c index 01b168a90434..7c871b14e74a 100644 --- a/arch/mips/mm/fault.c +++ b/arch/mips/mm/fault.c @@ -96,6 +96,8 @@ static void __kprobes __do_page_fault(struct pt_regs *regs, unsigned long write, if (user_mode(regs)) flags |= FAULT_FLAG_USER; + + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); retry: mmap_read_lock(mm); vma = find_vma(mm, address); @@ -152,12 +154,11 @@ good_area: * make sure we exit gracefully rather than endlessly redo * the fault. */ - fault = handle_mm_fault(vma, address, flags); + fault = handle_mm_fault(vma, address, flags, regs); if (fault_signal_pending(fault, regs)) return; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); if (unlikely(fault & VM_FAULT_ERROR)) { if (fault & VM_FAULT_OOM) goto out_of_memory; @@ -168,15 +169,6 @@ good_area: BUG(); } if (flags & FAULT_FLAG_ALLOW_RETRY) { - if (fault & VM_FAULT_MAJOR) { - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, - regs, address); - tsk->maj_flt++; - } else { - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, - regs, address); - tsk->min_flt++; - } if (fault & VM_FAULT_RETRY) { flags |= FAULT_FLAG_TRIED; diff --git a/arch/nds32/include/asm/uaccess.h b/arch/nds32/include/asm/uaccess.h index 3a9219f53ee0..010ba5f1d7dd 100644 --- a/arch/nds32/include/asm/uaccess.h +++ b/arch/nds32/include/asm/uaccess.h @@ -44,7 +44,7 @@ static inline void set_fs(mm_segment_t fs) current_thread_info()->addr_limit = fs; } -#define segment_eq(a, b) ((a) == (b)) +#define uaccess_kernel() (get_fs() == KERNEL_DS) #define __range_ok(addr, size) (size <= get_fs() && addr <= (get_fs() -size)) diff --git a/arch/nds32/kernel/process.c b/arch/nds32/kernel/process.c index e85bbbadc0e7..e01ad5d17224 100644 --- a/arch/nds32/kernel/process.c +++ b/arch/nds32/kernel/process.c @@ -121,7 +121,7 @@ void show_regs(struct pt_regs *regs) regs->uregs[3], regs->uregs[2], regs->uregs[1], regs->uregs[0]); pr_info(" IRQs o%s Segment %s\n", interrupts_enabled(regs) ? "n" : "ff", - segment_eq(get_fs(), KERNEL_DS)? "kernel" : "user"); + uaccess_kernel() ? "kernel" : "user"); } EXPORT_SYMBOL(show_regs); diff --git a/arch/nds32/mm/alignment.c b/arch/nds32/mm/alignment.c index c8b9061a2ee3..1eb7ded6992b 100644 --- a/arch/nds32/mm/alignment.c +++ b/arch/nds32/mm/alignment.c @@ -512,7 +512,7 @@ int do_unaligned_access(unsigned long addr, struct pt_regs *regs) { unsigned long inst; int ret = -EFAULT; - mm_segment_t seg = get_fs(); + mm_segment_t seg; inst = get_inst(regs->ipc); @@ -520,13 +520,12 @@ int do_unaligned_access(unsigned long addr, struct pt_regs *regs) "Faulting addr: 0x%08lx, pc: 0x%08lx [inst: 0x%08lx ]\n", addr, regs->ipc, inst); - set_fs(USER_DS); - + seg = force_uaccess_begin(); if (inst & NDS32_16BIT_INSTRUCTION) ret = do_16((inst >> 16) & 0xffff, regs); else ret = do_32(inst, regs); - set_fs(seg); + force_uaccess_end(seg); return ret; } diff --git a/arch/nds32/mm/fault.c b/arch/nds32/mm/fault.c index 8fb73f6401a0..f02524eb6d56 100644 --- a/arch/nds32/mm/fault.c +++ b/arch/nds32/mm/fault.c @@ -121,6 +121,8 @@ void do_page_fault(unsigned long entry, unsigned long addr, if (unlikely(faulthandler_disabled() || !mm)) goto no_context; + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr); + /* * As per x86, we may deadlock here. However, since the kernel only * validly references user space from well defined areas of the code, @@ -206,7 +208,7 @@ good_area: * the fault. */ - fault = handle_mm_fault(vma, addr, flags); + fault = handle_mm_fault(vma, addr, flags, regs); /* * If we need to retry but a fatal signal is pending, handle the @@ -228,22 +230,7 @@ good_area: goto bad_area; } - /* - * Major/minor page fault accounting is only done on the initial - * attempt. If we go through a retry, it is extremely likely that the - * page will be found in page cache at that point. - */ - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr); if (flags & FAULT_FLAG_ALLOW_RETRY) { - if (fault & VM_FAULT_MAJOR) { - tsk->maj_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, - 1, regs, addr); - } else { - tsk->min_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, - 1, regs, addr); - } if (fault & VM_FAULT_RETRY) { flags |= FAULT_FLAG_TRIED; diff --git a/arch/nios2/include/asm/uaccess.h b/arch/nios2/include/asm/uaccess.h index e83f831a76f9..a741abbed6fb 100644 --- a/arch/nios2/include/asm/uaccess.h +++ b/arch/nios2/include/asm/uaccess.h @@ -30,7 +30,7 @@ #define get_fs() (current_thread_info()->addr_limit) #define set_fs(seg) (current_thread_info()->addr_limit = (seg)) -#define segment_eq(a, b) ((a).seg == (b).seg) +#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg) #define __access_ok(addr, len) \ (((signed long)(((long)get_fs().seg) & \ diff --git a/arch/nios2/mm/fault.c b/arch/nios2/mm/fault.c index 4112ef0e247e..9476feecf512 100644 --- a/arch/nios2/mm/fault.c +++ b/arch/nios2/mm/fault.c @@ -24,6 +24,7 @@ #include <linux/mm.h> #include <linux/extable.h> #include <linux/uaccess.h> +#include <linux/perf_event.h> #include <asm/mmu_context.h> #include <asm/traps.h> @@ -83,6 +84,8 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long cause, if (user_mode(regs)) flags |= FAULT_FLAG_USER; + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); + if (!mmap_read_trylock(mm)) { if (!user_mode(regs) && !search_exception_tables(regs->ea)) goto bad_area_nosemaphore; @@ -131,7 +134,7 @@ good_area: * make sure we exit gracefully rather than endlessly redo * the fault. */ - fault = handle_mm_fault(vma, address, flags); + fault = handle_mm_fault(vma, address, flags, regs); if (fault_signal_pending(fault, regs)) return; @@ -146,16 +149,7 @@ good_area: BUG(); } - /* - * Major/minor page fault accounting is only done on the - * initial attempt. If we go through a retry, it is extremely - * likely that the page will be found in page cache at that point. - */ if (flags & FAULT_FLAG_ALLOW_RETRY) { - if (fault & VM_FAULT_MAJOR) - current->maj_flt++; - else - current->min_flt++; if (fault & VM_FAULT_RETRY) { flags |= FAULT_FLAG_TRIED; diff --git a/arch/openrisc/include/asm/uaccess.h b/arch/openrisc/include/asm/uaccess.h index 17c24f14615f..48b691530d3e 100644 --- a/arch/openrisc/include/asm/uaccess.h +++ b/arch/openrisc/include/asm/uaccess.h @@ -43,7 +43,7 @@ #define get_fs() (current_thread_info()->addr_limit) #define set_fs(x) (current_thread_info()->addr_limit = (x)) -#define segment_eq(a, b) ((a) == (b)) +#define uaccess_kernel() (get_fs() == KERNEL_DS) /* Ensure that the range from addr to addr+size is all within the process' * address space diff --git a/arch/openrisc/mm/fault.c b/arch/openrisc/mm/fault.c index d2224ccca294..ca97d9baab51 100644 --- a/arch/openrisc/mm/fault.c +++ b/arch/openrisc/mm/fault.c @@ -15,6 +15,7 @@ #include <linux/interrupt.h> #include <linux/extable.h> #include <linux/sched/signal.h> +#include <linux/perf_event.h> #include <linux/uaccess.h> #include <asm/siginfo.h> @@ -103,6 +104,8 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long address, if (in_interrupt() || !mm) goto no_context; + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); + retry: mmap_read_lock(mm); vma = find_vma(mm, address); @@ -159,7 +162,7 @@ good_area: * the fault. */ - fault = handle_mm_fault(vma, address, flags); + fault = handle_mm_fault(vma, address, flags, regs); if (fault_signal_pending(fault, regs)) return; @@ -176,10 +179,6 @@ good_area: if (flags & FAULT_FLAG_ALLOW_RETRY) { /*RGD modeled on Cris */ - if (fault & VM_FAULT_MAJOR) - tsk->maj_flt++; - else - tsk->min_flt++; if (fault & VM_FAULT_RETRY) { flags |= FAULT_FLAG_TRIED; diff --git a/arch/parisc/include/asm/uaccess.h b/arch/parisc/include/asm/uaccess.h index ebbb9ffe038c..ed2cd4fb479b 100644 --- a/arch/parisc/include/asm/uaccess.h +++ b/arch/parisc/include/asm/uaccess.h @@ -14,7 +14,7 @@ #define KERNEL_DS ((mm_segment_t){0}) #define USER_DS ((mm_segment_t){1}) -#define segment_eq(a, b) ((a).seg == (b).seg) +#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg) #define get_fs() (current_thread_info()->addr_limit) #define set_fs(x) (current_thread_info()->addr_limit = (x)) diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c index 66ac0719bd49..4bfe2da9fbe3 100644 --- a/arch/parisc/mm/fault.c +++ b/arch/parisc/mm/fault.c @@ -18,6 +18,7 @@ #include <linux/extable.h> #include <linux/uaccess.h> #include <linux/hugetlb.h> +#include <linux/perf_event.h> #include <asm/traps.h> @@ -281,6 +282,7 @@ void do_page_fault(struct pt_regs *regs, unsigned long code, acc_type = parisc_acctyp(code, regs->iir); if (acc_type & VM_WRITE) flags |= FAULT_FLAG_WRITE; + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); retry: mmap_read_lock(mm); vma = find_vma_prev(mm, address, &prev_vma); @@ -302,7 +304,7 @@ good_area: * fault. */ - fault = handle_mm_fault(vma, address, flags); + fault = handle_mm_fault(vma, address, flags, regs); if (fault_signal_pending(fault, regs)) return; @@ -323,10 +325,6 @@ good_area: BUG(); } if (flags & FAULT_FLAG_ALLOW_RETRY) { - if (fault & VM_FAULT_MAJOR) - current->maj_flt++; - else - current->min_flt++; if (fault & VM_FAULT_RETRY) { /* * No need to mmap_read_unlock(mm) as we would diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index 64c04ab09112..00699903f1ef 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -38,8 +38,7 @@ static inline void set_fs(mm_segment_t fs) set_thread_flag(TIF_FSCHECK); } -#define segment_eq(a, b) ((a).seg == (b).seg) - +#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg) #define user_addr_max() (get_fs().seg) #ifdef __powerpc64__ diff --git a/arch/powerpc/mm/copro_fault.c b/arch/powerpc/mm/copro_fault.c index b83abbead4a2..8acd00178956 100644 --- a/arch/powerpc/mm/copro_fault.c +++ b/arch/powerpc/mm/copro_fault.c @@ -64,7 +64,7 @@ int copro_handle_mm_fault(struct mm_struct *mm, unsigned long ea, } ret = 0; - *flt = handle_mm_fault(vma, ea, is_write ? FAULT_FLAG_WRITE : 0); + *flt = handle_mm_fault(vma, ea, is_write ? FAULT_FLAG_WRITE : 0, NULL); if (unlikely(*flt & VM_FAULT_ERROR)) { if (*flt & VM_FAULT_OOM) { ret = -ENOMEM; @@ -76,11 +76,6 @@ int copro_handle_mm_fault(struct mm_struct *mm, unsigned long ea, BUG(); } - if (*flt & VM_FAULT_MAJOR) - current->maj_flt++; - else - current->min_flt++; - out_unlock: mmap_read_unlock(mm); return ret; diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 925a7231abb3..0add963a849b 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -511,7 +511,7 @@ retry: * make sure we exit gracefully rather than endlessly redo * the fault. */ - fault = handle_mm_fault(vma, address, flags); + fault = handle_mm_fault(vma, address, flags, regs); major |= fault & VM_FAULT_MAJOR; @@ -537,14 +537,9 @@ retry: /* * Major/minor page fault accounting. */ - if (major) { - current->maj_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, address); + if (major) cmo_account_page_fault(); - } else { - current->min_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address); - } + return 0; } NOKPROBE_SYMBOL(__do_page_fault); diff --git a/arch/riscv/include/asm/uaccess.h b/arch/riscv/include/asm/uaccess.h index 8ce9d607b53d..f56c66b3f5fe 100644 --- a/arch/riscv/include/asm/uaccess.h +++ b/arch/riscv/include/asm/uaccess.h @@ -8,6 +8,8 @@ #ifndef _ASM_RISCV_UACCESS_H #define _ASM_RISCV_UACCESS_H +#include <asm/pgtable.h> /* for TASK_SIZE */ + /* * User space memory access functions */ @@ -62,11 +64,9 @@ static inline void set_fs(mm_segment_t fs) current_thread_info()->addr_limit = fs; } -#define segment_eq(a, b) ((a).seg == (b).seg) - +#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg) #define user_addr_max() (get_fs().seg) - /** * access_ok: - Checks if a user space pointer is valid * @addr: User space pointer to start of block to check diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c index 5873835a3e6b..716d64e36f83 100644 --- a/arch/riscv/mm/fault.c +++ b/arch/riscv/mm/fault.c @@ -109,7 +109,7 @@ good_area: * make sure we exit gracefully rather than endlessly redo * the fault. */ - fault = handle_mm_fault(vma, addr, flags); + fault = handle_mm_fault(vma, addr, flags, regs); /* * If we need to retry but a fatal signal is pending, handle the @@ -127,21 +127,7 @@ good_area: BUG(); } - /* - * Major/minor page fault accounting is only done on the - * initial attempt. If we go through a retry, it is extremely - * likely that the page will be found in page cache at that point. - */ if (flags & FAULT_FLAG_ALLOW_RETRY) { - if (fault & VM_FAULT_MAJOR) { - tsk->maj_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, - 1, regs, addr); - } else { - tsk->min_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, - 1, regs, addr); - } if (fault & VM_FAULT_RETRY) { flags |= FAULT_FLAG_TRIED; diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index 324438889fe1..f09444d6aeab 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -32,7 +32,7 @@ #define USER_DS_SACF (3) #define get_fs() (current->thread.mm_segment) -#define segment_eq(a,b) (((a) & 2) == ((b) & 2)) +#define uaccess_kernel() ((get_fs() & 2) == KERNEL_DS) void set_fs(mm_segment_t fs); diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 1608fd99bbee..2f177298c663 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -2768,7 +2768,7 @@ static struct page *get_map_page(struct kvm *kvm, u64 uaddr) struct page *page = NULL; mmap_read_lock(kvm->mm); - get_user_pages_remote(NULL, kvm->mm, uaddr, 1, FOLL_WRITE, + get_user_pages_remote(kvm->mm, uaddr, 1, FOLL_WRITE, &page, NULL, NULL); mmap_read_unlock(kvm->mm); return page; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 66da278a67fb..6b74b92c1a58 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1892,7 +1892,7 @@ static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args) r = set_guest_storage_key(current->mm, hva, keys[i], 0); if (r) { - r = fixup_user_fault(current, current->mm, hva, + r = fixup_user_fault(current->mm, hva, FAULT_FLAG_WRITE, &unlocked); if (r) break; diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 2f721a923b54..cd74989ce0b0 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -273,7 +273,7 @@ retry: rc = get_guest_storage_key(current->mm, vmaddr, &key); if (rc) { - rc = fixup_user_fault(current, current->mm, vmaddr, + rc = fixup_user_fault(current->mm, vmaddr, FAULT_FLAG_WRITE, &unlocked); if (!rc) { mmap_read_unlock(current->mm); @@ -319,7 +319,7 @@ retry: mmap_read_lock(current->mm); rc = reset_guest_reference_bit(current->mm, vmaddr); if (rc < 0) { - rc = fixup_user_fault(current, current->mm, vmaddr, + rc = fixup_user_fault(current->mm, vmaddr, FAULT_FLAG_WRITE, &unlocked); if (!rc) { mmap_read_unlock(current->mm); @@ -390,7 +390,7 @@ static int handle_sske(struct kvm_vcpu *vcpu) m3 & SSKE_MC); if (rc < 0) { - rc = fixup_user_fault(current, current->mm, vmaddr, + rc = fixup_user_fault(current->mm, vmaddr, FAULT_FLAG_WRITE, &unlocked); rc = !rc ? -EAGAIN : rc; } @@ -1094,7 +1094,7 @@ static int handle_pfmf(struct kvm_vcpu *vcpu) rc = cond_set_guest_storage_key(current->mm, vmaddr, key, NULL, nq, mr, mc); if (rc < 0) { - rc = fixup_user_fault(current, current->mm, vmaddr, + rc = fixup_user_fault(current->mm, vmaddr, FAULT_FLAG_WRITE, &unlocked); rc = !rc ? -EAGAIN : rc; } diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index aebf9183bedd..4c8c063bce5b 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -476,7 +476,7 @@ retry: * make sure we exit gracefully rather than endlessly redo * the fault. */ - fault = handle_mm_fault(vma, address, flags); + fault = handle_mm_fault(vma, address, flags, regs); if (fault_signal_pending(fault, regs)) { fault = VM_FAULT_SIGNAL; if (flags & FAULT_FLAG_RETRY_NOWAIT) @@ -486,21 +486,7 @@ retry: if (unlikely(fault & VM_FAULT_ERROR)) goto out_up; - /* - * Major/minor page fault accounting is only done on the - * initial attempt. If we go through a retry, it is extremely - * likely that the page will be found in page cache at that point. - */ if (flags & FAULT_FLAG_ALLOW_RETRY) { - if (fault & VM_FAULT_MAJOR) { - tsk->maj_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, - regs, address); - } else { - tsk->min_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, - regs, address); - } if (fault & VM_FAULT_RETRY) { if (IS_ENABLED(CONFIG_PGSTE) && gmap && (flags & FAULT_FLAG_RETRY_NOWAIT)) { diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index 190357ff86b3..8747487c50a8 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -649,7 +649,7 @@ retry: rc = vmaddr; goto out_up; } - if (fixup_user_fault(current, gmap->mm, vmaddr, fault_flags, + if (fixup_user_fault(gmap->mm, vmaddr, fault_flags, &unlocked)) { rc = -EFAULT; goto out_up; @@ -879,7 +879,7 @@ static int gmap_pte_op_fixup(struct gmap *gmap, unsigned long gaddr, BUG_ON(gmap_is_shadow(gmap)); fault_flags = (prot == PROT_WRITE) ? FAULT_FLAG_WRITE : 0; - if (fixup_user_fault(current, mm, vmaddr, fault_flags, &unlocked)) + if (fixup_user_fault(mm, vmaddr, fault_flags, &unlocked)) return -EFAULT; if (unlocked) /* lost mmap_lock, caller has to retry __gmap_translate */ diff --git a/arch/sh/include/asm/segment.h b/arch/sh/include/asm/segment.h index 33d1d28057cb..02e54a3335d6 100644 --- a/arch/sh/include/asm/segment.h +++ b/arch/sh/include/asm/segment.h @@ -24,8 +24,7 @@ typedef struct { #define USER_DS KERNEL_DS #endif -#define segment_eq(a, b) ((a).seg == (b).seg) - +#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg) #define get_fs() (current_thread_info()->addr_limit) #define set_fs(x) (current_thread_info()->addr_limit = (x)) diff --git a/arch/sh/include/asm/sparsemem.h b/arch/sh/include/asm/sparsemem.h index 4eb899751e45..084706bb8cca 100644 --- a/arch/sh/include/asm/sparsemem.h +++ b/arch/sh/include/asm/sparsemem.h @@ -5,11 +5,9 @@ #ifdef __KERNEL__ /* * SECTION_SIZE_BITS 2^N: how big each section will be - * MAX_PHYSADDR_BITS 2^N: how much physical address space we have - * MAX_PHYSMEM_BITS 2^N: how much memory we can have in that space + * MAX_PHYSMEM_BITS 2^N: how much physical address space we have */ #define SECTION_SIZE_BITS 26 -#define MAX_PHYSADDR_BITS 32 #define MAX_PHYSMEM_BITS 32 #endif diff --git a/arch/sh/kernel/traps_32.c b/arch/sh/kernel/traps_32.c index 058c6181bb30..b62ad0ba2395 100644 --- a/arch/sh/kernel/traps_32.c +++ b/arch/sh/kernel/traps_32.c @@ -482,8 +482,6 @@ asmlinkage void do_address_error(struct pt_regs *regs, error_code = lookup_exception_vector(); #endif - oldfs = get_fs(); - if (user_mode(regs)) { int si_code = BUS_ADRERR; unsigned int user_action; @@ -491,13 +489,13 @@ asmlinkage void do_address_error(struct pt_regs *regs, local_irq_enable(); inc_unaligned_user_access(); - set_fs(USER_DS); + oldfs = force_uaccess_begin(); if (copy_from_user(&instruction, (insn_size_t *)(regs->pc & ~1), sizeof(instruction))) { - set_fs(oldfs); + force_uaccess_end(oldfs); goto uspace_segv; } - set_fs(oldfs); + force_uaccess_end(oldfs); /* shout about userspace fixups */ unaligned_fixups_notify(current, instruction, regs); @@ -520,11 +518,11 @@ fixup: goto uspace_segv; } - set_fs(USER_DS); + oldfs = force_uaccess_begin(); tmp = handle_unaligned_access(instruction, regs, &user_mem_access, 0, address); - set_fs(oldfs); + force_uaccess_end(oldfs); if (tmp == 0) return; /* sorted */ diff --git a/arch/sh/mm/fault.c b/arch/sh/mm/fault.c index fbe1f2fe9a8c..482668a2f6d3 100644 --- a/arch/sh/mm/fault.c +++ b/arch/sh/mm/fault.c @@ -482,22 +482,13 @@ good_area: * make sure we exit gracefully rather than endlessly redo * the fault. */ - fault = handle_mm_fault(vma, address, flags); + fault = handle_mm_fault(vma, address, flags, regs); if (unlikely(fault & (VM_FAULT_RETRY | VM_FAULT_ERROR))) if (mm_fault_error(regs, error_code, address, fault)) return; if (flags & FAULT_FLAG_ALLOW_RETRY) { - if (fault & VM_FAULT_MAJOR) { - tsk->maj_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, - regs, address); - } else { - tsk->min_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, - regs, address); - } if (fault & VM_FAULT_RETRY) { flags |= FAULT_FLAG_TRIED; diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c index 613de8096335..cd1379360f08 100644 --- a/arch/sh/mm/init.c +++ b/arch/sh/mm/init.c @@ -425,15 +425,6 @@ int arch_add_memory(int nid, u64 start, u64 size, return ret; } -#ifdef CONFIG_NUMA -int memory_add_physaddr_to_nid(u64 addr) -{ - /* Node 0 for now.. */ - return 0; -} -EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); -#endif - void arch_remove_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap) { diff --git a/arch/sparc/include/asm/sparsemem.h b/arch/sparc/include/asm/sparsemem.h index 1dd1b61432db..aa9a676bc341 100644 --- a/arch/sparc/include/asm/sparsemem.h +++ b/arch/sparc/include/asm/sparsemem.h @@ -7,7 +7,6 @@ #include <asm/page.h> #define SECTION_SIZE_BITS 30 -#define MAX_PHYSADDR_BITS MAX_PHYS_ADDRESS_BITS #define MAX_PHYSMEM_BITS MAX_PHYS_ADDRESS_BITS #endif /* !(__KERNEL__) */ diff --git a/arch/sparc/include/asm/uaccess_32.h b/arch/sparc/include/asm/uaccess_32.h index d6d8413eca83..0a2d3ebc4bb8 100644 --- a/arch/sparc/include/asm/uaccess_32.h +++ b/arch/sparc/include/asm/uaccess_32.h @@ -28,7 +28,7 @@ #define get_fs() (current->thread.current_ds) #define set_fs(val) ((current->thread.current_ds) = (val)) -#define segment_eq(a, b) ((a).seg == (b).seg) +#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg) /* We have there a nice not-mapped page at PAGE_OFFSET - PAGE_SIZE, so that this test * can be fairly lightweight. diff --git a/arch/sparc/include/asm/uaccess_64.h b/arch/sparc/include/asm/uaccess_64.h index bf9d330073b2..698cf69f74e9 100644 --- a/arch/sparc/include/asm/uaccess_64.h +++ b/arch/sparc/include/asm/uaccess_64.h @@ -32,7 +32,7 @@ #define get_fs() ((mm_segment_t){(current_thread_info()->current_ds)}) -#define segment_eq(a, b) ((a).seg == (b).seg) +#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg) #define set_fs(val) \ do { \ diff --git a/arch/sparc/mm/fault_32.c b/arch/sparc/mm/fault_32.c index cfef656eda0f..8071bfd72349 100644 --- a/arch/sparc/mm/fault_32.c +++ b/arch/sparc/mm/fault_32.c @@ -234,7 +234,7 @@ good_area: * make sure we exit gracefully rather than endlessly redo * the fault. */ - fault = handle_mm_fault(vma, address, flags); + fault = handle_mm_fault(vma, address, flags, regs); if (fault_signal_pending(fault, regs)) return; @@ -250,15 +250,6 @@ good_area: } if (flags & FAULT_FLAG_ALLOW_RETRY) { - if (fault & VM_FAULT_MAJOR) { - current->maj_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, - 1, regs, address); - } else { - current->min_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, - 1, regs, address); - } if (fault & VM_FAULT_RETRY) { flags |= FAULT_FLAG_TRIED; @@ -410,7 +401,7 @@ good_area: if (!(vma->vm_flags & (VM_READ | VM_EXEC))) goto bad_area; } - switch (handle_mm_fault(vma, address, flags)) { + switch (handle_mm_fault(vma, address, flags, NULL)) { case VM_FAULT_SIGBUS: case VM_FAULT_OOM: goto do_sigbus; diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c index a3806614e4dc..0a6bcc85fba7 100644 --- a/arch/sparc/mm/fault_64.c +++ b/arch/sparc/mm/fault_64.c @@ -422,7 +422,7 @@ good_area: goto bad_area; } - fault = handle_mm_fault(vma, address, flags); + fault = handle_mm_fault(vma, address, flags, regs); if (fault_signal_pending(fault, regs)) goto exit_exception; @@ -438,15 +438,6 @@ good_area: } if (flags & FAULT_FLAG_ALLOW_RETRY) { - if (fault & VM_FAULT_MAJOR) { - current->maj_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, - 1, regs, address); - } else { - current->min_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, - 1, regs, address); - } if (fault & VM_FAULT_RETRY) { flags |= FAULT_FLAG_TRIED; diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c index 2b3afa354a90..ad12f78bda7e 100644 --- a/arch/um/kernel/trap.c +++ b/arch/um/kernel/trap.c @@ -71,7 +71,7 @@ good_area: do { vm_fault_t fault; - fault = handle_mm_fault(vma, address, flags); + fault = handle_mm_fault(vma, address, flags, NULL); if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) goto out_nosemaphore; @@ -88,10 +88,6 @@ good_area: BUG(); } if (flags & FAULT_FLAG_ALLOW_RETRY) { - if (fault & VM_FAULT_MAJOR) - current->maj_flt++; - else - current->min_flt++; if (fault & VM_FAULT_RETRY) { flags |= FAULT_FLAG_TRIED; diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 2f3e8f2a958f..ecefaffd15d4 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -33,7 +33,7 @@ static inline void set_fs(mm_segment_t fs) set_thread_flag(TIF_FSCHECK); } -#define segment_eq(a, b) ((a).seg == (b).seg) +#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg) #define user_addr_max() (current->thread.addr_limit.seg) /* diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 0c7643d9f7cb..35f1498e9832 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -1139,7 +1139,7 @@ void do_user_addr_fault(struct pt_regs *regs, struct vm_area_struct *vma; struct task_struct *tsk; struct mm_struct *mm; - vm_fault_t fault, major = 0; + vm_fault_t fault; unsigned int flags = FAULT_FLAG_DEFAULT; tsk = current; @@ -1291,8 +1291,7 @@ good_area: * userland). The return to userland is identified whenever * FAULT_FLAG_USER|FAULT_FLAG_KILLABLE are both set in flags. */ - fault = handle_mm_fault(vma, address, flags); - major |= fault & VM_FAULT_MAJOR; + fault = handle_mm_fault(vma, address, flags, regs); /* Quick path to respond to signals */ if (fault_signal_pending(fault, regs)) { @@ -1319,18 +1318,6 @@ good_area: return; } - /* - * Major/minor page fault accounting. If any of the events - * returned VM_FAULT_MAJOR, we account it as a major fault. - */ - if (major) { - tsk->maj_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, address); - } else { - tsk->min_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address); - } - check_v8086_mode(regs, address, tsk); } NOKPROBE_SYMBOL(do_user_addr_fault); diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 3b246ae40c8f..a4ac13cc3fdc 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -1452,6 +1452,15 @@ static unsigned long probe_memory_block_size(void) goto done; } + /* + * Use max block size to minimize overhead on bare metal, where + * alignment for memory hotplug isn't a concern. + */ + if (!boot_cpu_has(X86_FEATURE_HYPERVISOR)) { + bz = MAX_BLOCK_SIZE; + goto done; + } + /* Find the largest allowed block size that aligns to memory end */ for (bz = MAX_BLOCK_SIZE; bz > MIN_MEMORY_BLOCK_SIZE; bz >>= 1) { if (IS_ALIGNED(boot_mem_end, bz)) diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index b05f45e5e8e2..aa76ec2d359b 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c @@ -929,5 +929,4 @@ int memory_add_physaddr_to_nid(u64 start) nid = numa_meminfo.blk[0].nid; return nid; } -EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); #endif diff --git a/arch/xtensa/include/asm/uaccess.h b/arch/xtensa/include/asm/uaccess.h index e57f0d0a88d8..b9758119feca 100644 --- a/arch/xtensa/include/asm/uaccess.h +++ b/arch/xtensa/include/asm/uaccess.h @@ -35,7 +35,7 @@ #define get_fs() (current->thread.current_ds) #define set_fs(val) (current->thread.current_ds = (val)) -#define segment_eq(a, b) ((a).seg == (b).seg) +#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg) #define __kernel_ok (uaccess_kernel()) #define __user_ok(addr, size) \ diff --git a/arch/xtensa/mm/fault.c b/arch/xtensa/mm/fault.c index c128dcc7c85b..7666408ce12a 100644 --- a/arch/xtensa/mm/fault.c +++ b/arch/xtensa/mm/fault.c @@ -72,6 +72,9 @@ void do_page_fault(struct pt_regs *regs) if (user_mode(regs)) flags |= FAULT_FLAG_USER; + + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); + retry: mmap_read_lock(mm); vma = find_vma(mm, address); @@ -107,7 +110,7 @@ good_area: * make sure we exit gracefully rather than endlessly redo * the fault. */ - fault = handle_mm_fault(vma, address, flags); + fault = handle_mm_fault(vma, address, flags, regs); if (fault_signal_pending(fault, regs)) return; @@ -122,10 +125,6 @@ good_area: BUG(); } if (flags & FAULT_FLAG_ALLOW_RETRY) { - if (fault & VM_FAULT_MAJOR) - current->maj_flt++; - else - current->min_flt++; if (fault & VM_FAULT_RETRY) { flags |= FAULT_FLAG_TRIED; @@ -139,12 +138,6 @@ good_area: } mmap_read_unlock(mm); - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); - if (flags & VM_FAULT_MAJOR) - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, address); - else - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address); - return; /* Something tried to access memory that isn't in our memory map.. |