diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2022-05-26 12:32:41 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2022-05-26 12:32:41 -0700 |
commit | 98931dd95fd489fcbfa97da563505a6f071d7c77 (patch) | |
tree | 44683fc4a92efa614acdca2742a7ff19d26da1e3 /arch/x86 | |
parent | df202b452fe6c6d6f1351bad485e2367ef1e644e (diff) | |
parent | f403f22f8ccb12860b2b62fec3173c6ccd45938b (diff) | |
download | linux-98931dd95fd489fcbfa97da563505a6f071d7c77.tar.bz2 |
Merge tag 'mm-stable-2022-05-25' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Pull MM updates from Andrew Morton:
"Almost all of MM here. A few things are still getting finished off,
reviewed, etc.
- Yang Shi has improved the behaviour of khugepaged collapsing of
readonly file-backed transparent hugepages.
- Johannes Weiner has arranged for zswap memory use to be tracked and
managed on a per-cgroup basis.
- Munchun Song adds a /proc knob ("hugetlb_optimize_vmemmap") for
runtime enablement of the recent huge page vmemmap optimization
feature.
- Baolin Wang contributes a series to fix some issues around hugetlb
pagetable invalidation.
- Zhenwei Pi has fixed some interactions between hwpoisoned pages and
virtualization.
- Tong Tiangen has enabled the use of the presently x86-only
page_table_check debugging feature on arm64 and riscv.
- David Vernet has done some fixup work on the memcg selftests.
- Peter Xu has taught userfaultfd to handle write protection faults
against shmem- and hugetlbfs-backed files.
- More DAMON development from SeongJae Park - adding online tuning of
the feature and support for monitoring of fixed virtual address
ranges. Also easier discovery of which monitoring operations are
available.
- Nadav Amit has done some optimization of TLB flushing during
mprotect().
- Neil Brown continues to labor away at improving our swap-over-NFS
support.
- David Hildenbrand has some fixes to anon page COWing versus
get_user_pages().
- Peng Liu fixed some errors in the core hugetlb code.
- Joao Martins has reduced the amount of memory consumed by
device-dax's compound devmaps.
- Some cleanups of the arch-specific pagemap code from Anshuman
Khandual.
- Muchun Song has found and fixed some errors in the TLB flushing of
transparent hugepages.
- Roman Gushchin has done more work on the memcg selftests.
... and, of course, many smaller fixes and cleanups. Notably, the
customary million cleanup serieses from Miaohe Lin"
* tag 'mm-stable-2022-05-25' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (381 commits)
mm: kfence: use PAGE_ALIGNED helper
selftests: vm: add the "settings" file with timeout variable
selftests: vm: add "test_hmm.sh" to TEST_FILES
selftests: vm: check numa_available() before operating "merge_across_nodes" in ksm_tests
selftests: vm: add migration to the .gitignore
selftests/vm/pkeys: fix typo in comment
ksm: fix typo in comment
selftests: vm: add process_mrelease tests
Revert "mm/vmscan: never demote for memcg reclaim"
mm/kfence: print disabling or re-enabling message
include/trace/events/percpu.h: cleanup for "percpu: improve percpu_alloc_percpu event trace"
include/trace/events/mmflags.h: cleanup for "tracing: incorrect gfp_t conversion"
mm: fix a potential infinite loop in start_isolate_page_range()
MAINTAINERS: add Muchun as co-maintainer for HugeTLB
zram: fix Kconfig dependency warning
mm/shmem: fix shmem folio swapoff hang
cgroup: fix an error handling path in alloc_pagecache_max_30M()
mm: damon: use HPAGE_PMD_SIZE
tracing: incorrect isolate_mote_t cast in mm_vmscan_lru_isolate
nodemask.h: fix compilation error with GCC12
...
Diffstat (limited to 'arch/x86')
-rw-r--r-- | arch/x86/Kconfig | 3 | ||||
-rw-r--r-- | arch/x86/include/asm/pgtable.h | 54 | ||||
-rw-r--r-- | arch/x86/include/asm/pgtable_64.h | 4 | ||||
-rw-r--r-- | arch/x86/include/asm/pgtable_64_types.h | 5 | ||||
-rw-r--r-- | arch/x86/include/asm/pgtable_types.h | 2 | ||||
-rw-r--r-- | arch/x86/include/asm/tlbflush.h | 97 | ||||
-rw-r--r-- | arch/x86/include/uapi/asm/mman.h | 14 | ||||
-rw-r--r-- | arch/x86/mm/Makefile | 2 | ||||
-rw-r--r-- | arch/x86/mm/init_64.c | 2 | ||||
-rw-r--r-- | arch/x86/mm/pgprot.c | 35 | ||||
-rw-r--r-- | arch/x86/mm/pgtable.c | 13 |
11 files changed, 196 insertions, 35 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 6f59517c4356..cf531fbcd229 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -76,7 +76,6 @@ config X86 select ARCH_HAS_EARLY_DEBUG if KGDB select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_FAST_MULTIPLIER - select ARCH_HAS_FILTER_PGPROT select ARCH_HAS_FORTIFY_SOURCE select ARCH_HAS_GCOV_PROFILE_ALL select ARCH_HAS_KCOV if X86_64 @@ -95,6 +94,7 @@ config X86 select ARCH_HAS_SYNC_CORE_BEFORE_USERMODE select ARCH_HAS_SYSCALL_WRAPPER select ARCH_HAS_UBSAN_SANITIZE_ALL + select ARCH_HAS_VM_GET_PAGE_PROT select ARCH_HAS_DEBUG_WX select ARCH_HAS_ZONE_DMA_SET if EXPERT select ARCH_HAVE_NMI_SAFE_CMPXCHG @@ -121,6 +121,7 @@ config X86 select ARCH_WANTS_NO_INSTR select ARCH_WANT_GENERAL_HUGETLB select ARCH_WANT_HUGE_PMD_SHARE + select ARCH_WANT_HUGETLB_PAGE_OPTIMIZE_VMEMMAP if X86_64 select ARCH_WANT_LD_ORPHAN_WARN select ARCH_WANTS_THP_SWAP if X86_64 select ARCH_HAS_PARANOID_L1D_FLUSH diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 62ab07e24aef..44e2d6f1dbaa 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -649,11 +649,6 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot) #define canon_pgprot(p) __pgprot(massage_pgprot(p)) -static inline pgprot_t arch_filter_pgprot(pgprot_t prot) -{ - return canon_pgprot(prot); -} - static inline int is_new_memtype_allowed(u64 paddr, unsigned long size, enum page_cache_mode pcm, enum page_cache_mode new_pcm) @@ -1077,16 +1072,6 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, return pte; } -#define __HAVE_ARCH_PTEP_CLEAR -static inline void ptep_clear(struct mm_struct *mm, unsigned long addr, - pte_t *ptep) -{ - if (IS_ENABLED(CONFIG_PAGE_TABLE_CHECK)) - ptep_get_and_clear(mm, addr, ptep); - else - pte_clear(mm, addr, ptep); -} - #define __HAVE_ARCH_PTEP_SET_WRPROTECT static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) @@ -1173,6 +1158,11 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma, } } #endif + +#define __HAVE_ARCH_PMDP_INVALIDATE_AD +extern pmd_t pmdp_invalidate_ad(struct vm_area_struct *vma, + unsigned long address, pmd_t *pmdp); + /* * Page table pages are page-aligned. The lower half of the top * level is used for userspace and the top half for the kernel. @@ -1291,6 +1281,23 @@ static inline void update_mmu_cache_pud(struct vm_area_struct *vma, unsigned long addr, pud_t *pud) { } +#ifdef _PAGE_SWP_EXCLUSIVE +#define __HAVE_ARCH_PTE_SWP_EXCLUSIVE +static inline pte_t pte_swp_mkexclusive(pte_t pte) +{ + return pte_set_flags(pte, _PAGE_SWP_EXCLUSIVE); +} + +static inline int pte_swp_exclusive(pte_t pte) +{ + return pte_flags(pte) & _PAGE_SWP_EXCLUSIVE; +} + +static inline pte_t pte_swp_clear_exclusive(pte_t pte) +{ + return pte_clear_flags(pte, _PAGE_SWP_EXCLUSIVE); +} +#endif /* _PAGE_SWP_EXCLUSIVE */ #ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY static inline pte_t pte_swp_mksoft_dirty(pte_t pte) @@ -1430,6 +1437,23 @@ static inline bool arch_faults_on_old_pte(void) return false; } +#ifdef CONFIG_PAGE_TABLE_CHECK +static inline bool pte_user_accessible_page(pte_t pte) +{ + return (pte_val(pte) & _PAGE_PRESENT) && (pte_val(pte) & _PAGE_USER); +} + +static inline bool pmd_user_accessible_page(pmd_t pmd) +{ + return pmd_leaf(pmd) && (pmd_val(pmd) & _PAGE_PRESENT) && (pmd_val(pmd) & _PAGE_USER); +} + +static inline bool pud_user_accessible_page(pud_t pud) +{ + return pud_leaf(pud) && (pud_val(pud) & _PAGE_PRESENT) && (pud_val(pud) & _PAGE_USER); +} +#endif + #endif /* __ASSEMBLY__ */ #endif /* _ASM_X86_PGTABLE_H */ diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 56d0399a0cd1..e479491da8d5 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -186,7 +186,7 @@ static inline void native_pgd_clear(pgd_t *pgd) * * | ... | 11| 10| 9|8|7|6|5| 4| 3|2| 1|0| <- bit number * | ... |SW3|SW2|SW1|G|L|D|A|CD|WT|U| W|P| <- bit names - * | TYPE (59-63) | ~OFFSET (9-58) |0|0|X|X| X| X|F|SD|0| <- swp entry + * | TYPE (59-63) | ~OFFSET (9-58) |0|0|X|X| X| E|F|SD|0| <- swp entry * * G (8) is aliased and used as a PROT_NONE indicator for * !present ptes. We need to start storing swap entries above @@ -203,6 +203,8 @@ static inline void native_pgd_clear(pgd_t *pgd) * F (2) in swp entry is used to record when a pagetable is * writeprotected by userfaultfd WP support. * + * E (3) in swp entry is used to rememeber PG_anon_exclusive. + * * Bit 7 in swp entry should be 0 because pmd_present checks not only P, * but also L and G. * diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h index 91ac10654570..70e360a2e5fb 100644 --- a/arch/x86/include/asm/pgtable_64_types.h +++ b/arch/x86/include/asm/pgtable_64_types.h @@ -163,4 +163,9 @@ extern unsigned int ptrs_per_p4d; #define PGD_KERNEL_START ((PAGE_SIZE / 2) / sizeof(pgd_t)) +/* + * We borrow bit 3 to remember PG_anon_exclusive. + */ +#define _PAGE_SWP_EXCLUSIVE _PAGE_PWT + #endif /* _ASM_X86_PGTABLE_64_DEFS_H */ diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 407084d9fd99..bdaf8391e2e0 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -110,9 +110,11 @@ #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) #define _PAGE_NX (_AT(pteval_t, 1) << _PAGE_BIT_NX) #define _PAGE_DEVMAP (_AT(u64, 1) << _PAGE_BIT_DEVMAP) +#define _PAGE_SOFTW4 (_AT(pteval_t, 1) << _PAGE_BIT_SOFTW4) #else #define _PAGE_NX (_AT(pteval_t, 0)) #define _PAGE_DEVMAP (_AT(pteval_t, 0)) +#define _PAGE_SOFTW4 (_AT(pteval_t, 0)) #endif #define _PAGE_PROTNONE (_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE) diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 98fa0a114074..4af5579c7ef7 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -259,6 +259,103 @@ static inline void arch_tlbbatch_add_mm(struct arch_tlbflush_unmap_batch *batch, extern void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch); +static inline bool pte_flags_need_flush(unsigned long oldflags, + unsigned long newflags, + bool ignore_access) +{ + /* + * Flags that require a flush when cleared but not when they are set. + * Only include flags that would not trigger spurious page-faults. + * Non-present entries are not cached. Hardware would set the + * dirty/access bit if needed without a fault. + */ + const pteval_t flush_on_clear = _PAGE_DIRTY | _PAGE_PRESENT | + _PAGE_ACCESSED; + const pteval_t software_flags = _PAGE_SOFTW1 | _PAGE_SOFTW2 | + _PAGE_SOFTW3 | _PAGE_SOFTW4; + const pteval_t flush_on_change = _PAGE_RW | _PAGE_USER | _PAGE_PWT | + _PAGE_PCD | _PAGE_PSE | _PAGE_GLOBAL | _PAGE_PAT | + _PAGE_PAT_LARGE | _PAGE_PKEY_BIT0 | _PAGE_PKEY_BIT1 | + _PAGE_PKEY_BIT2 | _PAGE_PKEY_BIT3 | _PAGE_NX; + unsigned long diff = oldflags ^ newflags; + + BUILD_BUG_ON(flush_on_clear & software_flags); + BUILD_BUG_ON(flush_on_clear & flush_on_change); + BUILD_BUG_ON(flush_on_change & software_flags); + + /* Ignore software flags */ + diff &= ~software_flags; + + if (ignore_access) + diff &= ~_PAGE_ACCESSED; + + /* + * Did any of the 'flush_on_clear' flags was clleared set from between + * 'oldflags' and 'newflags'? + */ + if (diff & oldflags & flush_on_clear) + return true; + + /* Flush on modified flags. */ + if (diff & flush_on_change) + return true; + + /* Ensure there are no flags that were left behind */ + if (IS_ENABLED(CONFIG_DEBUG_VM) && + (diff & ~(flush_on_clear | software_flags | flush_on_change))) { + VM_WARN_ON_ONCE(1); + return true; + } + + return false; +} + +/* + * pte_needs_flush() checks whether permissions were demoted and require a + * flush. It should only be used for userspace PTEs. + */ +static inline bool pte_needs_flush(pte_t oldpte, pte_t newpte) +{ + /* !PRESENT -> * ; no need for flush */ + if (!(pte_flags(oldpte) & _PAGE_PRESENT)) + return false; + + /* PFN changed ; needs flush */ + if (pte_pfn(oldpte) != pte_pfn(newpte)) + return true; + + /* + * check PTE flags; ignore access-bit; see comment in + * ptep_clear_flush_young(). + */ + return pte_flags_need_flush(pte_flags(oldpte), pte_flags(newpte), + true); +} +#define pte_needs_flush pte_needs_flush + +/* + * huge_pmd_needs_flush() checks whether permissions were demoted and require a + * flush. It should only be used for userspace huge PMDs. + */ +static inline bool huge_pmd_needs_flush(pmd_t oldpmd, pmd_t newpmd) +{ + /* !PRESENT -> * ; no need for flush */ + if (!(pmd_flags(oldpmd) & _PAGE_PRESENT)) + return false; + + /* PFN changed ; needs flush */ + if (pmd_pfn(oldpmd) != pmd_pfn(newpmd)) + return true; + + /* + * check PMD flags; do not ignore access-bit; see + * pmdp_clear_flush_young(). + */ + return pte_flags_need_flush(pmd_flags(oldpmd), pmd_flags(newpmd), + false); +} +#define huge_pmd_needs_flush huge_pmd_needs_flush + #endif /* !MODULE */ static inline void __native_tlb_flush_global(unsigned long cr4) diff --git a/arch/x86/include/uapi/asm/mman.h b/arch/x86/include/uapi/asm/mman.h index d4a8d0424bfb..775dbd3aff73 100644 --- a/arch/x86/include/uapi/asm/mman.h +++ b/arch/x86/include/uapi/asm/mman.h @@ -5,20 +5,6 @@ #define MAP_32BIT 0x40 /* only give out 32bit addresses */ #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS -/* - * Take the 4 protection key bits out of the vma->vm_flags - * value and turn them in to the bits that we can put in - * to a pte. - * - * Only override these if Protection Keys are available - * (which is only on 64-bit). - */ -#define arch_vm_get_page_prot(vm_flags) __pgprot( \ - ((vm_flags) & VM_PKEY_BIT0 ? _PAGE_PKEY_BIT0 : 0) | \ - ((vm_flags) & VM_PKEY_BIT1 ? _PAGE_PKEY_BIT1 : 0) | \ - ((vm_flags) & VM_PKEY_BIT2 ? _PAGE_PKEY_BIT2 : 0) | \ - ((vm_flags) & VM_PKEY_BIT3 ? _PAGE_PKEY_BIT3 : 0)) - #define arch_calc_vm_prot_bits(prot, key) ( \ ((key) & 0x1 ? VM_PKEY_BIT0 : 0) | \ ((key) & 0x2 ? VM_PKEY_BIT1 : 0) | \ diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index d957dc15b371..f8220fd2c169 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile @@ -20,7 +20,7 @@ CFLAGS_REMOVE_mem_encrypt_identity.o = -pg endif obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o mmap.o \ - pgtable.o physaddr.o tlb.o cpu_entry_area.o maccess.o + pgtable.o physaddr.o tlb.o cpu_entry_area.o maccess.o pgprot.o obj-y += pat/ diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 61d0ab154f96..cb290a2f0747 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -1269,7 +1269,7 @@ static struct kcore_list kcore_vsyscall; static void __init register_page_bootmem_info(void) { -#if defined(CONFIG_NUMA) || defined(CONFIG_HUGETLB_PAGE_FREE_VMEMMAP) +#if defined(CONFIG_NUMA) || defined(CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP) int i; for_each_online_node(i) diff --git a/arch/x86/mm/pgprot.c b/arch/x86/mm/pgprot.c new file mode 100644 index 000000000000..763742782286 --- /dev/null +++ b/arch/x86/mm/pgprot.c @@ -0,0 +1,35 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/export.h> +#include <linux/mm.h> +#include <asm/pgtable.h> + +pgprot_t vm_get_page_prot(unsigned long vm_flags) +{ + unsigned long val = pgprot_val(protection_map[vm_flags & + (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]); + +#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS + /* + * Take the 4 protection key bits out of the vma->vm_flags value and + * turn them in to the bits that we can put in to a pte. + * + * Only override these if Protection Keys are available (which is only + * on 64-bit). + */ + if (vm_flags & VM_PKEY_BIT0) + val |= _PAGE_PKEY_BIT0; + if (vm_flags & VM_PKEY_BIT1) + val |= _PAGE_PKEY_BIT1; + if (vm_flags & VM_PKEY_BIT2) + val |= _PAGE_PKEY_BIT2; + if (vm_flags & VM_PKEY_BIT3) + val |= _PAGE_PKEY_BIT3; +#endif + + val = __sme_set(val); + if (val & _PAGE_PRESENT) + val &= __supported_pte_mask; + return __pgprot(val); +} +EXPORT_SYMBOL(vm_get_page_prot); diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 3481b35cb4ec..a932d7712d85 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -608,6 +608,16 @@ int pmdp_clear_flush_young(struct vm_area_struct *vma, return young; } + +pmd_t pmdp_invalidate_ad(struct vm_area_struct *vma, unsigned long address, + pmd_t *pmdp) +{ + /* + * No flush is necessary. Once an invalid PTE is established, the PTE's + * access and dirty bits cannot be updated. + */ + return pmdp_establish(vma, address, pmdp, pmd_mkinvalid(*pmdp)); +} #endif /** @@ -676,9 +686,8 @@ int p4d_set_huge(p4d_t *p4d, phys_addr_t addr, pgprot_t prot) * * No 512GB pages yet -- always return 0 */ -int p4d_clear_huge(p4d_t *p4d) +void p4d_clear_huge(p4d_t *p4d) { - return 0; } #endif |