author     Linus Torvalds <torvalds@linux-foundation.org>   2018-06-15 08:51:42 +0900
committer  Linus Torvalds <torvalds@linux-foundation.org>   2018-06-15 08:51:42 +0900
commit     b5d903c2d656e9bc54bc76554a477d796a63120d (patch)
tree       85b5d401eb5d63a88d2b054286a979f90344cbb5
parent     7a932516f55cdf430c7cce78df2010ff7db6b874 (diff)
parent     ee410f15b1418f2f4428e79980674c979081bcb7 (diff)
download   linux-b5d903c2d656e9bc54bc76554a477d796a63120d.tar.bz2
Merge branch 'akpm' (patches from Andrew)
Merge more updates from Andrew Morton:
- MM remainders
- various misc things
- kcov updates
* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (27 commits)
lib/test_printf.c: call wait_for_random_bytes() before plain %p tests
hexagon: drop the unused variable zero_page_mask
hexagon: fix printk format warning in setup.c
mm: fix oom_kill event handling
treewide: use PHYS_ADDR_MAX to avoid type casting ULLONG_MAX
mm: use octal not symbolic permissions
ipc: use new return type vm_fault_t
sysvipc/sem: mitigate semnum index against spectre v1
fault-injection: reorder config entries
arm: port KCOV to arm
sched/core / kcov: avoid kcov_area during task switch
kcov: prefault the kcov_area
kcov: ensure irq code sees a valid area
kernel/relay.c: change return type to vm_fault_t
exofs: avoid VLA in structures
coredump: fix spam with zero VMA process
fat: use fat_fs_error() instead of BUG_ON() in __fat_get_block()
proc: skip branch in /proc/*/* lookup
mremap: remove LATENCY_LIMIT from mremap to reduce the number of TLB shootdowns
mm/memblock: add missing include <linux/bootmem.h>
...
56 files changed, 397 insertions, 212 deletions
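The ipc/sem.c hunks below bound a user-supplied semaphore index with array_index_nospec() before it is used to address sma->sems[]. As a rough sketch of that Spectre-v1 pattern (sem_from_index() is a hypothetical helper invented for illustration; struct sem_array and its sems[]/sem_nsems fields are the ones the patch touches):

```c
#include <linux/nospec.h>

/*
 * Illustrative sketch only: clamp a user-controlled index so that a
 * mispredicted bounds check cannot be used to speculatively load past
 * the end of sma->sems[] (Spectre v1). Mirrors the ipc/sem.c hunks in
 * this merge; sem_from_index() itself is not kernel code.
 */
static struct sem *sem_from_index(struct sem_array *sma, int semnum)
{
	if (semnum < 0 || semnum >= sma->sem_nsems)
		return NULL;			/* architectural bounds check */

	/* Force the index to an in-bounds value even under misspeculation. */
	semnum = array_index_nospec(semnum, sma->sem_nsems);
	return &sma->sems[semnum];
}
```

The clamp is cheap and branch-free, which is why the patch applies it at every site that indexes sems[] with a value taken from userspace.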
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 0be4397f3ccc..483d2858e367 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -8,9 +8,10 @@ config ARM select ARCH_HAS_DEVMEM_IS_ALLOWED select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_FORTIFY_SOURCE + select ARCH_HAS_KCOV select ARCH_HAS_PTE_SPECIAL if ARM_LPAE - select ARCH_HAS_SET_MEMORY select ARCH_HAS_PHYS_TO_DMA + select ARCH_HAS_SET_MEMORY select ARCH_HAS_STRICT_KERNEL_RWX if MMU && !XIP_KERNEL select ARCH_HAS_STRICT_MODULE_RWX if MMU select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile index a3c5fbcad4ab..1f5a5ffe7fcf 100644 --- a/arch/arm/boot/compressed/Makefile +++ b/arch/arm/boot/compressed/Makefile @@ -25,6 +25,9 @@ endif GCOV_PROFILE := n +# Prevents link failures: __sanitizer_cov_trace_pc() is not linked in. +KCOV_INSTRUMENT := n + # # Architecture dependencies # diff --git a/arch/arm/kvm/hyp/Makefile b/arch/arm/kvm/hyp/Makefile index 7fc0638f263a..d2b5ec9c4b92 100644 --- a/arch/arm/kvm/hyp/Makefile +++ b/arch/arm/kvm/hyp/Makefile @@ -23,3 +23,11 @@ obj-$(CONFIG_KVM_ARM_HOST) += hyp-entry.o obj-$(CONFIG_KVM_ARM_HOST) += switch.o CFLAGS_switch.o += $(CFLAGS_ARMV7VE) obj-$(CONFIG_KVM_ARM_HOST) += s2-setup.o + +# KVM code is run at a different exception code with a different map, so +# compiler instrumentation that inserts callbacks or checks into the code may +# cause crashes. Just disable it. +GCOV_PROFILE := n +KASAN_SANITIZE := n +UBSAN_SANITIZE := n +KCOV_INSTRUMENT := n diff --git a/arch/arm/vdso/Makefile b/arch/arm/vdso/Makefile index bb4118213fee..f4efff9d3afb 100644 --- a/arch/arm/vdso/Makefile +++ b/arch/arm/vdso/Makefile @@ -30,6 +30,9 @@ CFLAGS_vgettimeofday.o = -O2 # Disable gcov profiling for VDSO code GCOV_PROFILE := n +# Prevents link failures: __sanitizer_cov_trace_pc() is not linked in. +KCOV_INSTRUMENT := n + # Force dependency $(obj)/vdso.o : $(obj)/vdso.so diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 1b18b4722420..325cfb3b858a 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -310,7 +310,7 @@ static void __init arm64_memory_present(void) } #endif -static phys_addr_t memory_limit = (phys_addr_t)ULLONG_MAX; +static phys_addr_t memory_limit = PHYS_ADDR_MAX; /* * Limit the memory size that was specified via FDT. @@ -401,7 +401,7 @@ void __init arm64_memblock_init(void) * high up in memory, add back the kernel region that must be accessible * via the linear mapping. */ - if (memory_limit != (phys_addr_t)ULLONG_MAX) { + if (memory_limit != PHYS_ADDR_MAX) { memblock_mem_limit_remove_map(memory_limit); memblock_add(__pa_symbol(_text), (u64)(_end - _text)); } @@ -666,7 +666,7 @@ __setup("keepinitrd", keepinitrd_setup); */ static int dump_mem_limit(struct notifier_block *self, unsigned long v, void *p) { - if (memory_limit != (phys_addr_t)ULLONG_MAX) { + if (memory_limit != PHYS_ADDR_MAX) { pr_emerg("Memory Limit: %llu MB\n", memory_limit >> 20); } else { pr_emerg("Memory Limit: none\n"); diff --git a/arch/hexagon/include/asm/pgtable.h b/arch/hexagon/include/asm/pgtable.h index aef02f7ca8aa..65125d0b02dd 100644 --- a/arch/hexagon/include/asm/pgtable.h +++ b/arch/hexagon/include/asm/pgtable.h @@ -30,7 +30,6 @@ /* A handy thing to have if one has the RAM. 
Declared in head.S */ extern unsigned long empty_zero_page; -extern unsigned long zero_page_mask; /* * The PTE model described here is that of the Hexagon Virtual Machine, diff --git a/arch/hexagon/kernel/setup.c b/arch/hexagon/kernel/setup.c index 6981949f5df3..dc8c7e75b5d1 100644 --- a/arch/hexagon/kernel/setup.c +++ b/arch/hexagon/kernel/setup.c @@ -66,7 +66,7 @@ void __init setup_arch(char **cmdline_p) */ __vmsetvec(_K_VM_event_vector); - printk(KERN_INFO "PHYS_OFFSET=0x%08x\n", PHYS_OFFSET); + printk(KERN_INFO "PHYS_OFFSET=0x%08lx\n", PHYS_OFFSET); /* * Simulator has a few differences from the hardware. diff --git a/arch/hexagon/mm/init.c b/arch/hexagon/mm/init.c index 192584d5ac2f..1495d45e472d 100644 --- a/arch/hexagon/mm/init.c +++ b/arch/hexagon/mm/init.c @@ -39,9 +39,6 @@ unsigned long __phys_offset; /* physical kernel offset >> 12 */ /* Set as variable to limit PMD copies */ int max_kernel_seg = 0x303; -/* think this should be (page_size-1) the way it's used...*/ -unsigned long zero_page_mask; - /* indicate pfn's of high memory */ unsigned long highstart_pfn, highend_pfn; diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c index 563188ac6fa2..2c96c0c68116 100644 --- a/arch/mips/kernel/setup.c +++ b/arch/mips/kernel/setup.c @@ -93,7 +93,7 @@ void __init add_memory_region(phys_addr_t start, phys_addr_t size, long type) * If the region reaches the top of the physical address space, adjust * the size slightly so that (start + size) doesn't overflow */ - if (start + size - 1 == (phys_addr_t)ULLONG_MAX) + if (start + size - 1 == PHYS_ADDR_MAX) --size; /* Sanity check */ @@ -376,7 +376,7 @@ static void __init bootmem_init(void) unsigned long reserved_end; unsigned long mapstart = ~0UL; unsigned long bootmap_size; - phys_addr_t ramstart = (phys_addr_t)ULLONG_MAX; + phys_addr_t ramstart = PHYS_ADDR_MAX; bool bootmap_valid = false; int i; diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 8cecda4bd66a..5c8530d0c611 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -215,7 +215,7 @@ void __init mem_topology_setup(void) /* Place all memblock_regions in the same node and merge contiguous * memblock_regions */ - memblock_set_node(0, (phys_addr_t)ULLONG_MAX, &memblock.memory, 0); + memblock_set_node(0, PHYS_ADDR_MAX, &memblock.memory, 0); } void __init initmem_init(void) diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 8aeb1aabe76e..f396048a0d68 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -1620,7 +1620,7 @@ static void __init bootmem_init_nonnuma(void) (top_of_ram - total_ram) >> 20); init_node_masks_nonnuma(); - memblock_set_node(0, (phys_addr_t)ULLONG_MAX, &memblock.memory, 0); + memblock_set_node(0, PHYS_ADDR_MAX, &memblock.memory, 0); allocate_node_data(0); node_set_online(0); } diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index fec82b577c18..cee58a972cb2 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -706,7 +706,9 @@ void __init init_mem_mapping(void) */ int devmem_is_allowed(unsigned long pagenr) { - if (page_is_ram(pagenr)) { + if (region_intersects(PFN_PHYS(pagenr), PAGE_SIZE, + IORESOURCE_SYSTEM_RAM, IORES_DESC_NONE) + != REGION_DISJOINT) { /* * For disallowed memory regions in the low 1MB range, * request that the page be shown as all zeros. 
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index c893c6a3d707..979e0a02cbe1 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -692,7 +692,7 @@ void __init initmem_init(void) high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1; #endif - memblock_set_node(0, (phys_addr_t)ULLONG_MAX, &memblock.memory, 0); + memblock_set_node(0, PHYS_ADDR_MAX, &memblock.memory, 0); sparse_memory_present_with_active_regions(0); #ifdef CONFIG_FLATMEM diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 17383f9677fa..045f492d5f68 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -742,7 +742,7 @@ kernel_physical_mapping_init(unsigned long paddr_start, #ifndef CONFIG_NUMA void __init initmem_init(void) { - memblock_set_node(0, (phys_addr_t)ULLONG_MAX, &memblock.memory, 0); + memblock_set_node(0, PHYS_ADDR_MAX, &memblock.memory, 0); } #endif diff --git a/drivers/firmware/efi/arm-init.c b/drivers/firmware/efi/arm-init.c index 80d1a885def5..b5214c143fee 100644 --- a/drivers/firmware/efi/arm-init.c +++ b/drivers/firmware/efi/arm-init.c @@ -193,7 +193,7 @@ static __init void reserve_regions(void) * uses its own memory map instead. */ memblock_dump_all(); - memblock_remove(0, (phys_addr_t)ULLONG_MAX); + memblock_remove(0, PHYS_ADDR_MAX); for_each_efi_memory_desc(md) { paddr = md->phys_addr; diff --git a/drivers/remoteproc/qcom_q6v5_pil.c b/drivers/remoteproc/qcom_q6v5_pil.c index 2190debf3d35..2bf8e7c49f2a 100644 --- a/drivers/remoteproc/qcom_q6v5_pil.c +++ b/drivers/remoteproc/qcom_q6v5_pil.c @@ -686,7 +686,7 @@ static int q6v5_mpss_load(struct q6v5 *qproc) struct elf32_hdr *ehdr; phys_addr_t mpss_reloc; phys_addr_t boot_addr; - phys_addr_t min_addr = (phys_addr_t)ULLONG_MAX; + phys_addr_t min_addr = PHYS_ADDR_MAX; phys_addr_t max_addr = 0; bool relocate = false; char seg_name[10]; diff --git a/drivers/soc/qcom/mdt_loader.c b/drivers/soc/qcom/mdt_loader.c index 17b314d9a148..dc09d7ac905f 100644 --- a/drivers/soc/qcom/mdt_loader.c +++ b/drivers/soc/qcom/mdt_loader.c @@ -50,7 +50,7 @@ ssize_t qcom_mdt_get_size(const struct firmware *fw) const struct elf32_phdr *phdrs; const struct elf32_phdr *phdr; const struct elf32_hdr *ehdr; - phys_addr_t min_addr = (phys_addr_t)ULLONG_MAX; + phys_addr_t min_addr = PHYS_ADDR_MAX; phys_addr_t max_addr = 0; int i; @@ -97,7 +97,7 @@ int qcom_mdt_load(struct device *dev, const struct firmware *fw, const struct elf32_hdr *ehdr; const struct firmware *seg_fw; phys_addr_t mem_reloc; - phys_addr_t min_addr = (phys_addr_t)ULLONG_MAX; + phys_addr_t min_addr = PHYS_ADDR_MAX; phys_addr_t max_addr = 0; size_t fw_name_len; ssize_t offset; diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 070b6184642d..0ac456b52bdd 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1621,8 +1621,8 @@ static int fill_files_note(struct memelfnote *note) if (size >= MAX_FILE_NOTE_SIZE) /* paranoia check */ return -EINVAL; size = round_up(size, PAGE_SIZE); - data = vmalloc(size); - if (!data) + data = kvmalloc(size, GFP_KERNEL); + if (ZERO_OR_NULL_PTR(data)) return -ENOMEM; start_end_ofs = data + 2; @@ -1639,7 +1639,7 @@ static int fill_files_note(struct memelfnote *note) filename = file_path(file, name_curpos, remaining); if (IS_ERR(filename)) { if (PTR_ERR(filename) == -ENAMETOOLONG) { - vfree(data); + kvfree(data); size = size * 5 / 4; goto alloc; } @@ -1932,7 +1932,7 @@ static void free_note_info(struct elf_note_info *info) kfree(t); } kfree(info->psinfo.data); - vfree(info->files.data); + kvfree(info->files.data); } #else @@ -2148,7 +2148,7 
@@ static void free_note_info(struct elf_note_info *info) /* Free data possibly allocated by fill_files_note(): */ if (info->notes_files) - vfree(info->notes_files->data); + kvfree(info->notes_files->data); kfree(info->prstatus); kfree(info->psinfo); @@ -2294,8 +2294,9 @@ static int elf_core_dump(struct coredump_params *cprm) if (segs - 1 > ULONG_MAX / sizeof(*vma_filesz)) goto end_coredump; - vma_filesz = vmalloc(array_size(sizeof(*vma_filesz), (segs - 1))); - if (!vma_filesz) + vma_filesz = kvmalloc(array_size(sizeof(*vma_filesz), (segs - 1)), + GFP_KERNEL); + if (ZERO_OR_NULL_PTR(vma_filesz)) goto end_coredump; for (i = 0, vma = first_vma(current, gate_vma); vma != NULL; @@ -2402,7 +2403,7 @@ end_coredump: cleanup: free_note_info(&info); kfree(shdr4extnum); - vfree(vma_filesz); + kvfree(vma_filesz); kfree(phdr4note); kfree(elf); out: diff --git a/fs/exofs/ore.c b/fs/exofs/ore.c index ddbf87246898..1b8b44637e70 100644 --- a/fs/exofs/ore.c +++ b/fs/exofs/ore.c @@ -146,68 +146,82 @@ int _ore_get_io_state(struct ore_layout *layout, struct ore_io_state **pios) { struct ore_io_state *ios; - struct page **pages; - struct osd_sg_entry *sgilist; + size_t size_ios, size_extra, size_total; + void *ios_extra; + + /* + * The desired layout looks like this, with the extra_allocation + * items pointed at from fields within ios or per_dev: + struct __alloc_all_io_state { struct ore_io_state ios; struct ore_per_dev_state per_dev[numdevs]; union { struct osd_sg_entry sglist[sgs_per_dev * numdevs]; struct page *pages[num_par_pages]; - }; - } *_aios; - - if (likely(sizeof(*_aios) <= PAGE_SIZE)) { - _aios = kzalloc(sizeof(*_aios), GFP_KERNEL); - if (unlikely(!_aios)) { - ORE_DBGMSG("Failed kzalloc bytes=%zd\n", - sizeof(*_aios)); + } extra_allocation; + } whole_allocation; + + */ + + /* This should never happen, so abort early if it ever does. */ + if (sgs_per_dev && num_par_pages) { + ORE_DBGMSG("Tried to use both pages and sglist\n"); + *pios = NULL; + return -EINVAL; + } + + if (numdevs > (INT_MAX - sizeof(*ios)) / + sizeof(struct ore_per_dev_state)) + return -ENOMEM; + size_ios = sizeof(*ios) + sizeof(struct ore_per_dev_state) * numdevs; + + if (sgs_per_dev * numdevs > INT_MAX / sizeof(struct osd_sg_entry)) + return -ENOMEM; + if (num_par_pages > INT_MAX / sizeof(struct page *)) + return -ENOMEM; + size_extra = max(sizeof(struct osd_sg_entry) * (sgs_per_dev * numdevs), + sizeof(struct page *) * num_par_pages); + + size_total = size_ios + size_extra; + + if (likely(size_total <= PAGE_SIZE)) { + ios = kzalloc(size_total, GFP_KERNEL); + if (unlikely(!ios)) { + ORE_DBGMSG("Failed kzalloc bytes=%zd\n", size_total); *pios = NULL; return -ENOMEM; } - pages = num_par_pages ? _aios->pages : NULL; - sgilist = sgs_per_dev ? 
_aios->sglist : NULL; - ios = &_aios->ios; + ios_extra = (char *)ios + size_ios; } else { - struct __alloc_small_io_state { - struct ore_io_state ios; - struct ore_per_dev_state per_dev[numdevs]; - } *_aio_small; - union __extra_part { - struct osd_sg_entry sglist[sgs_per_dev * numdevs]; - struct page *pages[num_par_pages]; - } *extra_part; - - _aio_small = kzalloc(sizeof(*_aio_small), GFP_KERNEL); - if (unlikely(!_aio_small)) { + ios = kzalloc(size_ios, GFP_KERNEL); + if (unlikely(!ios)) { ORE_DBGMSG("Failed alloc first part bytes=%zd\n", - sizeof(*_aio_small)); + size_ios); *pios = NULL; return -ENOMEM; } - extra_part = kzalloc(sizeof(*extra_part), GFP_KERNEL); - if (unlikely(!extra_part)) { + ios_extra = kzalloc(size_extra, GFP_KERNEL); + if (unlikely(!ios_extra)) { ORE_DBGMSG("Failed alloc second part bytes=%zd\n", - sizeof(*extra_part)); - kfree(_aio_small); + size_extra); + kfree(ios); *pios = NULL; return -ENOMEM; } - pages = num_par_pages ? extra_part->pages : NULL; - sgilist = sgs_per_dev ? extra_part->sglist : NULL; /* In this case the per_dev[0].sgilist holds the pointer to * be freed */ - ios = &_aio_small->ios; ios->extra_part_alloc = true; } - if (pages) { - ios->parity_pages = pages; + if (num_par_pages) { + ios->parity_pages = ios_extra; ios->max_par_pages = num_par_pages; } - if (sgilist) { + if (sgs_per_dev) { + struct osd_sg_entry *sgilist = ios_extra; unsigned d; for (d = 0; d < numdevs; ++d) { diff --git a/fs/exofs/ore_raid.c b/fs/exofs/ore_raid.c index 27cbdb697649..199590f36203 100644 --- a/fs/exofs/ore_raid.c +++ b/fs/exofs/ore_raid.c @@ -71,6 +71,11 @@ static int _sp2d_alloc(unsigned pages_in_unit, unsigned group_width, { struct __stripe_pages_2d *sp2d; unsigned data_devs = group_width - parity; + + /* + * Desired allocation layout is, though when larger than PAGE_SIZE, + * each struct __alloc_1p_arrays is separately allocated: + struct _alloc_all_bytes { struct __alloc_stripe_pages_2d { struct __stripe_pages_2d sp2d; @@ -82,55 +87,85 @@ static int _sp2d_alloc(unsigned pages_in_unit, unsigned group_width, char page_is_read[data_devs]; } __a1pa[pages_in_unit]; } *_aab; + struct __alloc_1p_arrays *__a1pa; struct __alloc_1p_arrays *__a1pa_end; - const unsigned sizeof__a1pa = sizeof(_aab->__a1pa[0]); + + */ + + char *__a1pa; + char *__a1pa_end; + + const size_t sizeof_stripe_pages_2d = + sizeof(struct __stripe_pages_2d) + + sizeof(struct __1_page_stripe) * pages_in_unit; + const size_t sizeof__a1pa = + ALIGN(sizeof(struct page *) * (2 * group_width) + data_devs, + sizeof(void *)); + const size_t sizeof__a1pa_arrays = sizeof__a1pa * pages_in_unit; + const size_t alloc_total = sizeof_stripe_pages_2d + + sizeof__a1pa_arrays; + unsigned num_a1pa, alloc_size, i; /* FIXME: check these numbers in ore_verify_layout */ - BUG_ON(sizeof(_aab->__asp2d) > PAGE_SIZE); + BUG_ON(sizeof_stripe_pages_2d > PAGE_SIZE); BUG_ON(sizeof__a1pa > PAGE_SIZE); - if (sizeof(*_aab) > PAGE_SIZE) { - num_a1pa = (PAGE_SIZE - sizeof(_aab->__asp2d)) / sizeof__a1pa; - alloc_size = sizeof(_aab->__asp2d) + sizeof__a1pa * num_a1pa; + /* + * If alloc_total would be larger than PAGE_SIZE, only allocate + * as many a1pa items as would fill the rest of the page, instead + * of the full pages_in_unit count. 
+ */ + if (alloc_total > PAGE_SIZE) { + num_a1pa = (PAGE_SIZE - sizeof_stripe_pages_2d) / sizeof__a1pa; + alloc_size = sizeof_stripe_pages_2d + sizeof__a1pa * num_a1pa; } else { num_a1pa = pages_in_unit; - alloc_size = sizeof(*_aab); + alloc_size = alloc_total; } - _aab = kzalloc(alloc_size, GFP_KERNEL); - if (unlikely(!_aab)) { + *psp2d = sp2d = kzalloc(alloc_size, GFP_KERNEL); + if (unlikely(!sp2d)) { ORE_DBGMSG("!! Failed to alloc sp2d size=%d\n", alloc_size); return -ENOMEM; } + /* From here Just call _sp2d_free */ - sp2d = &_aab->__asp2d.sp2d; - *psp2d = sp2d; /* From here Just call _sp2d_free */ - - __a1pa = _aab->__a1pa; - __a1pa_end = __a1pa + num_a1pa; + /* Find start of a1pa area. */ + __a1pa = (char *)sp2d + sizeof_stripe_pages_2d; + /* Find end of the _allocated_ a1pa area. */ + __a1pa_end = __a1pa + alloc_size; + /* Allocate additionally needed a1pa items in PAGE_SIZE chunks. */ for (i = 0; i < pages_in_unit; ++i) { + struct __1_page_stripe *stripe = &sp2d->_1p_stripes[i]; + if (unlikely(__a1pa >= __a1pa_end)) { num_a1pa = min_t(unsigned, PAGE_SIZE / sizeof__a1pa, pages_in_unit - i); + alloc_size = sizeof__a1pa * num_a1pa; - __a1pa = kcalloc(num_a1pa, sizeof__a1pa, GFP_KERNEL); + __a1pa = kzalloc(alloc_size, GFP_KERNEL); if (unlikely(!__a1pa)) { ORE_DBGMSG("!! Failed to _alloc_1p_arrays=%d\n", num_a1pa); return -ENOMEM; } - __a1pa_end = __a1pa + num_a1pa; + __a1pa_end = __a1pa + alloc_size; /* First *pages is marked for kfree of the buffer */ - sp2d->_1p_stripes[i].alloc = true; + stripe->alloc = true; } - sp2d->_1p_stripes[i].pages = __a1pa->pages; - sp2d->_1p_stripes[i].scribble = __a1pa->scribble ; - sp2d->_1p_stripes[i].page_is_read = __a1pa->page_is_read; - ++__a1pa; + /* + * Attach all _lp_stripes pointers to the allocation for + * it which was either part of the original PAGE_SIZE + * allocation or the subsequent allocation in this loop. 
+ */ + stripe->pages = (void *)__a1pa; + stripe->scribble = stripe->pages + group_width; + stripe->page_is_read = (char *)stripe->scribble + group_width; + __a1pa += sizeof__a1pa; } sp2d->parity = parity; diff --git a/fs/exofs/super.c b/fs/exofs/super.c index 719a3152da80..41cf2fbee50d 100644 --- a/fs/exofs/super.c +++ b/fs/exofs/super.c @@ -549,27 +549,26 @@ static int exofs_devs_2_odi(struct exofs_dt_device_info *dt_dev, static int __alloc_dev_table(struct exofs_sb_info *sbi, unsigned numdevs, struct exofs_dev **peds) { - struct __alloc_ore_devs_and_exofs_devs { - /* Twice bigger table: See exofs_init_comps() and comment at - * exofs_read_lookup_dev_table() - */ - struct ore_dev *oreds[numdevs * 2 - 1]; - struct exofs_dev eds[numdevs]; - } *aoded; + /* Twice bigger table: See exofs_init_comps() and comment at + * exofs_read_lookup_dev_table() + */ + const size_t numores = numdevs * 2 - 1; struct exofs_dev *eds; unsigned i; - aoded = kzalloc(sizeof(*aoded), GFP_KERNEL); - if (unlikely(!aoded)) { + sbi->oc.ods = kzalloc(numores * sizeof(struct ore_dev *) + + numdevs * sizeof(struct exofs_dev), GFP_KERNEL); + if (unlikely(!sbi->oc.ods)) { EXOFS_ERR("ERROR: failed allocating Device array[%d]\n", numdevs); return -ENOMEM; } - sbi->oc.ods = aoded->oreds; - *peds = eds = aoded->eds; + /* Start of allocated struct exofs_dev entries */ + *peds = eds = (void *)sbi->oc.ods[numores]; + /* Initialize pointers into struct exofs_dev */ for (i = 0; i < numdevs; ++i) - aoded->oreds[i] = &eds[i].ored; + sbi->oc.ods[i] = &eds[i].ored; return 0; } diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 13271ea2b453..065dc919a0ce 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -158,8 +158,14 @@ static inline int __fat_get_block(struct inode *inode, sector_t iblock, err = fat_bmap(inode, iblock, &phys, &mapped_blocks, create, false); if (err) return err; + if (!phys) { + fat_fs_error(sb, + "invalid FAT chain (i_pos %lld, last_block %llu)", + MSDOS_I(inode)->i_pos, + (unsigned long long)last_block); + return -EIO; + } - BUG_ON(!phys); BUG_ON(*max_blocks != mapped_blocks); set_buffer_new(bh_result); map_bh(bh_result, sb, phys); diff --git a/fs/proc/base.c b/fs/proc/base.c index 80aa42506b8b..b6572944efc3 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -2439,14 +2439,11 @@ static struct dentry *proc_pident_lookup(struct inode *dir, for (p = ents; p < last; p++) { if (p->len != dentry->d_name.len) continue; - if (!memcmp(dentry->d_name.name, p->name, p->len)) + if (!memcmp(dentry->d_name.name, p->name, p->len)) { + res = proc_pident_instantiate(dentry, task, p); break; + } } - if (p >= last) - goto out; - - res = proc_pident_instantiate(dentry, task, p); -out: put_task_struct(task); out_no_task: return res; diff --git a/include/linux/kcov.h b/include/linux/kcov.h index 3ecf6f5e3a5f..b76a1807028d 100644 --- a/include/linux/kcov.h +++ b/include/linux/kcov.h @@ -22,13 +22,27 @@ enum kcov_mode { KCOV_MODE_TRACE_CMP = 3, }; +#define KCOV_IN_CTXSW (1 << 30) + void kcov_task_init(struct task_struct *t); void kcov_task_exit(struct task_struct *t); +#define kcov_prepare_switch(t) \ +do { \ + (t)->kcov_mode |= KCOV_IN_CTXSW; \ +} while (0) + +#define kcov_finish_switch(t) \ +do { \ + (t)->kcov_mode &= ~KCOV_IN_CTXSW; \ +} while (0) + #else static inline void kcov_task_init(struct task_struct *t) {} static inline void kcov_task_exit(struct task_struct *t) {} +static inline void kcov_prepare_switch(struct task_struct *t) {} +static inline void kcov_finish_switch(struct task_struct *t) {} #endif /* CONFIG_KCOV */ #endif /* 
_LINUX_KCOV_H */ diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 4f52ec755725..6c6fb116e925 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -53,6 +53,7 @@ enum memcg_memory_event { MEMCG_HIGH, MEMCG_MAX, MEMCG_OOM, + MEMCG_OOM_KILL, MEMCG_SWAP_MAX, MEMCG_SWAP_FAIL, MEMCG_NR_MEMORY_EVENTS, @@ -720,11 +721,8 @@ static inline void count_memcg_event_mm(struct mm_struct *mm, rcu_read_lock(); memcg = mem_cgroup_from_task(rcu_dereference(mm->owner)); - if (likely(memcg)) { + if (likely(memcg)) count_memcg_events(memcg, idx, 1); - if (idx == OOM_KILL) - cgroup_file_notify(&memcg->events_file); - } rcu_read_unlock(); } @@ -735,6 +733,21 @@ static inline void memcg_memory_event(struct mem_cgroup *memcg, cgroup_file_notify(&memcg->events_file); } +static inline void memcg_memory_event_mm(struct mm_struct *mm, + enum memcg_memory_event event) +{ + struct mem_cgroup *memcg; + + if (mem_cgroup_disabled()) + return; + + rcu_read_lock(); + memcg = mem_cgroup_from_task(rcu_dereference(mm->owner)); + if (likely(memcg)) + memcg_memory_event(memcg, event); + rcu_read_unlock(); +} + #ifdef CONFIG_TRANSPARENT_HUGEPAGE void mem_cgroup_split_huge_fixup(struct page *head); #endif @@ -756,6 +769,11 @@ static inline void memcg_memory_event(struct mem_cgroup *memcg, { } +static inline void memcg_memory_event_mm(struct mm_struct *mm, + enum memcg_memory_event event) +{ +} + static inline enum mem_cgroup_protection mem_cgroup_protected( struct mem_cgroup *root, struct mem_cgroup *memcg) { diff --git a/include/linux/sched.h b/include/linux/sched.h index cfb7da88c217..87bf02d93a27 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1130,7 +1130,7 @@ struct task_struct { #ifdef CONFIG_KCOV /* Coverage collection mode enabled for this task (0 if disabled): */ - enum kcov_mode kcov_mode; + unsigned int kcov_mode; /* Size of the kcov_area: */ unsigned int kcov_size; diff --git a/include/linux/slab.h b/include/linux/slab.h index 4d759e1ddc33..14e3fe4bd6a1 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -600,6 +600,7 @@ struct memcg_cache_params { struct memcg_cache_array __rcu *memcg_caches; struct list_head __root_caches_node; struct list_head children; + bool dying; }; struct { struct mem_cgroup *memcg; diff --git a/ipc/sem.c b/ipc/sem.c index 59a3cd1d3252..5af1943ad782 100644 --- a/ipc/sem.c +++ b/ipc/sem.c @@ -85,6 +85,7 @@ #include <linux/nsproxy.h> #include <linux/ipc_namespace.h> #include <linux/sched/wake_q.h> +#include <linux/nospec.h> #include <linux/uaccess.h> #include "util.h" @@ -368,6 +369,7 @@ static inline int sem_lock(struct sem_array *sma, struct sembuf *sops, int nsops) { struct sem *sem; + int idx; if (nsops != 1) { /* Complex operation - acquire a full lock */ @@ -385,7 +387,8 @@ static inline int sem_lock(struct sem_array *sma, struct sembuf *sops, * * Both facts are tracked by use_global_mode. */ - sem = &sma->sems[sops->sem_num]; + idx = array_index_nospec(sops->sem_num, sma->sem_nsems); + sem = &sma->sems[idx]; /* * Initial check for use_global_lock. 
Just an optimization, @@ -638,7 +641,8 @@ static int perform_atomic_semop_slow(struct sem_array *sma, struct sem_queue *q) un = q->undo; for (sop = sops; sop < sops + nsops; sop++) { - curr = &sma->sems[sop->sem_num]; + int idx = array_index_nospec(sop->sem_num, sma->sem_nsems); + curr = &sma->sems[idx]; sem_op = sop->sem_op; result = curr->semval; @@ -718,7 +722,9 @@ static int perform_atomic_semop(struct sem_array *sma, struct sem_queue *q) * until the operations can go through. */ for (sop = sops; sop < sops + nsops; sop++) { - curr = &sma->sems[sop->sem_num]; + int idx = array_index_nospec(sop->sem_num, sma->sem_nsems); + + curr = &sma->sems[idx]; sem_op = sop->sem_op; result = curr->semval; @@ -1356,6 +1362,7 @@ static int semctl_setval(struct ipc_namespace *ns, int semid, int semnum, return -EIDRM; } + semnum = array_index_nospec(semnum, sma->sem_nsems); curr = &sma->sems[semnum]; ipc_assert_locked_object(&sma->sem_perm); @@ -1509,6 +1516,8 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum, err = -EIDRM; goto out_unlock; } + + semnum = array_index_nospec(semnum, nsems); curr = &sma->sems[semnum]; switch (cmd) { @@ -2081,7 +2090,8 @@ static long do_semtimedop(int semid, struct sembuf __user *tsops, */ if (nsops == 1) { struct sem *curr; - curr = &sma->sems[sops->sem_num]; + int idx = array_index_nospec(sops->sem_num, sma->sem_nsems); + curr = &sma->sems[idx]; if (alter) { if (sma->complex_count) { diff --git a/ipc/shm.c b/ipc/shm.c index 29978ee76c2e..051a3e1fb8df 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -408,7 +408,7 @@ void exit_shm(struct task_struct *task) up_write(&shm_ids(ns).rwsem); } -static int shm_fault(struct vm_fault *vmf) +static vm_fault_t shm_fault(struct vm_fault *vmf) { struct file *file = vmf->vma->vm_file; struct shm_file_data *sfd = shm_file_data(file); diff --git a/kernel/fork.c b/kernel/fork.c index 92870be50bba..9440d61b925c 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -440,6 +440,14 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm, continue; } charge = 0; + /* + * Don't duplicate many vmas if we've been oom-killed (for + * example) + */ + if (fatal_signal_pending(current)) { + retval = -EINTR; + goto out; + } if (mpnt->vm_flags & VM_ACCOUNT) { unsigned long len = vma_pages(mpnt); diff --git a/kernel/kcov.c b/kernel/kcov.c index 2c16f1ab5e10..3ebd09efe72a 100644 --- a/kernel/kcov.c +++ b/kernel/kcov.c @@ -58,7 +58,7 @@ struct kcov { static bool check_kcov_mode(enum kcov_mode needed_mode, struct task_struct *t) { - enum kcov_mode mode; + unsigned int mode; /* * We are interested in code coverage as a function of a syscall inputs, @@ -241,7 +241,8 @@ static void kcov_put(struct kcov *kcov) void kcov_task_init(struct task_struct *t) { - t->kcov_mode = KCOV_MODE_DISABLED; + WRITE_ONCE(t->kcov_mode, KCOV_MODE_DISABLED); + barrier(); t->kcov_size = 0; t->kcov_area = NULL; t->kcov = NULL; @@ -323,6 +324,21 @@ static int kcov_close(struct inode *inode, struct file *filep) return 0; } +/* + * Fault in a lazily-faulted vmalloc area before it can be used by + * __santizer_cov_trace_pc(), to avoid recursion issues if any code on the + * vmalloc fault handling path is instrumented. 
+ */ +static void kcov_fault_in_area(struct kcov *kcov) +{ + unsigned long stride = PAGE_SIZE / sizeof(unsigned long); + unsigned long *area = kcov->area; + unsigned long offset; + + for (offset = 0; offset < kcov->size; offset += stride) + READ_ONCE(area[offset]); +} + static int kcov_ioctl_locked(struct kcov *kcov, unsigned int cmd, unsigned long arg) { @@ -371,6 +387,7 @@ static int kcov_ioctl_locked(struct kcov *kcov, unsigned int cmd, #endif else return -EINVAL; + kcov_fault_in_area(kcov); /* Cache in task struct for performance. */ t->kcov_size = kcov->size; t->kcov_area = kcov->area; diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c index 20fef1a38602..23a83a4da38a 100644 --- a/kernel/kexec_core.c +++ b/kernel/kexec_core.c @@ -829,6 +829,8 @@ static int kimage_load_normal_segment(struct kimage *image, else buf += mchunk; mbytes -= mchunk; + + cond_resched(); } out: return result; @@ -893,6 +895,8 @@ static int kimage_load_crash_segment(struct kimage *image, else buf += mchunk; mbytes -= mchunk; + + cond_resched(); } out: return result; diff --git a/kernel/relay.c b/kernel/relay.c index 9f5326e8a036..04f248644e06 100644 --- a/kernel/relay.c +++ b/kernel/relay.c @@ -39,7 +39,7 @@ static void relay_file_mmap_close(struct vm_area_struct *vma) /* * fault() vm_op implementation for relay file mapping. */ -static int relay_buf_fault(struct vm_fault *vmf) +static vm_fault_t relay_buf_fault(struct vm_fault *vmf) { struct page *page; struct rchan_buf *buf = vmf->vma->vm_private_data; diff --git a/kernel/sched/core.c b/kernel/sched/core.c index a98d54cd5535..78d8facba456 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -10,6 +10,8 @@ #include <linux/kthread.h> #include <linux/nospec.h> +#include <linux/kcov.h> + #include <asm/switch_to.h> #include <asm/tlb.h> @@ -2633,6 +2635,7 @@ static inline void prepare_task_switch(struct rq *rq, struct task_struct *prev, struct task_struct *next) { + kcov_prepare_switch(prev); sched_info_switch(rq, prev, next); perf_event_task_sched_out(prev, next); rseq_preempt(prev); @@ -2702,6 +2705,7 @@ static struct rq *finish_task_switch(struct task_struct *prev) finish_task(prev); finish_lock_switch(rq); finish_arch_post_lock_switch(); + kcov_finish_switch(current); fire_sched_in_preempt_notifiers(current); /* diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index d543c65ce0eb..8838d1158d19 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1506,6 +1506,10 @@ config NETDEV_NOTIFIER_ERROR_INJECT If unsure, say N. +config FUNCTION_ERROR_INJECTION + def_bool y + depends on HAVE_FUNCTION_ERROR_INJECTION && KPROBES + config FAULT_INJECTION bool "Fault-injection framework" depends on DEBUG_KERNEL @@ -1513,10 +1517,6 @@ config FAULT_INJECTION Provide fault-injection framework. For more details, see Documentation/fault-injection/. -config FUNCTION_ERROR_INJECTION - def_bool y - depends on HAVE_FUNCTION_ERROR_INJECTION && KPROBES - config FAILSLAB bool "Fault-injection capability for kmalloc" depends on FAULT_INJECTION @@ -1547,16 +1547,6 @@ config FAIL_IO_TIMEOUT Only works with drivers that use the generic timeout handling, for others it wont do anything. -config FAIL_MMC_REQUEST - bool "Fault-injection capability for MMC IO" - depends on FAULT_INJECTION_DEBUG_FS && MMC - help - Provide fault-injection capability for MMC IO. - This will make the mmc core return data errors. This is - useful to test the error handling in the mmc block device - and to test how the mmc host driver handles retries from - the block device. 
- config FAIL_FUTEX bool "Fault-injection capability for futexes" select DEBUG_FS @@ -1564,6 +1554,12 @@ config FAIL_FUTEX help Provide fault-injection capability for futexes. +config FAULT_INJECTION_DEBUG_FS + bool "Debugfs entries for fault-injection capabilities" + depends on FAULT_INJECTION && SYSFS && DEBUG_FS + help + Enable configuration of fault-injection capabilities via debugfs. + config FAIL_FUNCTION bool "Fault-injection capability for functions" depends on FAULT_INJECTION_DEBUG_FS && FUNCTION_ERROR_INJECTION @@ -1574,11 +1570,15 @@ config FAIL_FUNCTION an error value and have to handle it. This is useful to test the error handling in various subsystems. -config FAULT_INJECTION_DEBUG_FS - bool "Debugfs entries for fault-injection capabilities" - depends on FAULT_INJECTION && SYSFS && DEBUG_FS +config FAIL_MMC_REQUEST + bool "Fault-injection capability for MMC IO" + depends on FAULT_INJECTION_DEBUG_FS && MMC help - Enable configuration of fault-injection capabilities via debugfs. + Provide fault-injection capability for MMC IO. + This will make the mmc core return data errors. This is + useful to test the error handling in the mmc block device + and to test how the mmc host driver handles retries from + the block device. config FAULT_INJECTION_STACKTRACE_FILTER bool "stacktrace filter for fault-injection capabilities" diff --git a/lib/test_printf.c b/lib/test_printf.c index cea592f402ed..b2aa8f514844 100644 --- a/lib/test_printf.c +++ b/lib/test_printf.c @@ -260,6 +260,13 @@ plain(void) { int err; + /* + * Make sure crng is ready. Otherwise we get "(ptrval)" instead + * of a hashed address when printing '%p' in plain_hash() and + * plain_format(). + */ + wait_for_random_bytes(); + err = plain_hash(); if (err) { pr_warn("plain 'p' does not appear to be hashed\n"); diff --git a/mm/cleancache.c b/mm/cleancache.c index 126548b5a292..2bf12da9baa0 100644 --- a/mm/cleancache.c +++ b/mm/cleancache.c @@ -307,12 +307,10 @@ static int __init init_cleancache(void) struct dentry *root = debugfs_create_dir("cleancache", NULL); if (root == NULL) return -ENXIO; - debugfs_create_u64("succ_gets", S_IRUGO, root, &cleancache_succ_gets); - debugfs_create_u64("failed_gets", S_IRUGO, - root, &cleancache_failed_gets); - debugfs_create_u64("puts", S_IRUGO, root, &cleancache_puts); - debugfs_create_u64("invalidates", S_IRUGO, - root, &cleancache_invalidates); + debugfs_create_u64("succ_gets", 0444, root, &cleancache_succ_gets); + debugfs_create_u64("failed_gets", 0444, root, &cleancache_failed_gets); + debugfs_create_u64("puts", 0444, root, &cleancache_puts); + debugfs_create_u64("invalidates", 0444, root, &cleancache_invalidates); #endif return 0; } diff --git a/mm/cma_debug.c b/mm/cma_debug.c index 275df8b5b22e..f23467291cfb 100644 --- a/mm/cma_debug.c +++ b/mm/cma_debug.c @@ -172,23 +172,18 @@ static void cma_debugfs_add_one(struct cma *cma, int idx) tmp = debugfs_create_dir(name, cma_debugfs_root); - debugfs_create_file("alloc", S_IWUSR, tmp, cma, - &cma_alloc_fops); - - debugfs_create_file("free", S_IWUSR, tmp, cma, - &cma_free_fops); - - debugfs_create_file("base_pfn", S_IRUGO, tmp, - &cma->base_pfn, &cma_debugfs_fops); - debugfs_create_file("count", S_IRUGO, tmp, - &cma->count, &cma_debugfs_fops); - debugfs_create_file("order_per_bit", S_IRUGO, tmp, - &cma->order_per_bit, &cma_debugfs_fops); - debugfs_create_file("used", S_IRUGO, tmp, cma, &cma_used_fops); - debugfs_create_file("maxchunk", S_IRUGO, tmp, cma, &cma_maxchunk_fops); + debugfs_create_file("alloc", 0200, tmp, cma, &cma_alloc_fops); + 
debugfs_create_file("free", 0200, tmp, cma, &cma_free_fops); + debugfs_create_file("base_pfn", 0444, tmp, + &cma->base_pfn, &cma_debugfs_fops); + debugfs_create_file("count", 0444, tmp, &cma->count, &cma_debugfs_fops); + debugfs_create_file("order_per_bit", 0444, tmp, + &cma->order_per_bit, &cma_debugfs_fops); + debugfs_create_file("used", 0444, tmp, cma, &cma_used_fops); + debugfs_create_file("maxchunk", 0444, tmp, cma, &cma_maxchunk_fops); u32s = DIV_ROUND_UP(cma_bitmap_maxno(cma), BITS_PER_BYTE * sizeof(u32)); - debugfs_create_u32_array("bitmap", S_IRUGO, tmp, (u32*)cma->bitmap, u32s); + debugfs_create_u32_array("bitmap", 0444, tmp, (u32 *)cma->bitmap, u32s); } static int __init cma_debugfs_init(void) diff --git a/mm/compaction.c b/mm/compaction.c index 29bd1df18b98..faca45ebe62d 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -1899,7 +1899,7 @@ static ssize_t sysfs_compact_node(struct device *dev, return count; } -static DEVICE_ATTR(compact, S_IWUSR, NULL, sysfs_compact_node); +static DEVICE_ATTR(compact, 0200, NULL, sysfs_compact_node); int compaction_register_node(struct node *node) { diff --git a/mm/dmapool.c b/mm/dmapool.c index 4d90a64b2fdc..6d4b97e7e9e9 100644 --- a/mm/dmapool.c +++ b/mm/dmapool.c @@ -105,7 +105,7 @@ show_pools(struct device *dev, struct device_attribute *attr, char *buf) return PAGE_SIZE - size; } -static DEVICE_ATTR(pools, S_IRUGO, show_pools, NULL); +static DEVICE_ATTR(pools, 0444, show_pools, NULL); /** * dma_pool_create - Creates a pool of consistent memory blocks, for dma. diff --git a/mm/failslab.c b/mm/failslab.c index 1f2f248e3601..b135ebb88b6f 100644 --- a/mm/failslab.c +++ b/mm/failslab.c @@ -42,7 +42,7 @@ __setup("failslab=", setup_failslab); static int __init failslab_debugfs_init(void) { struct dentry *dir; - umode_t mode = S_IFREG | S_IRUSR | S_IWUSR; + umode_t mode = S_IFREG | 0600; dir = fault_create_debugfs_attr("failslab", NULL, &failslab.attr); if (IS_ERR(dir)) diff --git a/mm/frontswap.c b/mm/frontswap.c index 4f5476a0f955..157e5bf63504 100644 --- a/mm/frontswap.c +++ b/mm/frontswap.c @@ -486,12 +486,11 @@ static int __init init_frontswap(void) struct dentry *root = debugfs_create_dir("frontswap", NULL); if (root == NULL) return -ENXIO; - debugfs_create_u64("loads", S_IRUGO, root, &frontswap_loads); - debugfs_create_u64("succ_stores", S_IRUGO, root, &frontswap_succ_stores); - debugfs_create_u64("failed_stores", S_IRUGO, root, - &frontswap_failed_stores); - debugfs_create_u64("invalidates", S_IRUGO, - root, &frontswap_invalidates); + debugfs_create_u64("loads", 0444, root, &frontswap_loads); + debugfs_create_u64("succ_stores", 0444, root, &frontswap_succ_stores); + debugfs_create_u64("failed_stores", 0444, root, + &frontswap_failed_stores); + debugfs_create_u64("invalidates", 0444, root, &frontswap_invalidates); #endif return 0; } @@ -216,6 +216,8 @@ struct rmap_item { #define SEQNR_MASK 0x0ff /* low bits of unstable tree seqnr */ #define UNSTABLE_FLAG 0x100 /* is a node of the unstable tree */ #define STABLE_FLAG 0x200 /* is listed from the stable tree */ +#define KSM_FLAG_MASK (SEQNR_MASK|UNSTABLE_FLAG|STABLE_FLAG) + /* to mask all the flags */ /* The stable and unstable tree heads */ static struct rb_root one_stable_tree[1] = { RB_ROOT }; @@ -2598,10 +2600,15 @@ again: anon_vma_lock_read(anon_vma); anon_vma_interval_tree_foreach(vmac, &anon_vma->rb_root, 0, ULONG_MAX) { + unsigned long addr; + cond_resched(); vma = vmac->vma; - if (rmap_item->address < vma->vm_start || - rmap_item->address >= vma->vm_end) + + /* Ignore the 
stable/unstable/sqnr flags */ + addr = rmap_item->address & ~KSM_FLAG_MASK; + + if (addr < vma->vm_start || addr >= vma->vm_end) continue; /* * Initially we examine only the vma which covers this @@ -2615,8 +2622,7 @@ again: if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg)) continue; - if (!rwc->rmap_one(page, vma, - rmap_item->address, rwc->arg)) { + if (!rwc->rmap_one(page, vma, addr, rwc->arg)) { anon_vma_unlock_read(anon_vma); return; } diff --git a/mm/memblock.c b/mm/memblock.c index 93ad42bc8a73..cc16d70b8333 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -20,6 +20,7 @@ #include <linux/kmemleak.h> #include <linux/seq_file.h> #include <linux/memblock.h> +#include <linux/bootmem.h> #include <asm/sections.h> #include <linux/io.h> @@ -1808,10 +1809,13 @@ static int __init memblock_init_debugfs(void) struct dentry *root = debugfs_create_dir("memblock", NULL); if (!root) return -ENXIO; - debugfs_create_file("memory", S_IRUGO, root, &memblock.memory, &memblock_debug_fops); - debugfs_create_file("reserved", S_IRUGO, root, &memblock.reserved, &memblock_debug_fops); + debugfs_create_file("memory", 0444, root, + &memblock.memory, &memblock_debug_fops); + debugfs_create_file("reserved", 0444, root, + &memblock.reserved, &memblock_debug_fops); #ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP - debugfs_create_file("physmem", S_IRUGO, root, &memblock.physmem, &memblock_debug_fops); + debugfs_create_file("physmem", 0444, root, + &memblock.physmem, &memblock_debug_fops); #endif return 0; diff --git a/mm/memcontrol.c b/mm/memcontrol.c index c1e64d60ed02..e6f0d5ef320a 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -3550,7 +3550,8 @@ static int mem_cgroup_oom_control_read(struct seq_file *sf, void *v) seq_printf(sf, "oom_kill_disable %d\n", memcg->oom_kill_disable); seq_printf(sf, "under_oom %d\n", (bool)memcg->under_oom); - seq_printf(sf, "oom_kill %lu\n", memcg_sum_events(memcg, OOM_KILL)); + seq_printf(sf, "oom_kill %lu\n", + atomic_long_read(&memcg->memory_events[MEMCG_OOM_KILL])); return 0; } @@ -5239,7 +5240,8 @@ static int memory_events_show(struct seq_file *m, void *v) atomic_long_read(&memcg->memory_events[MEMCG_MAX])); seq_printf(m, "oom %lu\n", atomic_long_read(&memcg->memory_events[MEMCG_OOM])); - seq_printf(m, "oom_kill %lu\n", memcg_sum_events(memcg, OOM_KILL)); + seq_printf(m, "oom_kill %lu\n", + atomic_long_read(&memcg->memory_events[MEMCG_OOM_KILL])); return 0; } @@ -5480,6 +5482,10 @@ enum mem_cgroup_protection mem_cgroup_protected(struct mem_cgroup *root, elow = memcg->memory.low; parent = parent_mem_cgroup(memcg); + /* No parent means a non-hierarchical mode on v1 memcg */ + if (!parent) + return MEMCG_PROT_NONE; + if (parent == root) goto exit; diff --git a/mm/mremap.c b/mm/mremap.c index 049470aa1e3e..5c2e18505f75 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -191,8 +191,6 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd, drop_rmap_locks(vma); } -#define LATENCY_LIMIT (64 * PAGE_SIZE) - unsigned long move_page_tables(struct vm_area_struct *vma, unsigned long old_addr, struct vm_area_struct *new_vma, unsigned long new_addr, unsigned long len, @@ -247,8 +245,6 @@ unsigned long move_page_tables(struct vm_area_struct *vma, next = (new_addr + PMD_SIZE) & PMD_MASK; if (extent > next - new_addr) extent = next - new_addr; - if (extent > LATENCY_LIMIT) - extent = LATENCY_LIMIT; move_ptes(vma, old_pmd, old_addr, old_addr + extent, new_vma, new_pmd, new_addr, need_rmap_locks, &need_flush); } diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 6694348b27e9..84081e77bc51 
100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -913,7 +913,7 @@ static void oom_kill_process(struct oom_control *oc, const char *message) /* Raise event before sending signal: task reaper must see this */ count_vm_event(OOM_KILL); - count_memcg_event_mm(mm, OOM_KILL); + memcg_memory_event_mm(mm, MEMCG_OOM_KILL); /* * We should send SIGKILL before granting access to memory reserves diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 07b3c23762ad..1521100f1e63 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3061,7 +3061,7 @@ static bool should_fail_alloc_page(gfp_t gfp_mask, unsigned int order) static int __init fail_page_alloc_debugfs(void) { - umode_t mode = S_IFREG | S_IRUSR | S_IWUSR; + umode_t mode = S_IFREG | 0600; struct dentry *dir; dir = fault_create_debugfs_attr("fail_page_alloc", NULL, diff --git a/mm/page_idle.c b/mm/page_idle.c index e412a63b2b74..6302bc62c27d 100644 --- a/mm/page_idle.c +++ b/mm/page_idle.c @@ -201,7 +201,7 @@ static ssize_t page_idle_bitmap_write(struct file *file, struct kobject *kobj, } static struct bin_attribute page_idle_bitmap_attr = - __BIN_ATTR(bitmap, S_IRUSR | S_IWUSR, + __BIN_ATTR(bitmap, 0600, page_idle_bitmap_read, page_idle_bitmap_write, 0); static struct bin_attribute *page_idle_bin_attrs[] = { diff --git a/mm/page_owner.c b/mm/page_owner.c index 75d21a2259b3..d80adfe702d3 100644 --- a/mm/page_owner.c +++ b/mm/page_owner.c @@ -631,8 +631,8 @@ static int __init pageowner_init(void) return 0; } - dentry = debugfs_create_file("page_owner", S_IRUSR, NULL, - NULL, &proc_page_owner_operations); + dentry = debugfs_create_file("page_owner", 0400, NULL, + NULL, &proc_page_owner_operations); return PTR_ERR_OR_ZERO(dentry); } diff --git a/mm/shmem.c b/mm/shmem.c index e9a7ac74823d..2cab84403055 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -3013,7 +3013,8 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s if (len > PAGE_SIZE) return -ENAMETOOLONG; - inode = shmem_get_inode(dir->i_sb, dir, S_IFLNK|S_IRWXUGO, 0, VM_NORESERVE); + inode = shmem_get_inode(dir->i_sb, dir, S_IFLNK | 0777, 0, + VM_NORESERVE); if (!inode) return -ENOSPC; @@ -3445,7 +3446,7 @@ static int shmem_show_options(struct seq_file *seq, struct dentry *root) sbinfo->max_blocks << (PAGE_SHIFT - 10)); if (sbinfo->max_inodes != shmem_default_max_inodes()) seq_printf(seq, ",nr_inodes=%lu", sbinfo->max_inodes); - if (sbinfo->mode != (S_IRWXUGO | S_ISVTX)) + if (sbinfo->mode != (0777 | S_ISVTX)) seq_printf(seq, ",mode=%03ho", sbinfo->mode); if (!uid_eq(sbinfo->uid, GLOBAL_ROOT_UID)) seq_printf(seq, ",uid=%u", @@ -3486,7 +3487,7 @@ int shmem_fill_super(struct super_block *sb, void *data, int silent) if (!sbinfo) return -ENOMEM; - sbinfo->mode = S_IRWXUGO | S_ISVTX; + sbinfo->mode = 0777 | S_ISVTX; sbinfo->uid = current_fsuid(); sbinfo->gid = current_fsgid(); sb->s_fs_info = sbinfo; @@ -3929,7 +3930,7 @@ static struct file *__shmem_file_setup(struct vfsmount *mnt, const char *name, l d_set_d_op(path.dentry, &anon_ops); res = ERR_PTR(-ENOSPC); - inode = shmem_get_inode(sb, NULL, S_IFREG | S_IRWXUGO, 0, flags); + inode = shmem_get_inode(sb, NULL, S_IFREG | 0777, 0, flags); if (!inode) goto put_memory; diff --git a/mm/slab_common.c b/mm/slab_common.c index 98dcdc352062..890b1f04a03a 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -136,6 +136,7 @@ void slab_init_memcg_params(struct kmem_cache *s) s->memcg_params.root_cache = NULL; RCU_INIT_POINTER(s->memcg_params.memcg_caches, NULL); INIT_LIST_HEAD(&s->memcg_params.children); + s->memcg_params.dying = 
false; } static int init_memcg_params(struct kmem_cache *s, @@ -608,7 +609,7 @@ void memcg_create_kmem_cache(struct mem_cgroup *memcg, * The memory cgroup could have been offlined while the cache * creation work was pending. */ - if (memcg->kmem_state != KMEM_ONLINE) + if (memcg->kmem_state != KMEM_ONLINE || root_cache->memcg_params.dying) goto out_unlock; idx = memcg_cache_id(memcg); @@ -712,6 +713,9 @@ void slab_deactivate_memcg_cache_rcu_sched(struct kmem_cache *s, WARN_ON_ONCE(s->memcg_params.deact_fn)) return; + if (s->memcg_params.root_cache->memcg_params.dying) + return; + /* pin memcg so that @s doesn't get destroyed in the middle */ css_get(&s->memcg_params.memcg->css); @@ -823,11 +827,36 @@ static int shutdown_memcg_caches(struct kmem_cache *s) return -EBUSY; return 0; } + +static void flush_memcg_workqueue(struct kmem_cache *s) +{ + mutex_lock(&slab_mutex); + s->memcg_params.dying = true; + mutex_unlock(&slab_mutex); + + /* + * SLUB deactivates the kmem_caches through call_rcu_sched. Make + * sure all registered rcu callbacks have been invoked. + */ + if (IS_ENABLED(CONFIG_SLUB)) + rcu_barrier_sched(); + + /* + * SLAB and SLUB create memcg kmem_caches through workqueue and SLUB + * deactivates the memcg kmem_caches through workqueue. Make sure all + * previous workitems on workqueue are processed. + */ + flush_workqueue(memcg_kmem_cache_wq); +} #else static inline int shutdown_memcg_caches(struct kmem_cache *s) { return 0; } + +static inline void flush_memcg_workqueue(struct kmem_cache *s) +{ +} #endif /* CONFIG_MEMCG && !CONFIG_SLOB */ void slab_kmem_cache_release(struct kmem_cache *s) @@ -845,6 +874,8 @@ void kmem_cache_destroy(struct kmem_cache *s) if (unlikely(!s)) return; + flush_memcg_workqueue(s); + get_online_cpus(); get_online_mems(); @@ -1212,9 +1243,9 @@ void cache_random_seq_destroy(struct kmem_cache *cachep) #if defined(CONFIG_SLAB) || defined(CONFIG_SLUB_DEBUG) #ifdef CONFIG_SLAB -#define SLABINFO_RIGHTS (S_IWUSR | S_IRUSR) +#define SLABINFO_RIGHTS (0600) #else -#define SLABINFO_RIGHTS S_IRUSR +#define SLABINFO_RIGHTS (0400) #endif static void print_slabinfo_header(struct seq_file *m) diff --git a/mm/swapfile.c b/mm/swapfile.c index 925cf795a652..2cc2972eedaf 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -100,7 +100,7 @@ atomic_t nr_rotate_swap = ATOMIC_INIT(0); static inline unsigned char swap_count(unsigned char ent) { - return ent & ~SWAP_HAS_CACHE; /* may include SWAP_HAS_CONT flag */ + return ent & ~SWAP_HAS_CACHE; /* may include COUNT_CONTINUED flag */ } /* returns 1 if swap entry is freed */ diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 89efac3a020e..cfea25be7754 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -2741,11 +2741,11 @@ static const struct seq_operations vmalloc_op = { static int __init proc_vmalloc_init(void) { if (IS_ENABLED(CONFIG_NUMA)) - proc_create_seq_private("vmallocinfo", S_IRUSR, NULL, + proc_create_seq_private("vmallocinfo", 0400, NULL, &vmalloc_op, nr_node_ids * sizeof(unsigned int), NULL); else - proc_create_seq("vmallocinfo", S_IRUSR, NULL, &vmalloc_op); + proc_create_seq("vmallocinfo", 0400, NULL, &vmalloc_op); return 0; } module_init(proc_vmalloc_init); diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index 61cb05dc950c..8d87e973a4f5 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -661,8 +661,9 @@ static void zs_pool_stat_create(struct zs_pool *pool, const char *name) } pool->stat_dentry = entry; - entry = debugfs_create_file("classes", S_IFREG | S_IRUGO, - pool->stat_dentry, pool, &zs_stats_size_fops); + entry = 
debugfs_create_file("classes", S_IFREG | 0444, + pool->stat_dentry, pool, + &zs_stats_size_fops); if (!entry) { pr_warn("%s: debugfs file entry <%s> creation failed\n", name, "classes"); diff --git a/mm/zswap.c b/mm/zswap.c index 61a5c41972db..7d34e69507e3 100644 --- a/mm/zswap.c +++ b/mm/zswap.c @@ -1256,26 +1256,26 @@ static int __init zswap_debugfs_init(void) if (!zswap_debugfs_root) return -ENOMEM; - debugfs_create_u64("pool_limit_hit", S_IRUGO, - zswap_debugfs_root, &zswap_pool_limit_hit); - debugfs_create_u64("reject_reclaim_fail", S_IRUGO, - zswap_debugfs_root, &zswap_reject_reclaim_fail); - debugfs_create_u64("reject_alloc_fail", S_IRUGO, - zswap_debugfs_root, &zswap_reject_alloc_fail); - debugfs_create_u64("reject_kmemcache_fail", S_IRUGO, - zswap_debugfs_root, &zswap_reject_kmemcache_fail); - debugfs_create_u64("reject_compress_poor", S_IRUGO, - zswap_debugfs_root, &zswap_reject_compress_poor); - debugfs_create_u64("written_back_pages", S_IRUGO, - zswap_debugfs_root, &zswap_written_back_pages); - debugfs_create_u64("duplicate_entry", S_IRUGO, - zswap_debugfs_root, &zswap_duplicate_entry); - debugfs_create_u64("pool_total_size", S_IRUGO, - zswap_debugfs_root, &zswap_pool_total_size); - debugfs_create_atomic_t("stored_pages", S_IRUGO, - zswap_debugfs_root, &zswap_stored_pages); + debugfs_create_u64("pool_limit_hit", 0444, + zswap_debugfs_root, &zswap_pool_limit_hit); + debugfs_create_u64("reject_reclaim_fail", 0444, + zswap_debugfs_root, &zswap_reject_reclaim_fail); + debugfs_create_u64("reject_alloc_fail", 0444, + zswap_debugfs_root, &zswap_reject_alloc_fail); + debugfs_create_u64("reject_kmemcache_fail", 0444, + zswap_debugfs_root, &zswap_reject_kmemcache_fail); + debugfs_create_u64("reject_compress_poor", 0444, + zswap_debugfs_root, &zswap_reject_compress_poor); + debugfs_create_u64("written_back_pages", 0444, + zswap_debugfs_root, &zswap_written_back_pages); + debugfs_create_u64("duplicate_entry", 0444, + zswap_debugfs_root, &zswap_duplicate_entry); + debugfs_create_u64("pool_total_size", 0444, + zswap_debugfs_root, &zswap_pool_total_size); + debugfs_create_atomic_t("stored_pages", 0444, + zswap_debugfs_root, &zswap_stored_pages); debugfs_create_atomic_t("same_filled_pages", 0444, - zswap_debugfs_root, &zswap_same_filled_pages); + zswap_debugfs_root, &zswap_same_filled_pages); return 0; } |