From f2db19719a4e789a58ac024b43f12eeb9e458074 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 24 Jun 2016 14:49:28 -0700 Subject: jbd2: get rid of superfluous __GFP_REPEAT jbd2_alloc is explicit about its allocation preferences wrt. the allocation size. Sub page allocations go to the slab allocator and larger are using either the page allocator or vmalloc. This is all good but the logic is unnecessarily complex. 1) as per Ted, the vmalloc fallback is a left-over: : jbd2_alloc is only passed in the bh->b_size, which can't be PAGE_SIZE, so : the code path that calls vmalloc() should never get called. When we : conveted jbd2_alloc() to suppor sub-page size allocations in commit : d2eecb039368, there was an assumption that it could be called with a size : greater than PAGE_SIZE, but that's certaily not true today. Moreover vmalloc allocation might even lead to a deadlock because the callers expect GFP_NOFS context while vmalloc is GFP_KERNEL. 2) __GFP_REPEAT for requests <= PAGE_ALLOC_COSTLY_ORDER is ignored since the flag was introduced. Let's simplify the code flow and use the slab allocator for sub-page requests and the page allocator for others. Even though order > 0 is not currently used as per above leave that option open. Link: http://lkml.kernel.org/r/1464599699-30131-18-git-send-email-mhocko@kernel.org Signed-off-by: Michal Hocko Reviewed-by: Jan Kara Cc: "Theodore Ts'o" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/jbd2/journal.c | 32 +++++++------------------------- 1 file changed, 7 insertions(+), 25 deletions(-) (limited to 'fs') diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index b31852f76f46..e3ca4b4cac84 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -2329,18 +2329,10 @@ void *jbd2_alloc(size_t size, gfp_t flags) BUG_ON(size & (size-1)); /* Must be a power of 2 */ - flags |= __GFP_REPEAT; - if (size == PAGE_SIZE) - ptr = (void *)__get_free_pages(flags, 0); - else if (size > PAGE_SIZE) { - int order = get_order(size); - - if (order < 3) - ptr = (void *)__get_free_pages(flags, order); - else - ptr = vmalloc(size); - } else + if (size < PAGE_SIZE) ptr = kmem_cache_alloc(get_slab(size), flags); + else + ptr = (void *)__get_free_pages(flags, get_order(size)); /* Check alignment; SLUB has gotten this wrong in the past, * and this can lead to user data corruption! */ @@ -2351,20 +2343,10 @@ void *jbd2_alloc(size_t size, gfp_t flags) void jbd2_free(void *ptr, size_t size) { - if (size == PAGE_SIZE) { - free_pages((unsigned long)ptr, 0); - return; - } - if (size > PAGE_SIZE) { - int order = get_order(size); - - if (order < 3) - free_pages((unsigned long)ptr, order); - else - vfree(ptr); - return; - } - kmem_cache_free(get_slab(size), ptr); + if (size < PAGE_SIZE) + kmem_cache_free(get_slab(size), ptr); + else + free_pages((unsigned long)ptr, get_order(size)); }; /* -- cgit v1.2.3 From 7186ee06b66313dae0d34ec5241fda7c4a537cb9 Mon Sep 17 00:00:00 2001 From: Gang He Date: Fri, 24 Jun 2016 14:50:13 -0700 Subject: ocfs2: disable BUG assertions in reading blocks According to some high-load testing, these two BUG assertions were encountered, this led system panic. Actually, there were some discussions about removing these two BUG() assertions, it would not bring any side effect. Then, I did the the following changes, 1) use the existing macro CATCH_BH_JBD_RACES to wrap BUG() in the ocfs2_read_blocks_sync function like before. 2) disable the macro CATCH_BH_JBD_RACES in Makefile by default. Link: http://lkml.kernel.org/r/1466574294-26863-1-git-send-email-ghe@suse.com Signed-off-by: Gang He Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Joseph Qi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/Makefile | 2 -- fs/ocfs2/buffer_head_io.c | 5 +++++ 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/Makefile b/fs/ocfs2/Makefile index e27e6527912b..4342c7ee7d20 100644 --- a/fs/ocfs2/Makefile +++ b/fs/ocfs2/Makefile @@ -1,7 +1,5 @@ ccflags-y := -Ifs/ocfs2 -ccflags-y += -DCATCH_BH_JBD_RACES - obj-$(CONFIG_OCFS2_FS) += \ ocfs2.o \ ocfs2_stackglue.o diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c index fe50ded1b4ce..498641eed2db 100644 --- a/fs/ocfs2/buffer_head_io.c +++ b/fs/ocfs2/buffer_head_io.c @@ -139,11 +139,16 @@ int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block, lock_buffer(bh); if (buffer_jbd(bh)) { +#ifdef CATCH_BH_JBD_RACES mlog(ML_ERROR, "block %llu had the JBD bit set " "while I was in lock_buffer!", (unsigned long long)bh->b_blocknr); BUG(); +#else + unlock_buffer(bh); + continue; +#endif } clear_buffer_uptodate(bh); -- cgit v1.2.3 From 63d2f95d63396059200c391ca87161897b99e74a Mon Sep 17 00:00:00 2001 From: Torsten Hilbrich Date: Fri, 24 Jun 2016 14:50:18 -0700 Subject: fs/nilfs2: fix potential underflow in call to crc32_le The value `bytes' comes from the filesystem which is about to be mounted. We cannot trust that the value is always in the range we expect it to be. Check its value before using it to calculate the length for the crc32_le call. It value must be larger (or equal) sumoff + 4. This fixes a kernel bug when accidentially mounting an image file which had the nilfs2 magic value 0x3434 at the right offset 0x406 by chance. The bytes 0x01 0x00 were stored at 0x408 and were interpreted as a s_bytes value of 1. This caused an underflow when substracting sumoff + 4 (20) in the call to crc32_le. BUG: unable to handle kernel paging request at ffff88021e600000 IP: crc32_le+0x36/0x100 ... Call Trace: nilfs_valid_sb.part.5+0x52/0x60 [nilfs2] nilfs_load_super_block+0x142/0x300 [nilfs2] init_nilfs+0x60/0x390 [nilfs2] nilfs_mount+0x302/0x520 [nilfs2] mount_fs+0x38/0x160 vfs_kern_mount+0x67/0x110 do_mount+0x269/0xe00 SyS_mount+0x9f/0x100 entry_SYSCALL_64_fastpath+0x16/0x71 Link: http://lkml.kernel.org/r/1466778587-5184-2-git-send-email-konishi.ryusuke@lab.ntt.co.jp Signed-off-by: Torsten Hilbrich Tested-by: Torsten Hilbrich Signed-off-by: Ryusuke Konishi Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nilfs2/the_nilfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c index 809bd2de7ad0..e9fd241b9a0a 100644 --- a/fs/nilfs2/the_nilfs.c +++ b/fs/nilfs2/the_nilfs.c @@ -439,7 +439,7 @@ static int nilfs_valid_sb(struct nilfs_super_block *sbp) if (!sbp || le16_to_cpu(sbp->s_magic) != NILFS_SUPER_MAGIC) return 0; bytes = le16_to_cpu(sbp->s_bytes); - if (bytes > BLOCK_SIZE) + if (bytes < sumoff + 4 || bytes > BLOCK_SIZE) return 0; crc = crc32_le(le32_to_cpu(sbp->s_crc_seed), (unsigned char *)sbp, sumoff); -- cgit v1.2.3 From 5a9294e5c535deab69831076af15cd35e1c95f8b Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Fri, 24 Jun 2016 14:50:27 -0700 Subject: autofs: don't get stuck in a loop if vfs_write() returns an error __vfs_write() returns a negative value in a error case. Link: http://lkml.kernel.org/r/20160616083108.6278.65815.stgit@pluto.themaw.net Signed-off-by: Andrey Vagin Signed-off-by: Ian Kent Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/autofs4/waitq.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c index 0146d911f468..631f1554c87b 100644 --- a/fs/autofs4/waitq.c +++ b/fs/autofs4/waitq.c @@ -66,11 +66,12 @@ static int autofs4_write(struct autofs_sb_info *sbi, set_fs(KERNEL_DS); mutex_lock(&sbi->pipe_mutex); - wr = __vfs_write(file, data, bytes, &file->f_pos); - while (bytes && wr) { + while (bytes) { + wr = __vfs_write(file, data, bytes, &file->f_pos); + if (wr <= 0) + break; data += wr; bytes -= wr; - wr = __vfs_write(file, data, bytes, &file->f_pos); } mutex_unlock(&sbi->pipe_mutex); -- cgit v1.2.3