From 73601ea5b7b18eb234219ae2adf77530f389da79 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Thu, 28 Mar 2019 20:43:30 -0700 Subject: fs/open.c: allow opening only regular files during execve() syzbot is hitting lockdep warning [1] due to trying to open a fifo during an execve() operation. But we don't need to open non regular files during an execve() operation, for all files which we will need are the executable file itself and the interpreter programs like /bin/sh and ld-linux.so.2 . Since the manpage for execve(2) says that execve() returns EACCES when the file or a script interpreter is not a regular file, and the manpage for uselib(2) says that uselib() can return EACCES, and we use FMODE_EXEC when opening for execve()/uselib(), we can bail out if a non regular file is requested with FMODE_EXEC set. Since this deadlock followed by khungtaskd warnings is trivially reproducible by a local unprivileged user, and syzbot's frequent crash due to this deadlock defers finding other bugs, let's workaround this deadlock until we get a chance to find a better solution. [1] https://syzkaller.appspot.com/bug?id=b5095bfec44ec84213bac54742a82483aad578ce Link: http://lkml.kernel.org/r/1552044017-7890-1-git-send-email-penguin-kernel@I-love.SAKURA.ne.jp Reported-by: syzbot Fixes: 8924feff66f35fe2 ("splice: lift pipe_lock out of splice_to_pipe()") Signed-off-by: Tetsuo Handa Acked-by: Kees Cook Cc: Al Viro Cc: Eric Biggers Cc: Dmitry Vyukov Cc: [4.9+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/open.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'fs') diff --git a/fs/open.c b/fs/open.c index 0285ce7dbd51..f1c2f855fd43 100644 --- a/fs/open.c +++ b/fs/open.c @@ -733,6 +733,12 @@ static int do_dentry_open(struct file *f, return 0; } + /* Any file opened for execve()/uselib() has to be a regular file. */ + if (unlikely(f->f_flags & FMODE_EXEC && !S_ISREG(inode->i_mode))) { + error = -EACCES; + goto cleanup_file; + } + if (f->f_mode & FMODE_WRITE && !special_file(inode->i_mode)) { error = get_write_access(inode); if (unlikely(error)) -- cgit v1.2.3 From e6a9467ea14bae8691b0f72c500510c42ea8edb8 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 28 Mar 2019 20:43:38 -0700 Subject: ocfs2: fix inode bh swapping mixup in ocfs2_reflink_inodes_lock ocfs2_reflink_inodes_lock() can swap the inode1/inode2 variables so that we always grab cluster locks in order of increasing inode number. Unfortunately, we forget to swap the inode record buffer head pointers when we've done this, which leads to incorrect bookkeepping when we're trying to make the two inodes have the same refcount tree. This has the effect of causing filesystem shutdowns if you're trying to reflink data from inode 100 into inode 97, where inode 100 already has a refcount tree attached and inode 97 doesn't. The reflink code decides to copy the refcount tree pointer from 100 to 97, but uses inode 97's inode record to open the tree root (which it doesn't have) and blows up. This issue causes filesystem shutdowns and metadata corruption! Link: http://lkml.kernel.org/r/20190312214910.GK20533@magnolia Fixes: 29ac8e856cb369 ("ocfs2: implement the VFS clone_range, copy_range, and dedupe_range features") Signed-off-by: Darrick J. Wong Reviewed-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Joseph Qi Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/refcounttree.c | 42 ++++++++++++++++++++++++------------------ 1 file changed, 24 insertions(+), 18 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index a35259eebc56..1dc9a08e8bdc 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c @@ -4719,22 +4719,23 @@ out: /* Lock an inode and grab a bh pointing to the inode. */ int ocfs2_reflink_inodes_lock(struct inode *s_inode, - struct buffer_head **bh1, + struct buffer_head **bh_s, struct inode *t_inode, - struct buffer_head **bh2) + struct buffer_head **bh_t) { - struct inode *inode1; - struct inode *inode2; + struct inode *inode1 = s_inode; + struct inode *inode2 = t_inode; struct ocfs2_inode_info *oi1; struct ocfs2_inode_info *oi2; + struct buffer_head *bh1 = NULL; + struct buffer_head *bh2 = NULL; bool same_inode = (s_inode == t_inode); + bool need_swap = (inode1->i_ino > inode2->i_ino); int status; /* First grab the VFS and rw locks. */ lock_two_nondirectories(s_inode, t_inode); - inode1 = s_inode; - inode2 = t_inode; - if (inode1->i_ino > inode2->i_ino) + if (need_swap) swap(inode1, inode2); status = ocfs2_rw_lock(inode1, 1); @@ -4757,17 +4758,13 @@ int ocfs2_reflink_inodes_lock(struct inode *s_inode, trace_ocfs2_double_lock((unsigned long long)oi1->ip_blkno, (unsigned long long)oi2->ip_blkno); - if (*bh1) - *bh1 = NULL; - if (*bh2) - *bh2 = NULL; - /* We always want to lock the one with the lower lockid first. */ if (oi1->ip_blkno > oi2->ip_blkno) mlog_errno(-ENOLCK); /* lock id1 */ - status = ocfs2_inode_lock_nested(inode1, bh1, 1, OI_LS_REFLINK_TARGET); + status = ocfs2_inode_lock_nested(inode1, &bh1, 1, + OI_LS_REFLINK_TARGET); if (status < 0) { if (status != -ENOENT) mlog_errno(status); @@ -4776,15 +4773,25 @@ int ocfs2_reflink_inodes_lock(struct inode *s_inode, /* lock id2 */ if (!same_inode) { - status = ocfs2_inode_lock_nested(inode2, bh2, 1, + status = ocfs2_inode_lock_nested(inode2, &bh2, 1, OI_LS_REFLINK_TARGET); if (status < 0) { if (status != -ENOENT) mlog_errno(status); goto out_cl1; } - } else - *bh2 = *bh1; + } else { + bh2 = bh1; + } + + /* + * If we swapped inode order above, we have to swap the buffer heads + * before passing them back to the caller. + */ + if (need_swap) + swap(bh1, bh2); + *bh_s = bh1; + *bh_t = bh2; trace_ocfs2_double_lock_end( (unsigned long long)oi1->ip_blkno, @@ -4794,8 +4801,7 @@ int ocfs2_reflink_inodes_lock(struct inode *s_inode, out_cl1: ocfs2_inode_unlock(inode1, 1); - brelse(*bh1); - *bh1 = NULL; + brelse(bh1); out_rw2: ocfs2_rw_unlock(inode2, 1); out_i2: -- cgit v1.2.3 From eebf36480678f948b3ed15d56ca7b8e6194e7c18 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Thu, 28 Mar 2019 20:44:09 -0700 Subject: fs/proc/kcore.c: make kcore_modules static Fix sparse warning: fs/proc/kcore.c:591:19: warning: symbol 'kcore_modules' was not declared. Should it be static? Link: http://lkml.kernel.org/r/20190320135417.13272-1-yuehaibing@huawei.com Signed-off-by: YueHaibing Acked-by: Mukesh Ojha Cc: Alexey Dobriyan Cc: Omar Sandoval Cc: James Morse Cc: Stephen Rothwell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/kcore.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index d29d869abec1..f5834488b67d 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c @@ -615,7 +615,7 @@ static void __init proc_kcore_text_init(void) /* * MODULES_VADDR has no intersection with VMALLOC_ADDR. */ -struct kcore_list kcore_modules; +static struct kcore_list kcore_modules; static void __init add_modules_range(void) { if (MODULES_VADDR != VMALLOC_START && MODULES_END != VMALLOC_END) { -- cgit v1.2.3 From 2620327852478e695afb2eebe66c354b3bc456cc Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 28 Mar 2019 20:44:36 -0700 Subject: fs: fs_parser: fix printk format warning Fix printk format warning (seen on i386 builds) by using ptrdiff format specifier (%t): fs/fs_parser.c:413:6: warning: format `%lu' expects argument of type `long unsigned int', but argument 3 has type `int' [-Wformat=] Link: http://lkml.kernel.org/r/19432668-ffd3-fbb2-af4f-1c8e48f6cc81@infradead.org Signed-off-by: Randy Dunlap Acked-by: Geert Uytterhoeven Cc: David Howells Cc: Alexander Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fs_parser.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/fs_parser.c b/fs/fs_parser.c index 842e8f749db6..570d71043acf 100644 --- a/fs/fs_parser.c +++ b/fs/fs_parser.c @@ -410,7 +410,7 @@ bool fs_validate_description(const struct fs_parameter_description *desc) for (param = desc->specs; param->name; param++) { if (param->opt == e->opt && param->type != fs_param_is_enum) { - pr_err("VALIDATE %s: e[%lu] enum val for %s\n", + pr_err("VALIDATE %s: e[%tu] enum val for %s\n", name, e - desc->enums, param->name); good = false; } -- cgit v1.2.3 From 23da9588037ecdd4901db76a5b79a42b529c4ec3 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Thu, 28 Mar 2019 20:44:40 -0700 Subject: fs/proc/proc_sysctl.c: fix NULL pointer dereference in put_links Syzkaller reports: kasan: GPF could be caused by NULL-ptr deref or user memory access general protection fault: 0000 [#1] SMP KASAN PTI CPU: 1 PID: 5373 Comm: syz-executor.0 Not tainted 5.0.0-rc8+ #3 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014 RIP: 0010:put_links+0x101/0x440 fs/proc/proc_sysctl.c:1599 Code: 00 0f 85 3a 03 00 00 48 8b 43 38 48 89 44 24 20 48 83 c0 38 48 89 c2 48 89 44 24 28 48 b8 00 00 00 00 00 fc ff df 48 c1 ea 03 <80> 3c 02 00 0f 85 fe 02 00 00 48 8b 74 24 20 48 c7 c7 60 2a 9d 91 RSP: 0018:ffff8881d828f238 EFLAGS: 00010202 RAX: dffffc0000000000 RBX: ffff8881e01b1140 RCX: ffffffff8ee98267 RDX: 0000000000000007 RSI: ffffc90001479000 RDI: ffff8881e01b1178 RBP: dffffc0000000000 R08: ffffed103ee27259 R09: ffffed103ee27259 R10: 0000000000000001 R11: ffffed103ee27258 R12: fffffffffffffff4 R13: 0000000000000006 R14: ffff8881f59838c0 R15: dffffc0000000000 FS: 00007f072254f700(0000) GS:ffff8881f7100000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fff8b286668 CR3: 00000001f0542002 CR4: 00000000007606e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: drop_sysctl_table+0x152/0x9f0 fs/proc/proc_sysctl.c:1629 get_subdir fs/proc/proc_sysctl.c:1022 [inline] __register_sysctl_table+0xd65/0x1090 fs/proc/proc_sysctl.c:1335 br_netfilter_init+0xbc/0x1000 [br_netfilter] do_one_initcall+0xfa/0x5ca init/main.c:887 do_init_module+0x204/0x5f6 kernel/module.c:3460 load_module+0x66b2/0x8570 kernel/module.c:3808 __do_sys_finit_module+0x238/0x2a0 kernel/module.c:3902 do_syscall_64+0x147/0x600 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x462e99 Code: f7 d8 64 89 02 b8 ff ff ff ff c3 66 0f 1f 44 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 bc ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007f072254ec58 EFLAGS: 00000246 ORIG_RAX: 0000000000000139 RAX: ffffffffffffffda RBX: 000000000073bf00 RCX: 0000000000462e99 RDX: 0000000000000000 RSI: 0000000020000280 RDI: 0000000000000003 RBP: 00007f072254ec70 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00007f072254f6bc R13: 00000000004bcefa R14: 00000000006f6fb0 R15: 0000000000000004 Modules linked in: br_netfilter(+) dvb_usb_dibusb_mc_common dib3000mc dibx000_common dvb_usb_dibusb_common dvb_usb_dw2102 dvb_usb classmate_laptop palmas_regulator cn videobuf2_v4l2 v4l2_common snd_soc_bd28623 mptbase snd_usb_usx2y snd_usbmidi_lib snd_rawmidi wmi libnvdimm lockd sunrpc grace rc_kworld_pc150u rc_core rtc_da9063 sha1_ssse3 i2c_cros_ec_tunnel adxl34x_spi adxl34x nfnetlink lib80211 i5500_temp dvb_as102 dvb_core videobuf2_common videodev media videobuf2_vmalloc videobuf2_memops udc_core lnbp22 leds_lp3952 hid_roccat_ryos s1d13xxxfb mtd vport_geneve openvswitch nf_conncount nf_nat_ipv6 nsh geneve udp_tunnel ip6_udp_tunnel snd_soc_mt6351 sis_agp phylink snd_soc_adau1761_spi snd_soc_adau1761 snd_soc_adau17x1 snd_soc_core snd_pcm_dmaengine ac97_bus snd_compress snd_soc_adau_utils snd_soc_sigmadsp_regmap snd_soc_sigmadsp raid_class hid_roccat_konepure hid_roccat_common hid_roccat c2port_duramar2150 core mdio_bcm_unimac iptable_security iptable_raw iptable_mangle iptable_nat nf_nat_ipv4 nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 iptable_filter bpfilter ip6_vti ip_vti ip_gre ipip sit tunnel4 ip_tunnel hsr veth netdevsim devlink vxcan batman_adv cfg80211 rfkill chnl_net caif nlmon dummy team bonding vcan bridge stp llc ip6_gre gre ip6_tunnel tunnel6 tun crct10dif_pclmul crc32_pclmul crc32c_intel ghash_clmulni_intel joydev mousedev ide_pci_generic piix aesni_intel aes_x86_64 ide_core crypto_simd atkbd cryptd glue_helper serio_raw ata_generic pata_acpi i2c_piix4 floppy sch_fq_codel ip_tables x_tables ipv6 [last unloaded: lm73] Dumping ftrace buffer: (ftrace buffer empty) ---[ end trace 770020de38961fd0 ]--- A new dir entry can be created in get_subdir and its 'header->parent' is set to NULL. Only after insert_header success, it will be set to 'dir', otherwise 'header->parent' is set to NULL and drop_sysctl_table is called. However in err handling path of get_subdir, drop_sysctl_table also be called on 'new->header' regardless its value of parent pointer. Then put_links is called, which triggers NULL-ptr deref when access member of header->parent. In fact we have multiple error paths which call drop_sysctl_table() there, upon failure on insert_links() we also call drop_sysctl_table().And even in the successful case on __register_sysctl_table() we still always call drop_sysctl_table().This patch fix it. Link: http://lkml.kernel.org/r/20190314085527.13244-1-yuehaibing@huawei.com Fixes: 0e47c99d7fe25 ("sysctl: Replace root_list with links between sysctl_table_sets") Signed-off-by: YueHaibing Reported-by: Hulk Robot Acked-by: Luis Chamberlain Cc: Kees Cook Cc: Alexey Dobriyan Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Al Viro Cc: Eric W. Biederman Cc: [3.4+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/proc_sysctl.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 4d598a399bbf..d65390727541 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -1626,7 +1626,8 @@ static void drop_sysctl_table(struct ctl_table_header *header) if (--header->nreg) return; - put_links(header); + if (parent) + put_links(header); start_unregistering(header); if (!--header->count) kfree_rcu(header, rcu); -- cgit v1.2.3