From 0e37579506b9e0c11a4a95c8df6b156b2d6ccf9b Mon Sep 17 00:00:00 2001 From: Anton Altaparmakov Date: Wed, 22 Feb 2012 10:49:58 +0000 Subject: NTFS: Remove unused variable. Found by Coverity software (http://scan.coverity.com). Signed-off-by: Anton Altaparmakov --- fs/ntfs/super.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c index 5a4a8af5c406..f907611cca73 100644 --- a/fs/ntfs/super.c +++ b/fs/ntfs/super.c @@ -1,7 +1,7 @@ /* * super.c - NTFS kernel super block handling. Part of the Linux-NTFS project. * - * Copyright (c) 2001-2011 Anton Altaparmakov and Tuxera Inc. + * Copyright (c) 2001-2012 Anton Altaparmakov and Tuxera Inc. * Copyright (c) 2001,2002 Richard Russon * * This program/include file is free software; you can redistribute it and/or @@ -1239,7 +1239,6 @@ static int check_windows_hibernation_status(ntfs_volume *vol) { MFT_REF mref; struct inode *vi; - ntfs_inode *ni; struct page *page; u32 *kaddr, *kend; ntfs_name *name = NULL; @@ -1290,7 +1289,6 @@ static int check_windows_hibernation_status(ntfs_volume *vol) "is not the system volume.", i_size_read(vi)); goto iput_out; } - ni = NTFS_I(vi); page = ntfs_map_page(vi->i_mapping, 0); if (IS_ERR(page)) { ntfs_error(vol->sb, "Failed to read from hiberfil.sys."); -- cgit v1.2.3 From 45d95bcd7ac961eef26374a0ad6100cda55bcea1 Mon Sep 17 00:00:00 2001 From: Anton Altaparmakov Date: Wed, 22 Feb 2012 11:15:43 +0000 Subject: NTFS: Do not dereference pointer before checking for NULL. Found by Coverity software (http://scan.coverity.com). Signed-off-by: Anton Altaparmakov --- fs/ntfs/attrib.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c index f14fde2b03d6..e0281992ddc3 100644 --- a/fs/ntfs/attrib.c +++ b/fs/ntfs/attrib.c @@ -1,7 +1,7 @@ /** * attrib.c - NTFS attribute operations. Part of the Linux-NTFS project. * - * Copyright (c) 2001-2007 Anton Altaparmakov + * Copyright (c) 2001-2012 Anton Altaparmakov and Tuxera Inc. * Copyright (c) 2002 Richard Russon * * This program/include file is free software; you can redistribute it and/or @@ -345,10 +345,10 @@ LCN ntfs_attr_vcn_to_lcn_nolock(ntfs_inode *ni, const VCN vcn, unsigned long flags; bool is_retry = false; + BUG_ON(!ni); ntfs_debug("Entering for i_ino 0x%lx, vcn 0x%llx, %s_locked.", ni->mft_no, (unsigned long long)vcn, write_locked ? "write" : "read"); - BUG_ON(!ni); BUG_ON(!NInoNonResident(ni)); BUG_ON(vcn < 0); if (!ni->runlist.rl) { @@ -469,9 +469,9 @@ runlist_element *ntfs_attr_find_vcn_nolock(ntfs_inode *ni, const VCN vcn, int err = 0; bool is_retry = false; + BUG_ON(!ni); ntfs_debug("Entering for i_ino 0x%lx, vcn 0x%llx, with%s ctx.", ni->mft_no, (unsigned long long)vcn, ctx ? "" : "out"); - BUG_ON(!ni); BUG_ON(!NInoNonResident(ni)); BUG_ON(vcn < 0); if (!ni->runlist.rl) { -- cgit v1.2.3 From 9b556248ecb059095e000f77c4b84899feb50098 Mon Sep 17 00:00:00 2001 From: Anton Altaparmakov Date: Fri, 24 Feb 2012 09:17:09 +0000 Subject: NTFS: Correct two spelling errors "dealocate" to "deallocate" in mft.c. From: Masanari Iida Signed-off-by: Anton Altaparmakov --- fs/ntfs/mft.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c index 382857f9c7db..3014a36a255b 100644 --- a/fs/ntfs/mft.c +++ b/fs/ntfs/mft.c @@ -1,7 +1,7 @@ /** * mft.c - NTFS kernel mft record operations. Part of the Linux-NTFS project. * - * Copyright (c) 2001-2011 Anton Altaparmakov and Tuxera Inc. + * Copyright (c) 2001-2012 Anton Altaparmakov and Tuxera Inc. * Copyright (c) 2002 Richard Russon * * This program/include file is free software; you can redistribute it and/or @@ -1367,7 +1367,7 @@ static int ntfs_mft_bitmap_extend_allocation_nolock(ntfs_volume *vol) ntfs_error(vol->sb, "Failed to merge runlists for mft " "bitmap."); if (ntfs_cluster_free_from_rl(vol, rl2)) { - ntfs_error(vol->sb, "Failed to dealocate " + ntfs_error(vol->sb, "Failed to deallocate " "allocated cluster.%s", es); NVolSetErrors(vol); } @@ -1805,7 +1805,7 @@ static int ntfs_mft_data_extend_allocation_nolock(ntfs_volume *vol) ntfs_error(vol->sb, "Failed to merge runlists for mft data " "attribute."); if (ntfs_cluster_free_from_rl(vol, rl2)) { - ntfs_error(vol->sb, "Failed to dealocate clusters " + ntfs_error(vol->sb, "Failed to deallocate clusters " "from the mft data attribute.%s", es); NVolSetErrors(vol); } -- cgit v1.2.3 From 4043b886b0740ded65f633fc4b7225d624c7e658 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Mon, 16 Jan 2012 15:46:21 +0000 Subject: GFS2: Fix race between lru_list and glock ref count This patch fixes a narrow race window between the glock ref count hitting zero and glocks being removed from the lru_list. Signed-off-by: Steven Whitehouse --- fs/gfs2/glock.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 376816fcd040..351a3e797789 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -167,14 +167,19 @@ void gfs2_glock_add_to_lru(struct gfs2_glock *gl) spin_unlock(&lru_lock); } -static void gfs2_glock_remove_from_lru(struct gfs2_glock *gl) +static void __gfs2_glock_remove_from_lru(struct gfs2_glock *gl) { - spin_lock(&lru_lock); if (!list_empty(&gl->gl_lru)) { list_del_init(&gl->gl_lru); atomic_dec(&lru_count); clear_bit(GLF_LRU, &gl->gl_flags); } +} + +static void gfs2_glock_remove_from_lru(struct gfs2_glock *gl) +{ + spin_lock(&lru_lock); + __gfs2_glock_remove_from_lru(gl); spin_unlock(&lru_lock); } @@ -217,11 +222,12 @@ void gfs2_glock_put(struct gfs2_glock *gl) struct gfs2_sbd *sdp = gl->gl_sbd; struct address_space *mapping = gfs2_glock2aspace(gl); - if (atomic_dec_and_test(&gl->gl_ref)) { + if (atomic_dec_and_lock(&gl->gl_ref, &lru_lock)) { + __gfs2_glock_remove_from_lru(gl); + spin_unlock(&lru_lock); spin_lock_bucket(gl->gl_hash); hlist_bl_del_rcu(&gl->gl_list); spin_unlock_bucket(gl->gl_hash); - gfs2_glock_remove_from_lru(gl); GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders)); GLOCK_BUG_ON(gl, mapping && mapping->nrpages); trace_gfs2_glock_put(gl); -- cgit v1.2.3 From 718b97bd6b03445be53098e3c8f896aeebc304aa Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Thu, 16 Feb 2012 11:31:04 -0500 Subject: GFS2: Read in rindex if necessary during unlink This patch fixes a problem whereby you were unable to delete files until other file system operations were done (such as statfs, touch, writes, etc.) that caused the rindex to be read in. Signed-off-by: Bob Peterson Signed-off-by: Steven Whitehouse --- fs/gfs2/inode.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index a7d611b93f0f..c87faf48f0a1 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -1035,14 +1035,19 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry) struct buffer_head *bh; struct gfs2_holder ghs[3]; struct gfs2_rgrpd *rgd; - int error = -EROFS; + int error; + error = gfs2_rindex_update(sdp); + if (error) + return error; gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs); gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1); rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr); - if (!rgd) + if (!rgd) { + error = -EROFS; goto out_inodes; + } gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2); -- cgit v1.2.3 From 9e73f571ea3afffca78c1f54128d57796e27532f Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Fri, 17 Feb 2012 09:15:52 -0500 Subject: GFS2: Ensure rindex is uptodate for fallocate This patch fixes a problem whereby gfs2_grow was failing and causing GFS2 to assert. The problem was that when GFS2's fallocate operation tried to acquire an "allocation" it made sure the rindex was up to date, and if not, it called gfs2_rindex_update. However, if the file being fallocated was the rindex itself, it was already locked at that point. By calling gfs2_rindex_update at an earlier point in time, we bring rindex up to date and thereby avoid trying to lock it when the "allocation" is acquired. Signed-off-by: Bob Peterson Signed-off-by: Steven Whitehouse --- fs/gfs2/file.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'fs') diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index c5fb3597f696..7f906c8b02ac 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -772,6 +772,11 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset, if (bytes == 0) bytes = sdp->sd_sb.sb_bsize; + error = gfs2_rindex_update(sdp); + if (error) { + fs_warn(sdp, "rindex update returns %d\n", error); + return error; + } gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh); error = gfs2_glock_nq(&ip->i_gh); if (unlikely(error)) -- cgit v1.2.3 From a365fbf354907430e6852f0c373b4b3eeff81ba3 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Fri, 24 Feb 2012 15:09:14 +0000 Subject: GFS2: Read resource groups on mount This makes mount take slightly longer, but at the same time, the first write to the filesystem will be faster too. It also means that if there is a problem in the resource index, then we can refuse to mount rather than having to try and report that when the first write occurs. In addition, to avoid recursive locking, we hvae to take account of instances when the rindex glock may already be held when we are trying to update the rbtree of resource groups. Signed-off-by: Steven Whitehouse --- fs/gfs2/file.c | 5 ----- fs/gfs2/inode.c | 14 +++----------- fs/gfs2/ops_fstype.c | 5 +++++ fs/gfs2/rgrp.c | 13 +++++++++---- 4 files changed, 17 insertions(+), 20 deletions(-) (limited to 'fs') diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 7f906c8b02ac..c5fb3597f696 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -772,11 +772,6 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset, if (bytes == 0) bytes = sdp->sd_sb.sb_bsize; - error = gfs2_rindex_update(sdp); - if (error) { - fs_warn(sdp, "rindex update returns %d\n", error); - return error; - } gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh); error = gfs2_glock_nq(&ip->i_gh); if (unlikely(error)) diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index c87faf48f0a1..56987460cdae 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -391,10 +391,6 @@ static int alloc_dinode(struct gfs2_inode *dip, u64 *no_addr, u64 *generation) int error; int dblocks = 1; - error = gfs2_rindex_update(sdp); - if (error) - fs_warn(sdp, "rindex update returns %d\n", error); - error = gfs2_inplace_reserve(dip, RES_DINODE); if (error) goto out; @@ -1035,19 +1031,15 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry) struct buffer_head *bh; struct gfs2_holder ghs[3]; struct gfs2_rgrpd *rgd; - int error; + int error = -EROFS; - error = gfs2_rindex_update(sdp); - if (error) - return error; gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs); gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1); rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr); - if (!rgd) { - error = -EROFS; + if (!rgd) goto out_inodes; - } + gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2); diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 6aacf3f230a2..24f609c9ef91 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -800,6 +800,11 @@ static int init_inodes(struct gfs2_sbd *sdp, int undo) fs_err(sdp, "can't get quota file inode: %d\n", error); goto fail_rindex; } + + error = gfs2_rindex_update(sdp); + if (error) + goto fail_qinode; + return 0; fail_qinode: diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 981bfa32121a..49ada95209d0 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -683,16 +683,21 @@ int gfs2_rindex_update(struct gfs2_sbd *sdp) struct gfs2_glock *gl = ip->i_gl; struct gfs2_holder ri_gh; int error = 0; + int unlock_required = 0; /* Read new copy from disk if we don't have the latest */ if (!sdp->sd_rindex_uptodate) { mutex_lock(&sdp->sd_rindex_mutex); - error = gfs2_glock_nq_init(gl, LM_ST_SHARED, 0, &ri_gh); - if (error) - return error; + if (!gfs2_glock_is_locked_by_me(gl)) { + error = gfs2_glock_nq_init(gl, LM_ST_SHARED, 0, &ri_gh); + if (error) + return error; + unlock_required = 1; + } if (!sdp->sd_rindex_uptodate) error = gfs2_ri_update(ip); - gfs2_glock_dq_uninit(&ri_gh); + if (unlock_required) + gfs2_glock_dq_uninit(&ri_gh); mutex_unlock(&sdp->sd_rindex_mutex); } -- cgit v1.2.3 From 164974a8f2a482f1abcb027c6d1a89dd79b14297 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 28 Feb 2012 16:31:12 -0800 Subject: ecryptfs: fix printk format warning for size_t Fix printk format warning (from Linus's suggestion): on i386: fs/ecryptfs/miscdev.c:433:38: warning: format '%lu' expects type 'long unsigned int', but argument 4 has type 'unsigned int' and on x86_64: fs/ecryptfs/miscdev.c:433:38: warning: format '%u' expects type 'unsigned int', but argument 4 has type 'long unsigned int' Signed-off-by: Randy Dunlap Cc: Geert Uytterhoeven Cc: Tyler Hicks Cc: Dustin Kirkland Cc: ecryptfs@vger.kernel.org Signed-off-by: Linus Torvalds --- fs/ecryptfs/miscdev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ecryptfs/miscdev.c b/fs/ecryptfs/miscdev.c index 349209dc6a91..3a06f4043df4 100644 --- a/fs/ecryptfs/miscdev.c +++ b/fs/ecryptfs/miscdev.c @@ -429,7 +429,7 @@ ecryptfs_miscdev_write(struct file *file, const char __user *buf, goto memdup; } else if (count < MIN_MSG_PKT_SIZE || count > MAX_MSG_PKT_SIZE) { printk(KERN_WARNING "%s: Acceptable packet size range is " - "[%d-%lu], but amount of data written is [%zu].", + "[%d-%zu], but amount of data written is [%zu].", __func__, MIN_MSG_PKT_SIZE, MAX_MSG_PKT_SIZE, count); return -EINVAL; } -- cgit v1.2.3 From c8e252586f8d5de906385d8cf6385fee289a825e Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Fri, 2 Mar 2012 10:43:48 -0800 Subject: regset: Prevent null pointer reference on readonly regsets The regset common infrastructure assumed that regsets would always have .get and .set methods, but not necessarily .active methods. Unfortunately people have since written regsets without .set methods. Rather than putting in stub functions everywhere, handle regsets with null .get or .set methods explicitly. Signed-off-by: H. Peter Anvin Reviewed-by: Oleg Nesterov Acked-by: Roland McGrath Cc: Signed-off-by: Linus Torvalds --- fs/binfmt_elf.c | 2 +- include/linux/regset.h | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index bcb884e2d613..07d096c49920 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1421,7 +1421,7 @@ static int fill_thread_core_info(struct elf_thread_core_info *t, for (i = 1; i < view->n; ++i) { const struct user_regset *regset = &view->regsets[i]; do_thread_regset_writeback(t->task, regset); - if (regset->core_note_type && + if (regset->core_note_type && regset->get && (!regset->active || regset->active(t->task, regset))) { int ret; size_t size = regset->n * regset->size; diff --git a/include/linux/regset.h b/include/linux/regset.h index 8abee6556223..5150fd16ef93 100644 --- a/include/linux/regset.h +++ b/include/linux/regset.h @@ -335,6 +335,9 @@ static inline int copy_regset_to_user(struct task_struct *target, { const struct user_regset *regset = &view->regsets[setno]; + if (!regset->get) + return -EOPNOTSUPP; + if (!access_ok(VERIFY_WRITE, data, size)) return -EIO; @@ -358,6 +361,9 @@ static inline int copy_regset_from_user(struct task_struct *target, { const struct user_regset *regset = &view->regsets[setno]; + if (!regset->set) + return -EOPNOTSUPP; + if (!access_ok(VERIFY_READ, data, size)) return -EIO; -- cgit v1.2.3 From 8966be90304b394fd6a2c5af7b6b3abe2df3889c Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 2 Mar 2012 14:23:30 -0800 Subject: vfs: trivial __d_lookup_rcu() cleanups These don't change any semantics, but they clean up the code a bit and mark some arguments appropriately 'const'. They came up as I was doing the word-at-a-time dcache name accessor code, and cleaning this up now allows me to send out a smaller relevant interesting patch for the experimental stuff. Signed-off-by: Linus Torvalds --- fs/dcache.c | 13 ++++++++----- include/linux/dcache.h | 3 ++- 2 files changed, 10 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/dcache.c b/fs/dcache.c index fe19ac13f75f..138be96e25b6 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -104,7 +104,7 @@ static unsigned int d_hash_shift __read_mostly; static struct hlist_bl_head *dentry_hashtable __read_mostly; -static inline struct hlist_bl_head *d_hash(struct dentry *parent, +static inline struct hlist_bl_head *d_hash(const struct dentry *parent, unsigned long hash) { hash += ((unsigned long) parent ^ GOLDEN_RATIO_PRIME) / L1_CACHE_BYTES; @@ -1717,8 +1717,9 @@ EXPORT_SYMBOL(d_add_ci); * child is looked up. Thus, an interlocking stepping of sequence lock checks * is formed, giving integrity down the path walk. */ -struct dentry *__d_lookup_rcu(struct dentry *parent, struct qstr *name, - unsigned *seq, struct inode **inode) +struct dentry *__d_lookup_rcu(const struct dentry *parent, + const struct qstr *name, + unsigned *seqp, struct inode **inode) { unsigned int len = name->len; unsigned int hash = name->hash; @@ -1748,6 +1749,7 @@ struct dentry *__d_lookup_rcu(struct dentry *parent, struct qstr *name, * See Documentation/filesystems/path-lookup.txt for more details. */ hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) { + unsigned seq; struct inode *i; const char *tname; int tlen; @@ -1756,7 +1758,7 @@ struct dentry *__d_lookup_rcu(struct dentry *parent, struct qstr *name, continue; seqretry: - *seq = read_seqcount_begin(&dentry->d_seq); + seq = read_seqcount_begin(&dentry->d_seq); if (dentry->d_parent != parent) continue; if (d_unhashed(dentry)) @@ -1771,7 +1773,7 @@ seqretry: * edge of memory when walking. If we could load this * atomically some other way, we could drop this check. */ - if (read_seqcount_retry(&dentry->d_seq, *seq)) + if (read_seqcount_retry(&dentry->d_seq, seq)) goto seqretry; if (unlikely(parent->d_flags & DCACHE_OP_COMPARE)) { if (parent->d_op->d_compare(parent, *inode, @@ -1788,6 +1790,7 @@ seqretry: * order to do anything useful with the returned dentry * anyway. */ + *seqp = seq; *inode = i; return dentry; } diff --git a/include/linux/dcache.h b/include/linux/dcache.h index d64a55b23afd..61b24261e07a 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -309,7 +309,8 @@ extern struct dentry *d_ancestor(struct dentry *, struct dentry *); extern struct dentry *d_lookup(struct dentry *, struct qstr *); extern struct dentry *d_hash_and_lookup(struct dentry *, struct qstr *); extern struct dentry *__d_lookup(struct dentry *, struct qstr *); -extern struct dentry *__d_lookup_rcu(struct dentry *parent, struct qstr *name, +extern struct dentry *__d_lookup_rcu(const struct dentry *parent, + const struct qstr *name, unsigned *seq, struct inode **inode); /** -- cgit v1.2.3 From 0145acc202ca613b23b5383e55df3c32a92ad1bf Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 2 Mar 2012 14:32:59 -0800 Subject: vfs: uninline full_name_hash() .. and also use it in lookup_one_len() rather than open-coding it. There aren't any performance-critical users, so inlining it is silly. But it wouldn't matter if it wasn't for the fact that the word-at-a-time dentry name patches want to conditionally replace the function, and uninlining it sets the stage for that. So again, this is a preparatory patch that doesn't change any semantics, and only prepares for a much cleaner and testable word-at-a-time dentry name accessor patch. Signed-off-by: Linus Torvalds --- fs/namei.c | 13 +++++++++---- include/linux/dcache.h | 9 +-------- 2 files changed, 10 insertions(+), 12 deletions(-) (limited to 'fs') diff --git a/fs/namei.c b/fs/namei.c index a780ea515c47..ec72fa1acb14 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1374,6 +1374,14 @@ static inline int can_lookup(struct inode *inode) return 1; } +unsigned int full_name_hash(const unsigned char *name, unsigned int len) +{ + unsigned long hash = init_name_hash(); + while (len--) + hash = partial_name_hash(*name++, hash); + return end_name_hash(hash); +} + /* * Name resolution. * This is the basic name resolution function, turning a pathname into @@ -1775,24 +1783,21 @@ static struct dentry *lookup_hash(struct nameidata *nd) struct dentry *lookup_one_len(const char *name, struct dentry *base, int len) { struct qstr this; - unsigned long hash; unsigned int c; WARN_ON_ONCE(!mutex_is_locked(&base->d_inode->i_mutex)); this.name = name; this.len = len; + this.hash = full_name_hash(name, len); if (!len) return ERR_PTR(-EACCES); - hash = init_name_hash(); while (len--) { c = *(const unsigned char *)name++; if (c == '/' || c == '\0') return ERR_PTR(-EACCES); - hash = partial_name_hash(c, hash); } - this.hash = end_name_hash(hash); /* * See if the low-level filesystem might want * to use its own hash.. diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 61b24261e07a..f1c7eb8461be 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -89,14 +89,7 @@ static inline unsigned long end_name_hash(unsigned long hash) } /* Compute the hash for a name string. */ -static inline unsigned int -full_name_hash(const unsigned char *name, unsigned int len) -{ - unsigned long hash = init_name_hash(); - while (len--) - hash = partial_name_hash(*name++, hash); - return end_name_hash(hash); -} +extern unsigned int full_name_hash(const unsigned char *, unsigned int); /* * Try to keep struct dentry aligned on 64 byte cachelines (this will -- cgit v1.2.3 From 200e9ef7ab51f3dce4f35f90ea458cf43ea83bb8 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 2 Mar 2012 14:49:24 -0800 Subject: vfs: split up name hashing in link_path_walk() into helper function The code in link_path_walk() that finds out the length and the hash of the next path component is some of the hottest code in the kernel. And I have a version of it that does things at the full width of the CPU wordsize at a time, but that means that we *really* want to split it up into a separate helper function. So this re-organizes the code a bit and splits the hashing part into a helper function called "hash_name()". It returns the length of the pathname component, while at the same time computing and writing the hash to the appropriate location. The code generation is slightly changed by this patch, but generally for the better - and the added abstraction actually makes the code easier to read too. And the new interface is well suited for replacing just the "hash_name()" function with alternative implementations. Signed-off-by: Linus Torvalds --- fs/namei.c | 52 ++++++++++++++++++++++++++++++++++------------------ 1 file changed, 34 insertions(+), 18 deletions(-) (limited to 'fs') diff --git a/fs/namei.c b/fs/namei.c index ec72fa1acb14..71807dc7e402 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1382,6 +1382,25 @@ unsigned int full_name_hash(const unsigned char *name, unsigned int len) return end_name_hash(hash); } +/* + * We know there's a real path component here of at least + * one character. + */ +static inline unsigned long hash_name(const char *name, unsigned int *hashp) +{ + unsigned long hash = init_name_hash(); + unsigned long len = 0, c; + + c = (unsigned char)*name; + do { + len++; + hash = partial_name_hash(c, hash); + c = (unsigned char)name[len]; + } while (c && c != '/'); + *hashp = end_name_hash(hash); + return len; +} + /* * Name resolution. * This is the basic name resolution function, turning a pathname into @@ -1402,31 +1421,22 @@ static int link_path_walk(const char *name, struct nameidata *nd) /* At this point we know we have a real path component. */ for(;;) { - unsigned long hash; struct qstr this; - unsigned int c; + long len; int type; err = may_lookup(nd); if (err) break; + len = hash_name(name, &this.hash); this.name = name; - c = *(const unsigned char *)name; - - hash = init_name_hash(); - do { - name++; - hash = partial_name_hash(c, hash); - c = *(const unsigned char *)name; - } while (c && (c != '/')); - this.len = name - (const char *) this.name; - this.hash = end_name_hash(hash); + this.len = len; type = LAST_NORM; - if (this.name[0] == '.') switch (this.len) { + if (name[0] == '.') switch (len) { case 2: - if (this.name[1] == '.') { + if (name[1] == '.') { type = LAST_DOTDOT; nd->flags |= LOOKUP_JUMPED; } @@ -1445,12 +1455,18 @@ static int link_path_walk(const char *name, struct nameidata *nd) } } - /* remove trailing slashes? */ - if (!c) + if (!name[len]) goto last_component; - while (*++name == '/'); - if (!*name) + /* + * If it wasn't NUL, we know it was '/'. Skip that + * slash, and continue until no more slashes. + */ + do { + len++; + } while (unlikely(name[len] == '/')); + if (!name[len]) goto last_component; + name += len; err = walk_component(nd, &next, &this, type, LOOKUP_FOLLOW); if (err < 0) -- cgit v1.2.3 From ae942ae71934fddd0639160c24f6efa703d5784e Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 2 Mar 2012 19:40:57 -0800 Subject: vfs: export full_name_hash() function to modules Commit 5707c87f "vfs: uninline full_name_hash()" broke the modular build, because it needs exporting now that it isn't inlined any more. Reported-by: Tetsuo Handa Signed-off-by: Linus Torvalds --- fs/namei.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/namei.c b/fs/namei.c index 71807dc7e402..e2ba62820a0f 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1381,6 +1381,7 @@ unsigned int full_name_hash(const unsigned char *name, unsigned int len) hash = partial_name_hash(*name++, hash); return end_name_hash(hash); } +EXPORT_SYMBOL(full_name_hash); /* * We know there's a real path component here of at least -- cgit v1.2.3 From 5483f18e986ed5267b923bec12b407845181350b Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 4 Mar 2012 15:51:42 -0800 Subject: vfs: move dentry_cmp from to fs/dcache.c It's only used inside fs/dcache.c, and we're going to play games with it for the word-at-a-time patches. This time we really don't even want to export it, because it really is an internal function to fs/dcache.c, and has been since it was introduced. Having it in that extremely hot header file (it's included in pretty much everything, thanks to ) is a disaster for testing different versions, and is utterly pointless. We really should have some kind of header file diet thing, where we figure out which parts of header files are really better off private and only result in more expensive compiles. Signed-off-by: Linus Torvalds --- fs/dcache.c | 20 ++++++++++++++++++++ include/linux/dcache.h | 20 -------------------- 2 files changed, 20 insertions(+), 20 deletions(-) (limited to 'fs') diff --git a/fs/dcache.c b/fs/dcache.c index 138be96e25b6..bcbdb33fcc20 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -137,6 +137,26 @@ int proc_nr_dentry(ctl_table *table, int write, void __user *buffer, } #endif +/* + * Compare 2 name strings, return 0 if they match, otherwise non-zero. + * The strings are both count bytes long, and count is non-zero. + */ +static inline int dentry_cmp(const unsigned char *cs, size_t scount, + const unsigned char *ct, size_t tcount) +{ + if (scount != tcount) + return 1; + + do { + if (*cs != *ct) + return 1; + cs++; + ct++; + tcount--; + } while (tcount); + return 0; +} + static void __d_free(struct rcu_head *head) { struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu); diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 4270bedd2308..ff5f5256d175 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -47,26 +47,6 @@ struct dentry_stat_t { }; extern struct dentry_stat_t dentry_stat; -/* - * Compare 2 name strings, return 0 if they match, otherwise non-zero. - * The strings are both count bytes long, and count is non-zero. - */ -static inline int dentry_cmp(const unsigned char *cs, size_t scount, - const unsigned char *ct, size_t tcount) -{ - if (scount != tcount) - return 1; - - do { - if (*cs != *ct) - return 1; - cs++; - ct++; - tcount--; - } while (tcount); - return 0; -} - /* Name hashing routines. Initial hash value */ /* Hash courtesy of the R5 hash in reiserfs modulo sign bits */ #define init_name_hash() 0 -- cgit v1.2.3 From 6414fa6a150111750011f477899d370244da4171 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 5 Mar 2012 06:38:42 +0000 Subject: aout: move setup_arg_pages() prior to reading/mapping the binary Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- arch/x86/ia32/ia32_aout.c | 14 +++++++------- fs/binfmt_aout.c | 14 +++++++------- 2 files changed, 14 insertions(+), 14 deletions(-) (limited to 'fs') diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c index fd843877e841..39e49091f648 100644 --- a/arch/x86/ia32/ia32_aout.c +++ b/arch/x86/ia32/ia32_aout.c @@ -315,6 +315,13 @@ static int load_aout_binary(struct linux_binprm *bprm, struct pt_regs *regs) current->mm->free_area_cache = TASK_UNMAPPED_BASE; current->mm->cached_hole_size = 0; + retval = setup_arg_pages(bprm, IA32_STACK_TOP, EXSTACK_DEFAULT); + if (retval < 0) { + /* Someone check-me: is this error path enough? */ + send_sig(SIGKILL, current, 0); + return retval; + } + install_exec_creds(bprm); current->flags &= ~PF_FORKNOEXEC; @@ -410,13 +417,6 @@ beyond_if: set_brk(current->mm->start_brk, current->mm->brk); - retval = setup_arg_pages(bprm, IA32_STACK_TOP, EXSTACK_DEFAULT); - if (retval < 0) { - /* Someone check-me: is this error path enough? */ - send_sig(SIGKILL, current, 0); - return retval; - } - current->mm->start_stack = (unsigned long)create_aout_tables((char __user *)bprm->p, bprm); /* start thread */ diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c index a6395bdb26ae..1ff94054d35a 100644 --- a/fs/binfmt_aout.c +++ b/fs/binfmt_aout.c @@ -259,6 +259,13 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs) current->mm->free_area_cache = current->mm->mmap_base; current->mm->cached_hole_size = 0; + retval = setup_arg_pages(bprm, STACK_TOP, EXSTACK_DEFAULT); + if (retval < 0) { + /* Someone check-me: is this error path enough? */ + send_sig(SIGKILL, current, 0); + return retval; + } + install_exec_creds(bprm); current->flags &= ~PF_FORKNOEXEC; @@ -352,13 +359,6 @@ beyond_if: return retval; } - retval = setup_arg_pages(bprm, STACK_TOP, EXSTACK_DEFAULT); - if (retval < 0) { - /* Someone check-me: is this error path enough? */ - send_sig(SIGKILL, current, 0); - return retval; - } - current->mm->start_stack = (unsigned long) create_aout_tables((char __user *) bprm->p, bprm); #ifdef __alpha__ -- cgit v1.2.3 From 880641bb9da2473e9ecf6c708d993b29928c1b3c Mon Sep 17 00:00:00 2001 From: Jeff Moyer Date: Mon, 5 Mar 2012 14:59:12 -0800 Subject: aio: wake up waiters when freeing unused kiocbs Bart Van Assche reported a hung fio process when either hot-removing storage or when interrupting the fio process itself. The (pruned) call trace for the latter looks like so: fio D 0000000000000001 0 6849 6848 0x00000004 ffff880092541b88 0000000000000046 ffff880000000000 ffff88012fa11dc0 ffff88012404be70 ffff880092541fd8 ffff880092541fd8 ffff880092541fd8 ffff880128b894d0 ffff88012404be70 ffff880092541b88 000000018106f24d Call Trace: schedule+0x3f/0x60 io_schedule+0x8f/0xd0 wait_for_all_aios+0xc0/0x100 exit_aio+0x55/0xc0 mmput+0x2d/0x110 exit_mm+0x10d/0x130 do_exit+0x671/0x860 do_group_exit+0x44/0xb0 get_signal_to_deliver+0x218/0x5a0 do_signal+0x65/0x700 do_notify_resume+0x65/0x80 int_signal+0x12/0x17 The problem lies with the allocation batching code. It will opportunistically allocate kiocbs, and then trim back the list of iocbs when there is not enough room in the completion ring to hold all of the events. In the case above, what happens is that the pruning back of events ends up freeing up the last active request and the context is marked as dead, so it is thus responsible for waking up waiters. Unfortunately, the code does not check for this condition, so we end up with a hung task. Signed-off-by: Jeff Moyer Reported-by: Bart Van Assche Tested-by: Bart Van Assche Cc: [3.2.x only] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/aio.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/aio.c b/fs/aio.c index 969beb0e2231..67e4b9047cc9 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -490,6 +490,8 @@ static void kiocb_batch_free(struct kioctx *ctx, struct kiocb_batch *batch) kmem_cache_free(kiocb_cachep, req); ctx->reqs_active--; } + if (unlikely(!ctx->reqs_active && ctx->dead)) + wake_up_all(&ctx->wait); spin_unlock_irq(&ctx->ctx_lock); } -- cgit v1.2.3 From c415c3b47ea2754659d915cca387a20999044163 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 5 Mar 2012 14:59:13 -0800 Subject: vfork: introduce complete_vfork_done() No functional changes. Move the clear-and-complete-vfork_done code into the new trivial helper, complete_vfork_done(). Signed-off-by: Oleg Nesterov Acked-by: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 8 ++------ include/linux/sched.h | 1 + kernel/fork.c | 17 ++++++++++------- 3 files changed, 13 insertions(+), 13 deletions(-) (limited to 'fs') diff --git a/fs/exec.c b/fs/exec.c index 92ce83a11e90..dccdcec913e9 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1915,7 +1915,6 @@ static int coredump_wait(int exit_code, struct core_state *core_state) { struct task_struct *tsk = current; struct mm_struct *mm = tsk->mm; - struct completion *vfork_done; int core_waiters = -EBUSY; init_completion(&core_state->startup); @@ -1934,11 +1933,8 @@ static int coredump_wait(int exit_code, struct core_state *core_state) * Make sure nobody is waiting for us to release the VM, * otherwise we can deadlock when we wait on each other */ - vfork_done = tsk->vfork_done; - if (vfork_done) { - tsk->vfork_done = NULL; - complete(vfork_done); - } + if (tsk->vfork_done) + complete_vfork_done(tsk); if (core_waiters) wait_for_completion(&core_state->startup); diff --git a/include/linux/sched.h b/include/linux/sched.h index 7d379a6bfd88..1b25a37f2aee 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2291,6 +2291,7 @@ extern int do_execve(const char *, const char __user * const __user *, const char __user * const __user *, struct pt_regs *); extern long do_fork(unsigned long, unsigned long, struct pt_regs *, unsigned long, int __user *, int __user *); +extern void complete_vfork_done(struct task_struct *tsk); struct task_struct *fork_idle(int); extern void set_task_comm(struct task_struct *tsk, char *from); diff --git a/kernel/fork.c b/kernel/fork.c index e2cd3e2a5ae8..cf3d96379608 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -668,6 +668,14 @@ struct mm_struct *mm_access(struct task_struct *task, unsigned int mode) return mm; } +void complete_vfork_done(struct task_struct *tsk) +{ + struct completion *vfork_done = tsk->vfork_done; + + tsk->vfork_done = NULL; + complete(vfork_done); +} + /* Please note the differences between mmput and mm_release. * mmput is called whenever we stop holding onto a mm_struct, * error success whatever. @@ -683,8 +691,6 @@ struct mm_struct *mm_access(struct task_struct *task, unsigned int mode) */ void mm_release(struct task_struct *tsk, struct mm_struct *mm) { - struct completion *vfork_done = tsk->vfork_done; - /* Get rid of any futexes when releasing the mm */ #ifdef CONFIG_FUTEX if (unlikely(tsk->robust_list)) { @@ -704,11 +710,8 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm) /* Get rid of any cached register state */ deactivate_mm(tsk, mm); - /* notify parent sleeping on vfork() */ - if (vfork_done) { - tsk->vfork_done = NULL; - complete(vfork_done); - } + if (tsk->vfork_done) + complete_vfork_done(tsk); /* * If we're exiting normally, clear a user-space tid field if -- cgit v1.2.3 From 57b59c4a1400fa6c34764eab2e35a8762dc05a09 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 5 Mar 2012 14:59:13 -0800 Subject: coredump_wait: don't call complete_vfork_done() Now that CLONE_VFORK is killable, coredump_wait() no longer needs complete_vfork_done(). zap_threads() should find and kill all tasks with the same ->mm, this includes our parent if ->vfork_done is set. mm_release() becomes the only caller, unexport complete_vfork_done(). Signed-off-by: Oleg Nesterov Acked-by: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 14 ++------------ include/linux/sched.h | 1 - kernel/fork.c | 2 +- 3 files changed, 3 insertions(+), 14 deletions(-) (limited to 'fs') diff --git a/fs/exec.c b/fs/exec.c index dccdcec913e9..153dee14fe55 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1926,19 +1926,9 @@ static int coredump_wait(int exit_code, struct core_state *core_state) core_waiters = zap_threads(tsk, mm, core_state, exit_code); up_write(&mm->mmap_sem); - if (unlikely(core_waiters < 0)) - goto fail; - - /* - * Make sure nobody is waiting for us to release the VM, - * otherwise we can deadlock when we wait on each other - */ - if (tsk->vfork_done) - complete_vfork_done(tsk); - - if (core_waiters) + if (core_waiters > 0) wait_for_completion(&core_state->startup); -fail: + return core_waiters; } diff --git a/include/linux/sched.h b/include/linux/sched.h index b6467711f12e..11fcafaf4ae4 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2291,7 +2291,6 @@ extern int do_execve(const char *, const char __user * const __user *, const char __user * const __user *, struct pt_regs *); extern long do_fork(unsigned long, unsigned long, struct pt_regs *, unsigned long, int __user *, int __user *); -extern void complete_vfork_done(struct task_struct *tsk); struct task_struct *fork_idle(int); extern void set_task_comm(struct task_struct *tsk, char *from); diff --git a/kernel/fork.c b/kernel/fork.c index 892c534ce6e3..44b0e21af50e 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -668,7 +668,7 @@ struct mm_struct *mm_access(struct task_struct *task, unsigned int mode) return mm; } -void complete_vfork_done(struct task_struct *tsk) +static void complete_vfork_done(struct task_struct *tsk) { struct completion *vfork; -- cgit v1.2.3