summaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/anon_inodes.c2
-rw-r--r--fs/block_dev.c16
-rw-r--r--fs/btrfs/delayed-inode.h2
-rw-r--r--fs/btrfs/extent_io.c2
-rw-r--r--fs/btrfs/inode.c4
-rw-r--r--fs/ceph/debugfs.c2
-rw-r--r--fs/ceph/dir.c116
-rw-r--r--fs/ceph/export.c24
-rw-r--r--fs/ceph/file.c61
-rw-r--r--fs/ceph/inode.c48
-rw-r--r--fs/ceph/ioctl.c15
-rw-r--r--fs/ceph/ioctl.h1
-rw-r--r--fs/ceph/mds_client.c56
-rw-r--r--fs/ceph/mds_client.h3
-rw-r--r--fs/ceph/snap.c25
-rw-r--r--fs/ceph/super.c7
-rw-r--r--fs/ceph/super.h20
-rw-r--r--fs/ceph/xattr.c8
-rw-r--r--fs/cifs/cifsencrypt.c110
-rw-r--r--fs/cifs/cifsglob.h2
-rw-r--r--fs/cifs/dir.c5
-rw-r--r--fs/cifs/link.c8
-rw-r--r--fs/cifs/readdir.c427
-rw-r--r--fs/cifs/smbencrypt.c8
-rw-r--r--fs/dcache.c11
-rw-r--r--fs/direct-io.c2
-rw-r--r--fs/eventpoll.c2
-rw-r--r--fs/exec.c60
-rw-r--r--fs/ext2/xattr.c10
-rw-r--r--fs/ext3/balloc.c38
-rw-r--r--fs/ext3/file.c1
-rw-r--r--fs/ext3/fsync.c11
-rw-r--r--fs/ext3/ialloc.c4
-rw-r--r--fs/ext3/inode.c193
-rw-r--r--fs/ext3/ioctl.c4
-rw-r--r--fs/ext3/namei.c7
-rw-r--r--fs/ext3/super.c13
-rw-r--r--fs/ext3/xattr.c12
-rw-r--r--fs/ext4/inode.c4
-rw-r--r--fs/file_table.c2
-rw-r--r--fs/fs-writeback.c373
-rw-r--r--fs/gfs2/main.c2
-rw-r--r--fs/inode.c44
-rw-r--r--fs/jbd/checkpoint.c37
-rw-r--r--fs/jbd/commit.c57
-rw-r--r--fs/jbd/journal.c99
-rw-r--r--fs/jbd/transaction.c83
-rw-r--r--fs/jffs2/fs.c2
-rw-r--r--fs/nfs/cache_lib.h2
-rw-r--r--fs/nfs/direct.c2
-rw-r--r--fs/nfs/write.c3
-rw-r--r--fs/notify/group.c2
-rw-r--r--fs/notify/inode_mark.c2
-rw-r--r--fs/notify/mark.c2
-rw-r--r--fs/notify/notification.c2
-rw-r--r--fs/notify/vfsmount_mark.c2
-rw-r--r--fs/ntfs/inode.h2
-rw-r--r--fs/omfs/dir.c2
-rw-r--r--fs/open.c78
-rw-r--r--fs/pipe.c2
-rw-r--r--fs/posix_acl.c2
-rw-r--r--fs/proc/base.c16
-rw-r--r--fs/proc/inode.c2
-rw-r--r--fs/proc/meminfo.c2
-rw-r--r--fs/read_write.c12
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl.c6
-rw-r--r--fs/xfs/xfs_bmap.c7
-rw-r--r--fs/xfs/xfs_dir2.c16
-rw-r--r--fs/xfs/xfs_filestream.c14
-rw-r--r--fs/xfs/xfs_inode.c16
-rw-r--r--fs/xfs/xfs_inode.h2
-rw-r--r--fs/xfs/xfs_log_recover.c4
-rw-r--r--fs/xfs/xfs_mount.c2
-rw-r--r--fs/xfs/xfs_rename.c4
-rw-r--r--fs/xfs/xfs_vnodeops.c10
75 files changed, 1281 insertions, 978 deletions
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index 4d433d34736f..f11e43ed907d 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -187,7 +187,7 @@ EXPORT_SYMBOL_GPL(anon_inode_getfd);
*/
static struct inode *anon_inode_mkinode(void)
{
- struct inode *inode = new_inode(anon_inode_mnt->mnt_sb);
+ struct inode *inode = new_inode_pseudo(anon_inode_mnt->mnt_sb);
if (!inode)
return ERR_PTR(-ENOMEM);
diff --git a/fs/block_dev.c b/fs/block_dev.c
index c62fb84944d5..f55aad4d1611 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -44,24 +44,28 @@ inline struct block_device *I_BDEV(struct inode *inode)
{
return &BDEV_I(inode)->bdev;
}
-
EXPORT_SYMBOL(I_BDEV);
/*
- * move the inode from it's current bdi to the a new bdi. if the inode is dirty
- * we need to move it onto the dirty list of @dst so that the inode is always
- * on the right list.
+ * Move the inode from its current bdi to a new bdi. If the inode is dirty we
+ * need to move it onto the dirty list of @dst so that the inode is always on
+ * the right list.
*/
static void bdev_inode_switch_bdi(struct inode *inode,
struct backing_dev_info *dst)
{
- spin_lock(&inode_wb_list_lock);
+ struct backing_dev_info *old = inode->i_data.backing_dev_info;
+
+ if (unlikely(dst == old)) /* deadlock avoidance */
+ return;
+ bdi_lock_two(&old->wb, &dst->wb);
spin_lock(&inode->i_lock);
inode->i_data.backing_dev_info = dst;
if (inode->i_state & I_DIRTY)
list_move(&inode->i_wb_list, &dst->wb.b_dirty);
spin_unlock(&inode->i_lock);
- spin_unlock(&inode_wb_list_lock);
+ spin_unlock(&old->wb.list_lock);
+ spin_unlock(&dst->wb.list_lock);
}
static sector_t max_block(struct block_device *bdev)
diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h
index 8d27af4bd8b9..7083d08b2a21 100644
--- a/fs/btrfs/delayed-inode.h
+++ b/fs/btrfs/delayed-inode.h
@@ -25,7 +25,7 @@
#include <linux/mutex.h>
#include <linux/list.h>
#include <linux/wait.h>
-#include <asm/atomic.h>
+#include <linux/atomic.h>
#include "ctree.h"
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 7055d11c1efd..561262d35689 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2551,7 +2551,6 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
};
struct writeback_control wbc_writepages = {
.sync_mode = wbc->sync_mode,
- .older_than_this = NULL,
.nr_to_write = 64,
.range_start = page_offset(page) + PAGE_CACHE_SIZE,
.range_end = (loff_t)-1,
@@ -2584,7 +2583,6 @@ int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode,
};
struct writeback_control wbc_writepages = {
.sync_mode = mode,
- .older_than_this = NULL,
.nr_to_write = nr_pages * 2,
.range_start = start,
.range_end = end + 1,
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index e91b097e7252..caa26ab5ed68 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -4467,7 +4467,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
inode->i_generation = BTRFS_I(inode)->generation;
btrfs_set_inode_space_info(root, inode);
- if (mode & S_IFDIR)
+ if (S_ISDIR(mode))
owner = 0;
else
owner = 1;
@@ -4512,7 +4512,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
btrfs_inherit_iflags(inode, dir);
- if ((mode & S_IFREG)) {
+ if (S_ISREG(mode)) {
if (btrfs_test_opt(root, NODATASUM))
BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM;
if (btrfs_test_opt(root, NODATACOW) ||
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
index 0dba6915712b..fb962efdacee 100644
--- a/fs/ceph/debugfs.c
+++ b/fs/ceph/debugfs.c
@@ -102,7 +102,7 @@ static int mdsc_show(struct seq_file *s, void *p)
path = NULL;
spin_lock(&req->r_old_dentry->d_lock);
seq_printf(s, " #%llx/%.*s (%s)",
- ceph_ino(req->r_old_dentry->d_parent->d_inode),
+ ceph_ino(req->r_old_dentry_dir),
req->r_old_dentry->d_name.len,
req->r_old_dentry->d_name.name,
path ? path : "");
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 1065ac779840..382abc9a6a54 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -40,14 +40,6 @@ int ceph_init_dentry(struct dentry *dentry)
if (dentry->d_fsdata)
return 0;
- if (dentry->d_parent == NULL || /* nfs fh_to_dentry */
- ceph_snap(dentry->d_parent->d_inode) == CEPH_NOSNAP)
- d_set_d_op(dentry, &ceph_dentry_ops);
- else if (ceph_snap(dentry->d_parent->d_inode) == CEPH_SNAPDIR)
- d_set_d_op(dentry, &ceph_snapdir_dentry_ops);
- else
- d_set_d_op(dentry, &ceph_snap_dentry_ops);
-
di = kmem_cache_alloc(ceph_dentry_cachep, GFP_NOFS | __GFP_ZERO);
if (!di)
return -ENOMEM; /* oh well */
@@ -58,16 +50,42 @@ int ceph_init_dentry(struct dentry *dentry)
kmem_cache_free(ceph_dentry_cachep, di);
goto out_unlock;
}
+
+ if (dentry->d_parent == NULL || /* nfs fh_to_dentry */
+ ceph_snap(dentry->d_parent->d_inode) == CEPH_NOSNAP)
+ d_set_d_op(dentry, &ceph_dentry_ops);
+ else if (ceph_snap(dentry->d_parent->d_inode) == CEPH_SNAPDIR)
+ d_set_d_op(dentry, &ceph_snapdir_dentry_ops);
+ else
+ d_set_d_op(dentry, &ceph_snap_dentry_ops);
+
di->dentry = dentry;
di->lease_session = NULL;
- dentry->d_fsdata = di;
dentry->d_time = jiffies;
+ /* avoid reordering d_fsdata setup so that the check above is safe */
+ smp_mb();
+ dentry->d_fsdata = di;
ceph_dentry_lru_add(dentry);
out_unlock:
spin_unlock(&dentry->d_lock);
return 0;
}
+struct inode *ceph_get_dentry_parent_inode(struct dentry *dentry)
+{
+ struct inode *inode = NULL;
+
+ if (!dentry)
+ return NULL;
+
+ spin_lock(&dentry->d_lock);
+ if (dentry->d_parent) {
+ inode = dentry->d_parent->d_inode;
+ ihold(inode);
+ }
+ spin_unlock(&dentry->d_lock);
+ return inode;
+}
/*
@@ -133,7 +151,7 @@ more:
d_unhashed(dentry) ? "!hashed" : "hashed",
parent->d_subdirs.prev, parent->d_subdirs.next);
if (p == &parent->d_subdirs) {
- fi->at_end = 1;
+ fi->flags |= CEPH_F_ATEND;
goto out_unlock;
}
spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
@@ -234,7 +252,7 @@ static int ceph_readdir(struct file *filp, void *dirent, filldir_t filldir)
const int max_bytes = fsc->mount_options->max_readdir_bytes;
dout("readdir %p filp %p frag %u off %u\n", inode, filp, frag, off);
- if (fi->at_end)
+ if (fi->flags & CEPH_F_ATEND)
return 0;
/* always start with . and .. */
@@ -403,7 +421,7 @@ more:
dout("readdir next frag is %x\n", frag);
goto more;
}
- fi->at_end = 1;
+ fi->flags |= CEPH_F_ATEND;
/*
* if dir_release_count still matches the dir, no dentries
@@ -435,7 +453,7 @@ static void reset_readdir(struct ceph_file_info *fi)
dput(fi->dentry);
fi->dentry = NULL;
}
- fi->at_end = 0;
+ fi->flags &= ~CEPH_F_ATEND;
}
static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int origin)
@@ -463,7 +481,7 @@ static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int origin)
if (offset != file->f_pos) {
file->f_pos = offset;
file->f_version = 0;
- fi->at_end = 0;
+ fi->flags &= ~CEPH_F_ATEND;
}
retval = offset;
@@ -488,21 +506,13 @@ out:
}
/*
- * Process result of a lookup/open request.
- *
- * Mainly, make sure we return the final req->r_dentry (if it already
- * existed) in place of the original VFS-provided dentry when they
- * differ.
- *
- * Gracefully handle the case where the MDS replies with -ENOENT and
- * no trace (which it may do, at its discretion, e.g., if it doesn't
- * care to issue a lease on the negative dentry).
+ * Handle lookups for the hidden .snap directory.
*/
-struct dentry *ceph_finish_lookup(struct ceph_mds_request *req,
- struct dentry *dentry, int err)
+int ceph_handle_snapdir(struct ceph_mds_request *req,
+ struct dentry *dentry, int err)
{
struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb);
- struct inode *parent = dentry->d_parent->d_inode;
+ struct inode *parent = dentry->d_parent->d_inode; /* we hold i_mutex */
/* .snap dir? */
if (err == -ENOENT &&
@@ -516,7 +526,23 @@ struct dentry *ceph_finish_lookup(struct ceph_mds_request *req,
d_add(dentry, inode);
err = 0;
}
+ return err;
+}
+/*
+ * Figure out final result of a lookup/open request.
+ *
+ * Mainly, make sure we return the final req->r_dentry (if it already
+ * existed) in place of the original VFS-provided dentry when they
+ * differ.
+ *
+ * Gracefully handle the case where the MDS replies with -ENOENT and
+ * no trace (which it may do, at its discretion, e.g., if it doesn't
+ * care to issue a lease on the negative dentry).
+ */
+struct dentry *ceph_finish_lookup(struct ceph_mds_request *req,
+ struct dentry *dentry, int err)
+{
if (err == -ENOENT) {
/* no trace? */
err = 0;
@@ -610,6 +636,7 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
req->r_args.getattr.mask = cpu_to_le32(CEPH_STAT_CAP_INODE);
req->r_locked_dir = dir;
err = ceph_mdsc_do_request(mdsc, NULL, req);
+ err = ceph_handle_snapdir(req, dentry, err);
dentry = ceph_finish_lookup(req, dentry, err);
ceph_mdsc_put_request(req); /* will dput(dentry) */
dout("lookup result=%p\n", dentry);
@@ -789,6 +816,7 @@ static int ceph_link(struct dentry *old_dentry, struct inode *dir,
req->r_dentry = dget(dentry);
req->r_num_caps = 2;
req->r_old_dentry = dget(old_dentry); /* or inode? hrm. */
+ req->r_old_dentry_dir = ceph_get_dentry_parent_inode(old_dentry);
req->r_locked_dir = dir;
req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
@@ -887,6 +915,7 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry,
req->r_dentry = dget(new_dentry);
req->r_num_caps = 2;
req->r_old_dentry = dget(old_dentry);
+ req->r_old_dentry_dir = ceph_get_dentry_parent_inode(old_dentry);
req->r_locked_dir = new_dir;
req->r_old_dentry_drop = CEPH_CAP_FILE_SHARED;
req->r_old_dentry_unless = CEPH_CAP_FILE_EXCL;
@@ -1002,36 +1031,38 @@ static int dir_lease_is_valid(struct inode *dir, struct dentry *dentry)
*/
static int ceph_d_revalidate(struct dentry *dentry, struct nameidata *nd)
{
+ int valid = 0;
struct inode *dir;
if (nd && nd->flags & LOOKUP_RCU)
return -ECHILD;
- dir = dentry->d_parent->d_inode;
-
dout("d_revalidate %p '%.*s' inode %p offset %lld\n", dentry,
dentry->d_name.len, dentry->d_name.name, dentry->d_inode,
ceph_dentry(dentry)->offset);
+ dir = ceph_get_dentry_parent_inode(dentry);
+
/* always trust cached snapped dentries, snapdir dentry */
if (ceph_snap(dir) != CEPH_NOSNAP) {
dout("d_revalidate %p '%.*s' inode %p is SNAPPED\n", dentry,
dentry->d_name.len, dentry->d_name.name, dentry->d_inode);
- goto out_touch;
+ valid = 1;
+ } else if (dentry->d_inode &&
+ ceph_snap(dentry->d_inode) == CEPH_SNAPDIR) {
+ valid = 1;
+ } else if (dentry_lease_is_valid(dentry) ||
+ dir_lease_is_valid(dir, dentry)) {
+ valid = 1;
}
- if (dentry->d_inode && ceph_snap(dentry->d_inode) == CEPH_SNAPDIR)
- goto out_touch;
-
- if (dentry_lease_is_valid(dentry) ||
- dir_lease_is_valid(dir, dentry))
- goto out_touch;
- dout("d_revalidate %p invalid\n", dentry);
- d_drop(dentry);
- return 0;
-out_touch:
- ceph_dentry_lru_touch(dentry);
- return 1;
+ dout("d_revalidate %p %s\n", dentry, valid ? "valid" : "invalid");
+ if (valid)
+ ceph_dentry_lru_touch(dentry);
+ else
+ d_drop(dentry);
+ iput(dir);
+ return valid;
}
/*
@@ -1228,9 +1259,8 @@ void ceph_dentry_lru_del(struct dentry *dn)
* Return name hash for a given dentry. This is dependent on
* the parent directory's hash function.
*/
-unsigned ceph_dentry_hash(struct dentry *dn)
+unsigned ceph_dentry_hash(struct inode *dir, struct dentry *dn)
{
- struct inode *dir = dn->d_parent->d_inode;
struct ceph_inode_info *dci = ceph_inode(dir);
switch (dci->i_dir_layout.dl_dir_hash) {
diff --git a/fs/ceph/export.c b/fs/ceph/export.c
index f67b687550de..9fbcdecaaccd 100644
--- a/fs/ceph/export.c
+++ b/fs/ceph/export.c
@@ -46,7 +46,7 @@ static int ceph_encode_fh(struct dentry *dentry, u32 *rawfh, int *max_len,
int type;
struct ceph_nfs_fh *fh = (void *)rawfh;
struct ceph_nfs_confh *cfh = (void *)rawfh;
- struct dentry *parent = dentry->d_parent;
+ struct dentry *parent;
struct inode *inode = dentry->d_inode;
int connected_handle_length = sizeof(*cfh)/4;
int handle_length = sizeof(*fh)/4;
@@ -55,26 +55,33 @@ static int ceph_encode_fh(struct dentry *dentry, u32 *rawfh, int *max_len,
if (ceph_snap(inode) != CEPH_NOSNAP)
return -EINVAL;
+ spin_lock(&dentry->d_lock);
+ parent = dget(dentry->d_parent);
+ spin_unlock(&dentry->d_lock);
+
if (*max_len >= connected_handle_length) {
dout("encode_fh %p connectable\n", dentry);
cfh->ino = ceph_ino(dentry->d_inode);
cfh->parent_ino = ceph_ino(parent->d_inode);
- cfh->parent_name_hash = ceph_dentry_hash(parent);
+ cfh->parent_name_hash = ceph_dentry_hash(parent->d_inode,
+ dentry);
*max_len = connected_handle_length;
type = 2;
} else if (*max_len >= handle_length) {
if (connectable) {
*max_len = connected_handle_length;
- return 255;
+ type = 255;
+ } else {
+ dout("encode_fh %p\n", dentry);
+ fh->ino = ceph_ino(dentry->d_inode);
+ *max_len = handle_length;
+ type = 1;
}
- dout("encode_fh %p\n", dentry);
- fh->ino = ceph_ino(dentry->d_inode);
- *max_len = handle_length;
- type = 1;
} else {
*max_len = handle_length;
- return 255;
+ type = 255;
}
+ dput(parent);
return type;
}
@@ -123,7 +130,6 @@ static struct dentry *__fh_to_dentry(struct super_block *sb,
return dentry;
}
err = ceph_init_dentry(dentry);
-
if (err < 0) {
iput(inode);
return ERR_PTR(err);
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 0d0eae05598f..ce549d31eeb7 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -122,7 +122,7 @@ int ceph_open(struct inode *inode, struct file *file)
struct ceph_mds_client *mdsc = fsc->mdsc;
struct ceph_mds_request *req;
struct ceph_file_info *cf = file->private_data;
- struct inode *parent_inode = file->f_dentry->d_parent->d_inode;
+ struct inode *parent_inode = NULL;
int err;
int flags, fmode, wanted;
@@ -194,7 +194,10 @@ int ceph_open(struct inode *inode, struct file *file)
req->r_inode = inode;
ihold(inode);
req->r_num_caps = 1;
+ if (flags & (O_CREAT|O_TRUNC))
+ parent_inode = ceph_get_dentry_parent_inode(file->f_dentry);
err = ceph_mdsc_do_request(mdsc, parent_inode, req);
+ iput(parent_inode);
if (!err)
err = ceph_init_file(inode, file, req->r_fmode);
ceph_mdsc_put_request(req);
@@ -222,9 +225,9 @@ struct dentry *ceph_lookup_open(struct inode *dir, struct dentry *dentry,
{
struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
struct ceph_mds_client *mdsc = fsc->mdsc;
- struct file *file = nd->intent.open.file;
- struct inode *parent_inode = get_dentry_parent_inode(file->f_dentry);
+ struct file *file;
struct ceph_mds_request *req;
+ struct dentry *ret;
int err;
int flags = nd->intent.open.flags;
@@ -242,16 +245,24 @@ struct dentry *ceph_lookup_open(struct inode *dir, struct dentry *dentry,
req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
}
req->r_locked_dir = dir; /* caller holds dir->i_mutex */
- err = ceph_mdsc_do_request(mdsc, parent_inode, req);
- dentry = ceph_finish_lookup(req, dentry, err);
- if (!err && (flags & O_CREAT) && !req->r_reply_info.head->is_dentry)
+ err = ceph_mdsc_do_request(mdsc,
+ (flags & (O_CREAT|O_TRUNC)) ? dir : NULL,
+ req);
+ err = ceph_handle_snapdir(req, dentry, err);
+ if (err)
+ goto out;
+ if ((flags & O_CREAT) && !req->r_reply_info.head->is_dentry)
err = ceph_handle_notrace_create(dir, dentry);
- if (!err)
- err = ceph_init_file(req->r_dentry->d_inode, file,
- req->r_fmode);
+ if (err)
+ goto out;
+ file = lookup_instantiate_filp(nd, req->r_dentry, ceph_open);
+ if (IS_ERR(file))
+ err = PTR_ERR(file);
+out:
+ ret = ceph_finish_lookup(req, dentry, err);
ceph_mdsc_put_request(req);
- dout("ceph_lookup_open result=%p\n", dentry);
- return dentry;
+ dout("ceph_lookup_open result=%p\n", ret);
+ return ret;
}
int ceph_release(struct inode *inode, struct file *file)
@@ -643,7 +654,8 @@ again:
if ((got & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) == 0 ||
(iocb->ki_filp->f_flags & O_DIRECT) ||
- (inode->i_sb->s_flags & MS_SYNCHRONOUS))
+ (inode->i_sb->s_flags & MS_SYNCHRONOUS) ||
+ (fi->flags & CEPH_F_SYNC))
/* hmm, this isn't really async... */
ret = ceph_sync_read(filp, base, len, ppos, &checkeof);
else
@@ -712,7 +724,7 @@ retry_snap:
want = CEPH_CAP_FILE_BUFFER;
ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, &got, endoff);
if (ret < 0)
- goto out;
+ goto out_put;
dout("aio_write %p %llx.%llx %llu~%u got cap refs on %s\n",
inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len,
@@ -720,12 +732,23 @@ retry_snap:
if ((got & (CEPH_CAP_FILE_BUFFER|CEPH_CAP_FILE_LAZYIO)) == 0 ||
(iocb->ki_filp->f_flags & O_DIRECT) ||
- (inode->i_sb->s_flags & MS_SYNCHRONOUS)) {
+ (inode->i_sb->s_flags & MS_SYNCHRONOUS) ||
+ (fi->flags & CEPH_F_SYNC)) {
ret = ceph_sync_write(file, iov->iov_base, iov->iov_len,
&iocb->ki_pos);
} else {
- ret = generic_file_aio_write(iocb, iov, nr_segs, pos);
+ /*
+ * buffered write; drop Fw early to avoid slow
+ * revocation if we get stuck on balance_dirty_pages
+ */
+ int dirty;
+ spin_lock(&inode->i_lock);
+ dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR);
+ spin_unlock(&inode->i_lock);
+ ceph_put_cap_refs(ci, got);
+
+ ret = generic_file_aio_write(iocb, iov, nr_segs, pos);
if ((ret >= 0 || ret == -EIOCBQUEUED) &&
((file->f_flags & O_SYNC) || IS_SYNC(file->f_mapping->host)
|| ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_NEARFULL))) {
@@ -733,7 +756,12 @@ retry_snap:
if (err < 0)
ret = err;
}
+
+ if (dirty)
+ __mark_inode_dirty(inode, dirty);
+ goto out;
}
+
if (ret >= 0) {
int dirty;
spin_lock(&inode->i_lock);
@@ -743,12 +771,13 @@ retry_snap:
__mark_inode_dirty(inode, dirty);
}
-out:
+out_put:
dout("aio_write %p %llx.%llx %llu~%u dropping cap refs on %s\n",
inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len,
ceph_cap_string(got));
ceph_put_cap_refs(ci, got);
+out:
if (ret == -EOLDSNAPC) {
dout("aio_write %p %llx.%llx %llu~%u got EOLDSNAPC, retrying\n",
inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len);
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index dfb2831d8d85..095799ba9dd1 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -560,7 +560,8 @@ static int fill_inode(struct inode *inode,
struct ceph_mds_reply_inode *info = iinfo->in;
struct ceph_inode_info *ci = ceph_inode(inode);
int i;
- int issued, implemented;
+ int issued = 0, implemented;
+ int updating_inode = 0;
struct timespec mtime, atime, ctime;
u32 nsplits;
struct ceph_buffer *xattr_blob = NULL;
@@ -599,7 +600,8 @@ static int fill_inode(struct inode *inode,
if (le64_to_cpu(info->version) > 0 &&
(ci->i_version & ~1) >= le64_to_cpu(info->version))
goto no_change;
-
+
+ updating_inode = 1;
issued = __ceph_caps_issued(ci, &implemented);
issued |= implemented | __ceph_caps_dirty(ci);
@@ -707,17 +709,6 @@ static int fill_inode(struct inode *inode,
ci->i_rfiles = le64_to_cpu(info->rfiles);
ci->i_rsubdirs = le64_to_cpu(info->rsubdirs);
ceph_decode_timespec(&ci->i_rctime, &info->rctime);
-
- /* set dir completion flag? */
- if (ci->i_files == 0 && ci->i_subdirs == 0 &&
- ceph_snap(inode) == CEPH_NOSNAP &&
- (le32_to_cpu(info->cap.caps) & CEPH_CAP_FILE_SHARED) &&
- (issued & CEPH_CAP_FILE_EXCL) == 0 &&
- (ci->i_ceph_flags & CEPH_I_COMPLETE) == 0) {
- dout(" marking %p complete (empty)\n", inode);
- /* ci->i_ceph_flags |= CEPH_I_COMPLETE; */
- ci->i_max_offset = 2;
- }
break;
default:
pr_err("fill_inode %llx.%llx BAD mode 0%o\n",
@@ -774,6 +765,19 @@ no_change:
__ceph_get_fmode(ci, cap_fmode);
}
+ /* set dir completion flag? */
+ if (S_ISDIR(inode->i_mode) &&
+ updating_inode && /* didn't jump to no_change */
+ ci->i_files == 0 && ci->i_subdirs == 0 &&
+ ceph_snap(inode) == CEPH_NOSNAP &&
+ (le32_to_cpu(info->cap.caps) & CEPH_CAP_FILE_SHARED) &&
+ (issued & CEPH_CAP_FILE_EXCL) == 0 &&
+ (ci->i_ceph_flags & CEPH_I_COMPLETE) == 0) {
+ dout(" marking %p complete (empty)\n", inode);
+ /* ci->i_ceph_flags |= CEPH_I_COMPLETE; */
+ ci->i_max_offset = 2;
+ }
+
/* update delegation info? */
if (dirinfo)
ceph_fill_dirfrag(inode, dirinfo);
@@ -805,14 +809,14 @@ static void update_dentry_lease(struct dentry *dentry,
return;
spin_lock(&dentry->d_lock);
- dout("update_dentry_lease %p mask %d duration %lu ms ttl %lu\n",
- dentry, le16_to_cpu(lease->mask), duration, ttl);
+ dout("update_dentry_lease %p duration %lu ms ttl %lu\n",
+ dentry, duration, ttl);
/* make lease_rdcache_gen match directory */
dir = dentry->d_parent->d_inode;
di->lease_shared_gen = ceph_inode(dir)->i_shared_gen;
- if (lease->mask == 0)
+ if (duration == 0)
goto out_unlock;
if (di->lease_gen == session->s_cap_gen &&
@@ -839,11 +843,13 @@ out_unlock:
/*
* Set dentry's directory position based on the current dir's max, and
* order it in d_subdirs, so that dcache_readdir behaves.
+ *
+ * Always called under directory's i_mutex.
*/
static void ceph_set_dentry_offset(struct dentry *dn)
{
struct dentry *dir = dn->d_parent;
- struct inode *inode = dn->d_parent->d_inode;
+ struct inode *inode = dir->d_inode;
struct ceph_dentry_info *di;
BUG_ON(!inode);
@@ -1022,9 +1028,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
/* do we have a dn lease? */
have_lease = have_dir_cap ||
- (le16_to_cpu(rinfo->dlease->mask) &
- CEPH_LOCK_DN);
-
+ le32_to_cpu(rinfo->dlease->duration_ms);
if (!have_lease)
dout("fill_trace no dentry lease or dir cap\n");
@@ -1560,7 +1564,7 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
{
struct inode *inode = dentry->d_inode;
struct ceph_inode_info *ci = ceph_inode(inode);
- struct inode *parent_inode = dentry->d_parent->d_inode;
+ struct inode *parent_inode;
const unsigned int ia_valid = attr->ia_valid;
struct ceph_mds_request *req;
struct ceph_mds_client *mdsc = ceph_sb_to_client(dentry->d_sb)->mdsc;
@@ -1743,7 +1747,9 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
req->r_inode_drop = release;
req->r_args.setattr.mask = cpu_to_le32(mask);
req->r_num_caps = 1;
+ parent_inode = ceph_get_dentry_parent_inode(dentry);
err = ceph_mdsc_do_request(mdsc, parent_inode, req);
+ iput(parent_inode);
}
dout("setattr %p result=%d (%s locally, %d remote)\n", inode, err,
ceph_cap_string(dirtied), mask);
diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c
index ef0b5f48e13a..3b256b50f7d8 100644
--- a/fs/ceph/ioctl.c
+++ b/fs/ceph/ioctl.c
@@ -38,7 +38,7 @@ static long ceph_ioctl_get_layout(struct file *file, void __user *arg)
static long ceph_ioctl_set_layout(struct file *file, void __user *arg)
{
struct inode *inode = file->f_dentry->d_inode;
- struct inode *parent_inode = file->f_dentry->d_parent->d_inode;
+ struct inode *parent_inode;
struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
struct ceph_mds_request *req;
struct ceph_ioctl_layout l;
@@ -87,7 +87,9 @@ static long ceph_ioctl_set_layout(struct file *file, void __user *arg)
req->r_args.setlayout.layout.fl_pg_preferred =
cpu_to_le32(l.preferred_osd);
+ parent_inode = ceph_get_dentry_parent_inode(file->f_dentry);
err = ceph_mdsc_do_request(mdsc, parent_inode, req);
+ iput(parent_inode);
ceph_mdsc_put_request(req);
return err;
}
@@ -231,6 +233,14 @@ static long ceph_ioctl_lazyio(struct file *file)
return 0;
}
+static long ceph_ioctl_syncio(struct file *file)
+{
+ struct ceph_file_info *fi = file->private_data;
+
+ fi->flags |= CEPH_F_SYNC;
+ return 0;
+}
+
long ceph_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
dout("ioctl file %p cmd %u arg %lu\n", file, cmd, arg);
@@ -249,6 +259,9 @@ long ceph_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
case CEPH_IOC_LAZYIO:
return ceph_ioctl_lazyio(file);
+
+ case CEPH_IOC_SYNCIO:
+ return ceph_ioctl_syncio(file);
}
return -ENOTTY;
diff --git a/fs/ceph/ioctl.h b/fs/ceph/ioctl.h
index 52e8fd74d450..0c5167e43180 100644
--- a/fs/ceph/ioctl.h
+++ b/fs/ceph/ioctl.h
@@ -40,5 +40,6 @@ struct ceph_ioctl_dataloc {
struct ceph_ioctl_dataloc)
#define CEPH_IOC_LAZYIO _IO(CEPH_IOCTL_MAGIC, 4)
+#define CEPH_IOC_SYNCIO _IO(CEPH_IOCTL_MAGIC, 5)
#endif
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 0c1d91756528..fee028b5332e 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -483,22 +483,26 @@ void ceph_mdsc_release_request(struct kref *kref)
destroy_reply_info(&req->r_reply_info);
}
if (req->r_inode) {
- ceph_put_cap_refs(ceph_inode(req->r_inode),
- CEPH_CAP_PIN);
+ ceph_put_cap_refs(ceph_inode(req->r_inode), CEPH_CAP_PIN);
iput(req->r_inode);
}
if (req->r_locked_dir)
- ceph_put_cap_refs(ceph_inode(req->r_locked_dir),
- CEPH_CAP_PIN);
+ ceph_put_cap_refs(ceph_inode(req->r_locked_dir), CEPH_CAP_PIN);
if (req->r_target_inode)
iput(req->r_target_inode);
if (req->r_dentry)
dput(req->r_dentry);
if (req->r_old_dentry) {
- ceph_put_cap_refs(
- ceph_inode(req->r_old_dentry->d_parent->d_inode),
- CEPH_CAP_PIN);
+ /*
+ * track (and drop pins for) r_old_dentry_dir
+ * separately, since r_old_dentry's d_parent may have
+ * changed between the dir mutex being dropped and
+ * this request being freed.
+ */
+ ceph_put_cap_refs(ceph_inode(req->r_old_dentry_dir),
+ CEPH_CAP_PIN);
dput(req->r_old_dentry);
+ iput(req->r_old_dentry_dir);
}
kfree(req->r_path1);
kfree(req->r_path2);
@@ -617,6 +621,12 @@ static void __unregister_request(struct ceph_mds_client *mdsc,
*/
struct dentry *get_nonsnap_parent(struct dentry *dentry)
{
+ /*
+ * we don't need to worry about protecting the d_parent access
+ * here because we never renaming inside the snapped namespace
+ * except to resplice to another snapdir, and either the old or new
+ * result is a valid result.
+ */
while (!IS_ROOT(dentry) && ceph_snap(dentry->d_inode) != CEPH_NOSNAP)
dentry = dentry->d_parent;
return dentry;
@@ -652,7 +662,9 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
if (req->r_inode) {
inode = req->r_inode;
} else if (req->r_dentry) {
- struct inode *dir = req->r_dentry->d_parent->d_inode;
+ /* ignore race with rename; old or new d_parent is okay */
+ struct dentry *parent = req->r_dentry->d_parent;
+ struct inode *dir = parent->d_inode;
if (dir->i_sb != mdsc->fsc->sb) {
/* not this fs! */
@@ -660,8 +672,7 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
} else if (ceph_snap(dir) != CEPH_NOSNAP) {
/* direct snapped/virtual snapdir requests
* based on parent dir inode */
- struct dentry *dn =
- get_nonsnap_parent(req->r_dentry->d_parent);
+ struct dentry *dn = get_nonsnap_parent(parent);
inode = dn->d_inode;
dout("__choose_mds using nonsnap parent %p\n", inode);
} else if (req->r_dentry->d_inode) {
@@ -670,7 +681,7 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
} else {
/* dir + name */
inode = dir;
- hash = ceph_dentry_hash(req->r_dentry);
+ hash = ceph_dentry_hash(dir, req->r_dentry);
is_hash = true;
}
}
@@ -1931,9 +1942,8 @@ int ceph_mdsc_do_request(struct ceph_mds_client *mdsc,
if (req->r_locked_dir)
ceph_get_cap_refs(ceph_inode(req->r_locked_dir), CEPH_CAP_PIN);
if (req->r_old_dentry)
- ceph_get_cap_refs(
- ceph_inode(req->r_old_dentry->d_parent->d_inode),
- CEPH_CAP_PIN);
+ ceph_get_cap_refs(ceph_inode(req->r_old_dentry_dir),
+ CEPH_CAP_PIN);
/* issue */
mutex_lock(&mdsc->mutex);
@@ -2714,7 +2724,6 @@ static void handle_lease(struct ceph_mds_client *mdsc,
struct ceph_mds_lease *h = msg->front.iov_base;
u32 seq;
struct ceph_vino vino;
- int mask;
struct qstr dname;
int release = 0;
@@ -2725,7 +2734,6 @@ static void handle_lease(struct ceph_mds_client *mdsc,
goto bad;
vino.ino = le64_to_cpu(h->ino);
vino.snap = CEPH_NOSNAP;
- mask = le16_to_cpu(h->mask);
seq = le32_to_cpu(h->seq);
dname.name = (void *)h + sizeof(*h) + sizeof(u32);
dname.len = msg->front.iov_len - sizeof(*h) - sizeof(u32);
@@ -2737,8 +2745,8 @@ static void handle_lease(struct ceph_mds_client *mdsc,
/* lookup inode */
inode = ceph_find_inode(sb, vino);
- dout("handle_lease %s, mask %d, ino %llx %p %.*s\n",
- ceph_lease_op_name(h->action), mask, vino.ino, inode,
+ dout("handle_lease %s, ino %llx %p %.*s\n",
+ ceph_lease_op_name(h->action), vino.ino, inode,
dname.len, dname.name);
if (inode == NULL) {
dout("handle_lease no inode %llx\n", vino.ino);
@@ -2828,7 +2836,6 @@ void ceph_mdsc_lease_send_msg(struct ceph_mds_session *session,
return;
lease = msg->front.iov_base;
lease->action = action;
- lease->mask = cpu_to_le16(1);
lease->ino = cpu_to_le64(ceph_vino(inode).ino);
lease->first = lease->last = cpu_to_le64(ceph_vino(inode).snap);
lease->seq = cpu_to_le32(seq);
@@ -2850,7 +2857,7 @@ void ceph_mdsc_lease_send_msg(struct ceph_mds_session *session,
* Pass @inode always, @dentry is optional.
*/
void ceph_mdsc_lease_release(struct ceph_mds_client *mdsc, struct inode *inode,
- struct dentry *dentry, int mask)
+ struct dentry *dentry)
{
struct ceph_dentry_info *di;
struct ceph_mds_session *session;
@@ -2858,7 +2865,6 @@ void ceph_mdsc_lease_release(struct ceph_mds_client *mdsc, struct inode *inode,
BUG_ON(inode == NULL);
BUG_ON(dentry == NULL);
- BUG_ON(mask == 0);
/* is dentry lease valid? */
spin_lock(&dentry->d_lock);
@@ -2868,8 +2874,8 @@ void ceph_mdsc_lease_release(struct ceph_mds_client *mdsc, struct inode *inode,
di->lease_gen != di->lease_session->s_cap_gen ||
!time_before(jiffies, dentry->d_time)) {
dout("lease_release inode %p dentry %p -- "
- "no lease on %d\n",
- inode, dentry, mask);
+ "no lease\n",
+ inode, dentry);
spin_unlock(&dentry->d_lock);
return;
}
@@ -2880,8 +2886,8 @@ void ceph_mdsc_lease_release(struct ceph_mds_client *mdsc, struct inode *inode,
__ceph_mdsc_drop_dentry_lease(dentry);
spin_unlock(&dentry->d_lock);
- dout("lease_release inode %p dentry %p mask %d to mds%d\n",
- inode, dentry, mask, session->s_mds);
+ dout("lease_release inode %p dentry %p to mds%d\n",
+ inode, dentry, session->s_mds);
ceph_mdsc_lease_send_msg(session, inode, dentry,
CEPH_MDS_LEASE_RELEASE, seq);
ceph_put_mds_session(session);
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index 7d8a0d662d56..4bb239921dbd 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -171,6 +171,7 @@ struct ceph_mds_request {
struct inode *r_inode; /* arg1 */
struct dentry *r_dentry; /* arg1 */
struct dentry *r_old_dentry; /* arg2: rename from or link from */
+ struct inode *r_old_dentry_dir; /* arg2: old dentry's parent dir */
char *r_path1, *r_path2;
struct ceph_vino r_ino1, r_ino2;
@@ -333,7 +334,7 @@ extern void ceph_mdsc_sync(struct ceph_mds_client *mdsc);
extern void ceph_mdsc_lease_release(struct ceph_mds_client *mdsc,
struct inode *inode,
- struct dentry *dn, int mask);
+ struct dentry *dn);
extern void ceph_invalidate_dir_request(struct ceph_mds_request *req);
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index 54b14de2e729..e26437191333 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -449,6 +449,15 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci)
spin_lock(&inode->i_lock);
used = __ceph_caps_used(ci);
dirty = __ceph_caps_dirty(ci);
+
+ /*
+ * If there is a write in progress, treat that as a dirty Fw,
+ * even though it hasn't completed yet; by the time we finish
+ * up this capsnap it will be.
+ */
+ if (used & CEPH_CAP_FILE_WR)
+ dirty |= CEPH_CAP_FILE_WR;
+
if (__ceph_have_pending_cap_snap(ci)) {
/* there is no point in queuing multiple "pending" cap_snaps,
as no new writes are allowed to start when pending, so any
@@ -456,13 +465,19 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci)
cap_snap. lucky us. */
dout("queue_cap_snap %p already pending\n", inode);
kfree(capsnap);
- } else if (ci->i_wrbuffer_ref_head || (used & CEPH_CAP_FILE_WR) ||
- (dirty & (CEPH_CAP_AUTH_EXCL|CEPH_CAP_XATTR_EXCL|
- CEPH_CAP_FILE_EXCL|CEPH_CAP_FILE_WR))) {
+ } else if (dirty & (CEPH_CAP_AUTH_EXCL|CEPH_CAP_XATTR_EXCL|
+ CEPH_CAP_FILE_EXCL|CEPH_CAP_FILE_WR)) {
struct ceph_snap_context *snapc = ci->i_head_snapc;
- dout("queue_cap_snap %p cap_snap %p queuing under %p\n", inode,
- capsnap, snapc);
+ /*
+ * if we are a sync write, we may need to go to the snaprealm
+ * to get the current snapc.
+ */
+ if (!snapc)
+ snapc = ci->i_snap_realm->cached_context;
+
+ dout("queue_cap_snap %p cap_snap %p queuing under %p %s\n",
+ inode, capsnap, snapc, ceph_cap_string(dirty));
ihold(inode);
atomic_set(&capsnap->nref, 1);
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index f2f77fd3c14c..d47c5ec7fb1f 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -73,8 +73,7 @@ static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf)
*/
buf->f_bsize = 1 << CEPH_BLOCK_SHIFT;
buf->f_blocks = le64_to_cpu(st.kb) >> (CEPH_BLOCK_SHIFT-10);
- buf->f_bfree = (le64_to_cpu(st.kb) - le64_to_cpu(st.kb_used)) >>
- (CEPH_BLOCK_SHIFT-10);
+ buf->f_bfree = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10);
buf->f_bavail = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10);
buf->f_files = le64_to_cpu(st.num_objects);
@@ -780,6 +779,10 @@ static int ceph_register_bdi(struct super_block *sb,
fsc->backing_dev_info.ra_pages =
(fsc->mount_options->rsize + PAGE_CACHE_SIZE - 1)
>> PAGE_SHIFT;
+ else
+ fsc->backing_dev_info.ra_pages =
+ default_backing_dev_info.ra_pages;
+
err = bdi_register(&fsc->backing_dev_info, NULL, "ceph-%d",
atomic_long_inc_return(&bdi_seq));
if (!err)
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 30446b144e3d..a23eed526f05 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -543,13 +543,16 @@ extern void ceph_reservation_status(struct ceph_fs_client *client,
/*
* we keep buffered readdir results attached to file->private_data
*/
+#define CEPH_F_SYNC 1
+#define CEPH_F_ATEND 2
+
struct ceph_file_info {
- int fmode; /* initialized on open */
+ short fmode; /* initialized on open */
+ short flags; /* CEPH_F_* */
/* readdir: position within the dir */
u32 frag;
struct ceph_mds_request *last_readdir;
- int at_end;
/* readdir: position within a frag */
unsigned offset; /* offset of last chunk, adjusted for . and .. */
@@ -789,6 +792,8 @@ extern const struct dentry_operations ceph_dentry_ops, ceph_snap_dentry_ops,
ceph_snapdir_dentry_ops;
extern int ceph_handle_notrace_create(struct inode *dir, struct dentry *dentry);
+extern int ceph_handle_snapdir(struct ceph_mds_request *req,
+ struct dentry *dentry, int err);
extern struct dentry *ceph_finish_lookup(struct ceph_mds_request *req,
struct dentry *dentry, int err);
@@ -796,7 +801,8 @@ extern void ceph_dentry_lru_add(struct dentry *dn);
extern void ceph_dentry_lru_touch(struct dentry *dn);
extern void ceph_dentry_lru_del(struct dentry *dn);
extern void ceph_invalidate_dentry_lease(struct dentry *dentry);
-extern unsigned ceph_dentry_hash(struct dentry *dn);
+extern unsigned ceph_dentry_hash(struct inode *dir, struct dentry *dn);
+extern struct inode *ceph_get_dentry_parent_inode(struct dentry *dentry);
/*
* our d_ops vary depending on whether the inode is live,
@@ -819,14 +825,6 @@ extern int ceph_encode_locks(struct inode *i, struct ceph_pagelist *p,
int p_locks, int f_locks);
extern int lock_to_ceph_filelock(struct file_lock *fl, struct ceph_filelock *c);
-static inline struct inode *get_dentry_parent_inode(struct dentry *dentry)
-{
- if (dentry && dentry->d_parent)
- return dentry->d_parent->d_inode;
-
- return NULL;
-}
-
/* debugfs.c */
extern int ceph_fs_debugfs_init(struct ceph_fs_client *client);
extern void ceph_fs_debugfs_cleanup(struct ceph_fs_client *client);
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index f42d730f1b66..96c6739a0280 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -629,7 +629,7 @@ static int ceph_sync_setxattr(struct dentry *dentry, const char *name,
struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb);
struct inode *inode = dentry->d_inode;
struct ceph_inode_info *ci = ceph_inode(inode);
- struct inode *parent_inode = dentry->d_parent->d_inode;
+ struct inode *parent_inode;
struct ceph_mds_request *req;
struct ceph_mds_client *mdsc = fsc->mdsc;
int err;
@@ -677,7 +677,9 @@ static int ceph_sync_setxattr(struct dentry *dentry, const char *name,
req->r_data_len = size;
dout("xattr.ver (before): %lld\n", ci->i_xattrs.version);
+ parent_inode = ceph_get_dentry_parent_inode(dentry);
err = ceph_mdsc_do_request(mdsc, parent_inode, req);
+ iput(parent_inode);
ceph_mdsc_put_request(req);
dout("xattr.ver (after): %lld\n", ci->i_xattrs.version);
@@ -788,7 +790,7 @@ static int ceph_send_removexattr(struct dentry *dentry, const char *name)
struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb);
struct ceph_mds_client *mdsc = fsc->mdsc;
struct inode *inode = dentry->d_inode;
- struct inode *parent_inode = dentry->d_parent->d_inode;
+ struct inode *parent_inode;
struct ceph_mds_request *req;
int err;
@@ -802,7 +804,9 @@ static int ceph_send_removexattr(struct dentry *dentry, const char *name)
req->r_num_caps = 1;
req->r_path2 = kstrdup(name, GFP_NOFS);
+ parent_inode = ceph_get_dentry_parent_inode(dentry);
err = ceph_mdsc_do_request(mdsc, parent_inode, req);
+ iput(parent_inode);
ceph_mdsc_put_request(req);
return err;
}
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index 5a0ee7f2af06..259991bd2112 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -52,19 +52,29 @@ static int cifs_calculate_signature(const struct smb_hdr *cifs_pdu,
rc = crypto_shash_init(&server->secmech.sdescmd5->shash);
if (rc) {
- cERROR(1, "%s: Oould not init md5\n", __func__);
+ cERROR(1, "%s: Could not init md5\n", __func__);
return rc;
}
- crypto_shash_update(&server->secmech.sdescmd5->shash,
+ rc = crypto_shash_update(&server->secmech.sdescmd5->shash,
server->session_key.response, server->session_key.len);
+ if (rc) {
+ cERROR(1, "%s: Could not update with response\n", __func__);
+ return rc;
+ }
- crypto_shash_update(&server->secmech.sdescmd5->shash,
+ rc = crypto_shash_update(&server->secmech.sdescmd5->shash,
cifs_pdu->Protocol, be32_to_cpu(cifs_pdu->smb_buf_length));
+ if (rc) {
+ cERROR(1, "%s: Could not update with payload\n", __func__);
+ return rc;
+ }
rc = crypto_shash_final(&server->secmech.sdescmd5->shash, signature);
+ if (rc)
+ cERROR(1, "%s: Could not generate md5 hash\n", __func__);
- return 0;
+ return rc;
}
/* must be called with server->srv_mutex held */
@@ -112,12 +122,16 @@ static int cifs_calc_signature2(const struct kvec *iov, int n_vec,
rc = crypto_shash_init(&server->secmech.sdescmd5->shash);
if (rc) {
- cERROR(1, "%s: Oould not init md5\n", __func__);
+ cERROR(1, "%s: Could not init md5\n", __func__);
return rc;
}
- crypto_shash_update(&server->secmech.sdescmd5->shash,
+ rc = crypto_shash_update(&server->secmech.sdescmd5->shash,
server->session_key.response, server->session_key.len);
+ if (rc) {
+ cERROR(1, "%s: Could not update with response\n", __func__);
+ return rc;
+ }
for (i = 0; i < n_vec; i++) {
if (iov[i].iov_len == 0)
@@ -131,14 +145,24 @@ static int cifs_calc_signature2(const struct kvec *iov, int n_vec,
if (i == 0) {
if (iov[0].iov_len <= 8) /* cmd field at offset 9 */
break; /* nothing to sign or corrupt header */
+ rc =
crypto_shash_update(&server->secmech.sdescmd5->shash,
iov[i].iov_base + 4, iov[i].iov_len - 4);
- } else
+ } else {
+ rc =
crypto_shash_update(&server->secmech.sdescmd5->shash,
iov[i].iov_base, iov[i].iov_len);
+ }
+ if (rc) {
+ cERROR(1, "%s: Could not update with payload\n",
+ __func__);
+ return rc;
+ }
}
rc = crypto_shash_final(&server->secmech.sdescmd5->shash, signature);
+ if (rc)
+ cERROR(1, "%s: Could not generate md5 hash\n", __func__);
return rc;
}
@@ -463,8 +487,12 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash,
/* calculate md4 hash of password */
E_md4hash(ses->password, nt_hash);
- crypto_shash_setkey(ses->server->secmech.hmacmd5, nt_hash,
+ rc = crypto_shash_setkey(ses->server->secmech.hmacmd5, nt_hash,
CIFS_NTHASH_SIZE);
+ if (rc) {
+ cERROR(1, "%s: Could not set NT Hash as a key", __func__);
+ return rc;
+ }
rc = crypto_shash_init(&ses->server->secmech.sdeschmacmd5->shash);
if (rc) {
@@ -478,13 +506,18 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash,
if (user == NULL) {
cERROR(1, "calc_ntlmv2_hash: user mem alloc failure\n");
rc = -ENOMEM;
- goto calc_exit_2;
+ return rc;
}
len = cifs_strtoUCS((__le16 *)user, ses->user_name, len, nls_cp);
UniStrupr(user);
- crypto_shash_update(&ses->server->secmech.sdeschmacmd5->shash,
+ rc = crypto_shash_update(&ses->server->secmech.sdeschmacmd5->shash,
(char *)user, 2 * len);
+ kfree(user);
+ if (rc) {
+ cERROR(1, "%s: Could not update with user\n", __func__);
+ return rc;
+ }
/* convert ses->domainName to unicode and uppercase */
if (ses->domainName) {
@@ -494,13 +527,19 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash,
if (domain == NULL) {
cERROR(1, "calc_ntlmv2_hash: domain mem alloc failure");
rc = -ENOMEM;
- goto calc_exit_1;
+ return rc;
}
len = cifs_strtoUCS((__le16 *)domain, ses->domainName, len,
nls_cp);
+ rc =
crypto_shash_update(&ses->server->secmech.sdeschmacmd5->shash,
(char *)domain, 2 * len);
kfree(domain);
+ if (rc) {
+ cERROR(1, "%s: Could not update with domain\n",
+ __func__);
+ return rc;
+ }
} else if (ses->serverName) {
len = strlen(ses->serverName);
@@ -508,21 +547,26 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash,
if (server == NULL) {
cERROR(1, "calc_ntlmv2_hash: server mem alloc failure");
rc = -ENOMEM;
- goto calc_exit_1;
+ return rc;
}
len = cifs_strtoUCS((__le16 *)server, ses->serverName, len,
nls_cp);
+ rc =
crypto_shash_update(&ses->server->secmech.sdeschmacmd5->shash,
(char *)server, 2 * len);
kfree(server);
+ if (rc) {
+ cERROR(1, "%s: Could not update with server\n",
+ __func__);
+ return rc;
+ }
}
rc = crypto_shash_final(&ses->server->secmech.sdeschmacmd5->shash,
ntlmv2_hash);
+ if (rc)
+ cERROR(1, "%s: Could not generate md5 hash\n", __func__);
-calc_exit_1:
- kfree(user);
-calc_exit_2:
return rc;
}
@@ -537,8 +581,12 @@ CalcNTLMv2_response(const struct cifs_ses *ses, char *ntlmv2_hash)
return -1;
}
- crypto_shash_setkey(ses->server->secmech.hmacmd5,
+ rc = crypto_shash_setkey(ses->server->secmech.hmacmd5,
ntlmv2_hash, CIFS_HMAC_MD5_HASH_SIZE);
+ if (rc) {
+ cERROR(1, "%s: Could not set NTLMV2 Hash as a key", __func__);
+ return rc;
+ }
rc = crypto_shash_init(&ses->server->secmech.sdeschmacmd5->shash);
if (rc) {
@@ -552,11 +600,17 @@ CalcNTLMv2_response(const struct cifs_ses *ses, char *ntlmv2_hash)
else
memcpy(ses->auth_key.response + offset,
ses->server->cryptkey, CIFS_SERVER_CHALLENGE_SIZE);
- crypto_shash_update(&ses->server->secmech.sdeschmacmd5->shash,
+ rc = crypto_shash_update(&ses->server->secmech.sdeschmacmd5->shash,
ses->auth_key.response + offset, ses->auth_key.len - offset);
+ if (rc) {
+ cERROR(1, "%s: Could not update with response\n", __func__);
+ return rc;
+ }
rc = crypto_shash_final(&ses->server->secmech.sdeschmacmd5->shash,
ses->auth_key.response + CIFS_SESS_KEY_SIZE);
+ if (rc)
+ cERROR(1, "%s: Could not generate md5 hash\n", __func__);
return rc;
}
@@ -626,8 +680,12 @@ setup_ntlmv2_rsp(struct cifs_ses *ses, const struct nls_table *nls_cp)
}
/* now calculate the session key for NTLMv2 */
- crypto_shash_setkey(ses->server->secmech.hmacmd5,
+ rc = crypto_shash_setkey(ses->server->secmech.hmacmd5,
ntlmv2_hash, CIFS_HMAC_MD5_HASH_SIZE);
+ if (rc) {
+ cERROR(1, "%s: Could not set NTLMV2 Hash as a key", __func__);
+ goto setup_ntlmv2_rsp_ret;
+ }
rc = crypto_shash_init(&ses->server->secmech.sdeschmacmd5->shash);
if (rc) {
@@ -635,12 +693,18 @@ setup_ntlmv2_rsp(struct cifs_ses *ses, const struct nls_table *nls_cp)
goto setup_ntlmv2_rsp_ret;
}
- crypto_shash_update(&ses->server->secmech.sdeschmacmd5->shash,
+ rc = crypto_shash_update(&ses->server->secmech.sdeschmacmd5->shash,
ses->auth_key.response + CIFS_SESS_KEY_SIZE,
CIFS_HMAC_MD5_HASH_SIZE);
+ if (rc) {
+ cERROR(1, "%s: Could not update with response\n", __func__);
+ goto setup_ntlmv2_rsp_ret;
+ }
rc = crypto_shash_final(&ses->server->secmech.sdeschmacmd5->shash,
ses->auth_key.response);
+ if (rc)
+ cERROR(1, "%s: Could not generate md5 hash\n", __func__);
setup_ntlmv2_rsp_ret:
kfree(tiblob);
@@ -668,8 +732,12 @@ calc_seckey(struct cifs_ses *ses)
desc.tfm = tfm_arc4;
- crypto_blkcipher_setkey(tfm_arc4, ses->auth_key.response,
+ rc = crypto_blkcipher_setkey(tfm_arc4, ses->auth_key.response,
CIFS_SESS_KEY_SIZE);
+ if (rc) {
+ cERROR(1, "%s: Could not set response as a key", __func__);
+ return rc;
+ }
sg_init_one(&sgin, sec_key, CIFS_SESS_KEY_SIZE);
sg_init_one(&sgout, ses->ntlmssp->ciphertext, CIFS_CPHTXT_SIZE);
@@ -688,7 +756,7 @@ calc_seckey(struct cifs_ses *ses)
crypto_free_blkcipher(tfm_arc4);
- return 0;
+ return rc;
}
void
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 6255fa812c7a..1fcf4e5b3112 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -501,7 +501,7 @@ struct cifs_search_info {
char *ntwrk_buf_start;
char *srch_entries_start;
char *last_entry;
- char *presume_name;
+ const char *presume_name;
unsigned int resume_name_len;
bool endOfSearch:1;
bool emptyDir:1;
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 499f27fc8576..ae576fbb5142 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -57,11 +57,6 @@ build_path_from_dentry(struct dentry *direntry)
struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
unsigned seq;
- if (direntry == NULL)
- return NULL; /* not much we can do if dentry is freed and
- we need to reopen the file after it was closed implicitly
- when the server crashed */
-
dirsep = CIFS_DIR_SEP(cifs_sb);
if (tcon->Flags & SMB_SHARE_IS_IN_DFS)
dfsplen = strnlen(tcon->treeName, MAX_TREE_SIZE + 1);
diff --git a/fs/cifs/link.c b/fs/cifs/link.c
index 556b1a0b54de..db3f18cdf024 100644
--- a/fs/cifs/link.c
+++ b/fs/cifs/link.c
@@ -74,8 +74,14 @@ symlink_hash(unsigned int link_len, const char *link_str, u8 *md5_hash)
cERROR(1, "%s: Could not init md5 shash\n", __func__);
goto symlink_hash_err;
}
- crypto_shash_update(&sdescmd5->shash, link_str, link_len);
+ rc = crypto_shash_update(&sdescmd5->shash, link_str, link_len);
+ if (rc) {
+ cERROR(1, "%s: Could not update iwth link_str\n", __func__);
+ goto symlink_hash_err;
+ }
rc = crypto_shash_final(&sdescmd5->shash, md5_hash);
+ if (rc)
+ cERROR(1, "%s: Could not generate md5 hash\n", __func__);
symlink_hash_err:
crypto_free_shash(md5);
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index 965a3af186a1..5de03ec20144 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -4,6 +4,7 @@
* Directory search handling
*
* Copyright (C) International Business Machines Corp., 2004, 2008
+ * Copyright (C) Red Hat, Inc., 2011
* Author(s): Steve French (sfrench@us.ibm.com)
*
* This library is free software; you can redistribute it and/or modify
@@ -290,10 +291,10 @@ error_exit:
}
/* return length of unicode string in bytes */
-static int cifs_unicode_bytelen(char *str)
+static int cifs_unicode_bytelen(const char *str)
{
int len;
- __le16 *ustr = (__le16 *)str;
+ const __le16 *ustr = (const __le16 *)str;
for (len = 0; len <= PATH_MAX; len++) {
if (ustr[len] == 0)
@@ -334,78 +335,128 @@ static char *nxt_dir_entry(char *old_entry, char *end_of_smb, int level)
}
+struct cifs_dirent {
+ const char *name;
+ size_t namelen;
+ u32 resume_key;
+ u64 ino;
+};
+
+static void cifs_fill_dirent_unix(struct cifs_dirent *de,
+ const FILE_UNIX_INFO *info, bool is_unicode)
+{
+ de->name = &info->FileName[0];
+ if (is_unicode)
+ de->namelen = cifs_unicode_bytelen(de->name);
+ else
+ de->namelen = strnlen(de->name, PATH_MAX);
+ de->resume_key = info->ResumeKey;
+ de->ino = le64_to_cpu(info->basic.UniqueId);
+}
+
+static void cifs_fill_dirent_dir(struct cifs_dirent *de,
+ const FILE_DIRECTORY_INFO *info)
+{
+ de->name = &info->FileName[0];
+ de->namelen = le32_to_cpu(info->FileNameLength);
+ de->resume_key = info->FileIndex;
+}
+
+static void cifs_fill_dirent_full(struct cifs_dirent *de,
+ const FILE_FULL_DIRECTORY_INFO *info)
+{
+ de->name = &info->FileName[0];
+ de->namelen = le32_to_cpu(info->FileNameLength);
+ de->resume_key = info->FileIndex;
+}
+
+static void cifs_fill_dirent_search(struct cifs_dirent *de,
+ const SEARCH_ID_FULL_DIR_INFO *info)
+{
+ de->name = &info->FileName[0];
+ de->namelen = le32_to_cpu(info->FileNameLength);
+ de->resume_key = info->FileIndex;
+ de->ino = le64_to_cpu(info->UniqueId);
+}
+
+static void cifs_fill_dirent_both(struct cifs_dirent *de,
+ const FILE_BOTH_DIRECTORY_INFO *info)
+{
+ de->name = &info->FileName[0];
+ de->namelen = le32_to_cpu(info->FileNameLength);
+ de->resume_key = info->FileIndex;
+}
+
+static void cifs_fill_dirent_std(struct cifs_dirent *de,
+ const FIND_FILE_STANDARD_INFO *info)
+{
+ de->name = &info->FileName[0];
+ /* one byte length, no endianess conversion */
+ de->namelen = info->FileNameLength;
+ de->resume_key = info->ResumeKey;
+}
+
+static int cifs_fill_dirent(struct cifs_dirent *de, const void *info,
+ u16 level, bool is_unicode)
+{
+ memset(de, 0, sizeof(*de));
+
+ switch (level) {
+ case SMB_FIND_FILE_UNIX:
+ cifs_fill_dirent_unix(de, info, is_unicode);
+ break;
+ case SMB_FIND_FILE_DIRECTORY_INFO:
+ cifs_fill_dirent_dir(de, info);
+ break;
+ case SMB_FIND_FILE_FULL_DIRECTORY_INFO:
+ cifs_fill_dirent_full(de, info);
+ break;
+ case SMB_FIND_FILE_ID_FULL_DIR_INFO:
+ cifs_fill_dirent_search(de, info);
+ break;
+ case SMB_FIND_FILE_BOTH_DIRECTORY_INFO:
+ cifs_fill_dirent_both(de, info);
+ break;
+ case SMB_FIND_FILE_INFO_STANDARD:
+ cifs_fill_dirent_std(de, info);
+ break;
+ default:
+ cFYI(1, "Unknown findfirst level %d", level);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
#define UNICODE_DOT cpu_to_le16(0x2e)
/* return 0 if no match and 1 for . (current directory) and 2 for .. (parent) */
-static int cifs_entry_is_dot(char *current_entry, struct cifsFileInfo *cfile)
+static int cifs_entry_is_dot(struct cifs_dirent *de, bool is_unicode)
{
int rc = 0;
- char *filename = NULL;
- int len = 0;
-
- if (cfile->srch_inf.info_level == SMB_FIND_FILE_UNIX) {
- FILE_UNIX_INFO *pFindData = (FILE_UNIX_INFO *)current_entry;
- filename = &pFindData->FileName[0];
- if (cfile->srch_inf.unicode) {
- len = cifs_unicode_bytelen(filename);
- } else {
- /* BB should we make this strnlen of PATH_MAX? */
- len = strnlen(filename, 5);
- }
- } else if (cfile->srch_inf.info_level == SMB_FIND_FILE_DIRECTORY_INFO) {
- FILE_DIRECTORY_INFO *pFindData =
- (FILE_DIRECTORY_INFO *)current_entry;
- filename = &pFindData->FileName[0];
- len = le32_to_cpu(pFindData->FileNameLength);
- } else if (cfile->srch_inf.info_level ==
- SMB_FIND_FILE_FULL_DIRECTORY_INFO) {
- FILE_FULL_DIRECTORY_INFO *pFindData =
- (FILE_FULL_DIRECTORY_INFO *)current_entry;
- filename = &pFindData->FileName[0];
- len = le32_to_cpu(pFindData->FileNameLength);
- } else if (cfile->srch_inf.info_level ==
- SMB_FIND_FILE_ID_FULL_DIR_INFO) {
- SEARCH_ID_FULL_DIR_INFO *pFindData =
- (SEARCH_ID_FULL_DIR_INFO *)current_entry;
- filename = &pFindData->FileName[0];
- len = le32_to_cpu(pFindData->FileNameLength);
- } else if (cfile->srch_inf.info_level ==
- SMB_FIND_FILE_BOTH_DIRECTORY_INFO) {
- FILE_BOTH_DIRECTORY_INFO *pFindData =
- (FILE_BOTH_DIRECTORY_INFO *)current_entry;
- filename = &pFindData->FileName[0];
- len = le32_to_cpu(pFindData->FileNameLength);
- } else if (cfile->srch_inf.info_level == SMB_FIND_FILE_INFO_STANDARD) {
- FIND_FILE_STANDARD_INFO *pFindData =
- (FIND_FILE_STANDARD_INFO *)current_entry;
- filename = &pFindData->FileName[0];
- len = pFindData->FileNameLength;
- } else {
- cFYI(1, "Unknown findfirst level %d",
- cfile->srch_inf.info_level);
- }
- if (filename) {
- if (cfile->srch_inf.unicode) {
- __le16 *ufilename = (__le16 *)filename;
- if (len == 2) {
- /* check for . */
- if (ufilename[0] == UNICODE_DOT)
- rc = 1;
- } else if (len == 4) {
- /* check for .. */
- if ((ufilename[0] == UNICODE_DOT)
- && (ufilename[1] == UNICODE_DOT))
- rc = 2;
- }
- } else /* ASCII */ {
- if (len == 1) {
- if (filename[0] == '.')
- rc = 1;
- } else if (len == 2) {
- if ((filename[0] == '.') && (filename[1] == '.'))
- rc = 2;
- }
+ if (!de->name)
+ return 0;
+
+ if (is_unicode) {
+ __le16 *ufilename = (__le16 *)de->name;
+ if (de->namelen == 2) {
+ /* check for . */
+ if (ufilename[0] == UNICODE_DOT)
+ rc = 1;
+ } else if (de->namelen == 4) {
+ /* check for .. */
+ if (ufilename[0] == UNICODE_DOT &&
+ ufilename[1] == UNICODE_DOT)
+ rc = 2;
+ }
+ } else /* ASCII */ {
+ if (de->namelen == 1) {
+ if (de->name[0] == '.')
+ rc = 1;
+ } else if (de->namelen == 2) {
+ if (de->name[0] == '.' && de->name[1] == '.')
+ rc = 2;
}
}
@@ -427,66 +478,18 @@ static int is_dir_changed(struct file *file)
}
static int cifs_save_resume_key(const char *current_entry,
- struct cifsFileInfo *cifsFile)
+ struct cifsFileInfo *file_info)
{
- int rc = 0;
- unsigned int len = 0;
- __u16 level;
- char *filename;
-
- if ((cifsFile == NULL) || (current_entry == NULL))
- return -EINVAL;
-
- level = cifsFile->srch_inf.info_level;
-
- if (level == SMB_FIND_FILE_UNIX) {
- FILE_UNIX_INFO *pFindData = (FILE_UNIX_INFO *)current_entry;
+ struct cifs_dirent de;
+ int rc;
- filename = &pFindData->FileName[0];
- if (cifsFile->srch_inf.unicode) {
- len = cifs_unicode_bytelen(filename);
- } else {
- /* BB should we make this strnlen of PATH_MAX? */
- len = strnlen(filename, PATH_MAX);
- }
- cifsFile->srch_inf.resume_key = pFindData->ResumeKey;
- } else if (level == SMB_FIND_FILE_DIRECTORY_INFO) {
- FILE_DIRECTORY_INFO *pFindData =
- (FILE_DIRECTORY_INFO *)current_entry;
- filename = &pFindData->FileName[0];
- len = le32_to_cpu(pFindData->FileNameLength);
- cifsFile->srch_inf.resume_key = pFindData->FileIndex;
- } else if (level == SMB_FIND_FILE_FULL_DIRECTORY_INFO) {
- FILE_FULL_DIRECTORY_INFO *pFindData =
- (FILE_FULL_DIRECTORY_INFO *)current_entry;
- filename = &pFindData->FileName[0];
- len = le32_to_cpu(pFindData->FileNameLength);
- cifsFile->srch_inf.resume_key = pFindData->FileIndex;
- } else if (level == SMB_FIND_FILE_ID_FULL_DIR_INFO) {
- SEARCH_ID_FULL_DIR_INFO *pFindData =
- (SEARCH_ID_FULL_DIR_INFO *)current_entry;
- filename = &pFindData->FileName[0];
- len = le32_to_cpu(pFindData->FileNameLength);
- cifsFile->srch_inf.resume_key = pFindData->FileIndex;
- } else if (level == SMB_FIND_FILE_BOTH_DIRECTORY_INFO) {
- FILE_BOTH_DIRECTORY_INFO *pFindData =
- (FILE_BOTH_DIRECTORY_INFO *)current_entry;
- filename = &pFindData->FileName[0];
- len = le32_to_cpu(pFindData->FileNameLength);
- cifsFile->srch_inf.resume_key = pFindData->FileIndex;
- } else if (level == SMB_FIND_FILE_INFO_STANDARD) {
- FIND_FILE_STANDARD_INFO *pFindData =
- (FIND_FILE_STANDARD_INFO *)current_entry;
- filename = &pFindData->FileName[0];
- /* one byte length, no name conversion */
- len = (unsigned int)pFindData->FileNameLength;
- cifsFile->srch_inf.resume_key = pFindData->ResumeKey;
- } else {
- cFYI(1, "Unknown findfirst level %d", level);
- return -EINVAL;
+ rc = cifs_fill_dirent(&de, current_entry, file_info->srch_inf.info_level,
+ file_info->srch_inf.unicode);
+ if (!rc) {
+ file_info->srch_inf.presume_name = de.name;
+ file_info->srch_inf.resume_name_len = de.namelen;
+ file_info->srch_inf.resume_key = de.resume_key;
}
- cifsFile->srch_inf.resume_name_len = len;
- cifsFile->srch_inf.presume_name = filename;
return rc;
}
@@ -605,136 +608,70 @@ static int find_cifs_entry(const int xid, struct cifs_tcon *pTcon,
return rc;
}
-/* inode num, inode type and filename returned */
-static int cifs_get_name_from_search_buf(struct qstr *pqst,
- char *current_entry, __u16 level, unsigned int unicode,
- struct cifs_sb_info *cifs_sb, unsigned int max_len, __u64 *pinum)
+static int cifs_filldir(char *find_entry, struct file *file, filldir_t filldir,
+ void *dirent, char *scratch_buf, unsigned int max_len)
{
+ struct cifsFileInfo *file_info = file->private_data;
+ struct super_block *sb = file->f_path.dentry->d_sb;
+ struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
+ struct cifs_dirent de = { NULL, };
+ struct cifs_fattr fattr;
+ struct dentry *dentry;
+ struct qstr name;
int rc = 0;
- unsigned int len = 0;
- char *filename;
- struct nls_table *nlt = cifs_sb->local_nls;
-
- *pinum = 0;
-
- if (level == SMB_FIND_FILE_UNIX) {
- FILE_UNIX_INFO *pFindData = (FILE_UNIX_INFO *)current_entry;
-
- filename = &pFindData->FileName[0];
- if (unicode) {
- len = cifs_unicode_bytelen(filename);
- } else {
- /* BB should we make this strnlen of PATH_MAX? */
- len = strnlen(filename, PATH_MAX);
- }
+ ino_t ino;
- *pinum = le64_to_cpu(pFindData->basic.UniqueId);
- } else if (level == SMB_FIND_FILE_DIRECTORY_INFO) {
- FILE_DIRECTORY_INFO *pFindData =
- (FILE_DIRECTORY_INFO *)current_entry;
- filename = &pFindData->FileName[0];
- len = le32_to_cpu(pFindData->FileNameLength);
- } else if (level == SMB_FIND_FILE_FULL_DIRECTORY_INFO) {
- FILE_FULL_DIRECTORY_INFO *pFindData =
- (FILE_FULL_DIRECTORY_INFO *)current_entry;
- filename = &pFindData->FileName[0];
- len = le32_to_cpu(pFindData->FileNameLength);
- } else if (level == SMB_FIND_FILE_ID_FULL_DIR_INFO) {
- SEARCH_ID_FULL_DIR_INFO *pFindData =
- (SEARCH_ID_FULL_DIR_INFO *)current_entry;
- filename = &pFindData->FileName[0];
- len = le32_to_cpu(pFindData->FileNameLength);
- *pinum = le64_to_cpu(pFindData->UniqueId);
- } else if (level == SMB_FIND_FILE_BOTH_DIRECTORY_INFO) {
- FILE_BOTH_DIRECTORY_INFO *pFindData =
- (FILE_BOTH_DIRECTORY_INFO *)current_entry;
- filename = &pFindData->FileName[0];
- len = le32_to_cpu(pFindData->FileNameLength);
- } else if (level == SMB_FIND_FILE_INFO_STANDARD) {
- FIND_FILE_STANDARD_INFO *pFindData =
- (FIND_FILE_STANDARD_INFO *)current_entry;
- filename = &pFindData->FileName[0];
- /* one byte length, no name conversion */
- len = (unsigned int)pFindData->FileNameLength;
- } else {
- cFYI(1, "Unknown findfirst level %d", level);
- return -EINVAL;
- }
+ rc = cifs_fill_dirent(&de, find_entry, file_info->srch_inf.info_level,
+ file_info->srch_inf.unicode);
+ if (rc)
+ return rc;
- if (len > max_len) {
- cERROR(1, "bad search response length %d past smb end", len);
+ if (de.namelen > max_len) {
+ cERROR(1, "bad search response length %zd past smb end",
+ de.namelen);
return -EINVAL;
}
- if (unicode) {
- pqst->len = cifs_from_ucs2((char *) pqst->name,
- (__le16 *) filename,
- UNICODE_NAME_MAX,
- min(len, max_len), nlt,
- cifs_sb->mnt_cifs_flags &
- CIFS_MOUNT_MAP_SPECIAL_CHR);
- pqst->len -= nls_nullsize(nlt);
- } else {
- pqst->name = filename;
- pqst->len = len;
- }
- return rc;
-}
-
-static int cifs_filldir(char *pfindEntry, struct file *file, filldir_t filldir,
- void *direntry, char *scratch_buf, unsigned int max_len)
-{
- int rc = 0;
- struct qstr qstring;
- struct cifsFileInfo *pCifsF;
- u64 inum;
- ino_t ino;
- struct super_block *sb;
- struct cifs_sb_info *cifs_sb;
- struct dentry *tmp_dentry;
- struct cifs_fattr fattr;
-
- /* get filename and len into qstring */
- /* get dentry */
- /* decide whether to create and populate ionde */
- if ((direntry == NULL) || (file == NULL))
- return -EINVAL;
-
- pCifsF = file->private_data;
-
- if ((scratch_buf == NULL) || (pfindEntry == NULL) || (pCifsF == NULL))
- return -ENOENT;
-
- rc = cifs_entry_is_dot(pfindEntry, pCifsF);
/* skip . and .. since we added them first */
- if (rc != 0)
+ if (cifs_entry_is_dot(&de, file_info->srch_inf.unicode))
return 0;
- sb = file->f_path.dentry->d_sb;
- cifs_sb = CIFS_SB(sb);
-
- qstring.name = scratch_buf;
- rc = cifs_get_name_from_search_buf(&qstring, pfindEntry,
- pCifsF->srch_inf.info_level,
- pCifsF->srch_inf.unicode, cifs_sb,
- max_len, &inum /* returned */);
+ if (file_info->srch_inf.unicode) {
+ struct nls_table *nlt = cifs_sb->local_nls;
- if (rc)
- return rc;
+ name.name = scratch_buf;
+ name.len =
+ cifs_from_ucs2((char *)name.name, (__le16 *)de.name,
+ UNICODE_NAME_MAX,
+ min(de.namelen, (size_t)max_len), nlt,
+ cifs_sb->mnt_cifs_flags &
+ CIFS_MOUNT_MAP_SPECIAL_CHR);
+ name.len -= nls_nullsize(nlt);
+ } else {
+ name.name = de.name;
+ name.len = de.namelen;
+ }
- if (pCifsF->srch_inf.info_level == SMB_FIND_FILE_UNIX)
+ switch (file_info->srch_inf.info_level) {
+ case SMB_FIND_FILE_UNIX:
cifs_unix_basic_to_fattr(&fattr,
- &((FILE_UNIX_INFO *) pfindEntry)->basic,
- cifs_sb);
- else if (pCifsF->srch_inf.info_level == SMB_FIND_FILE_INFO_STANDARD)
- cifs_std_info_to_fattr(&fattr, (FIND_FILE_STANDARD_INFO *)
- pfindEntry, cifs_sb);
- else
- cifs_dir_info_to_fattr(&fattr, (FILE_DIRECTORY_INFO *)
- pfindEntry, cifs_sb);
+ &((FILE_UNIX_INFO *)find_entry)->basic,
+ cifs_sb);
+ break;
+ case SMB_FIND_FILE_INFO_STANDARD:
+ cifs_std_info_to_fattr(&fattr,
+ (FIND_FILE_STANDARD_INFO *)find_entry,
+ cifs_sb);
+ break;
+ default:
+ cifs_dir_info_to_fattr(&fattr,
+ (FILE_DIRECTORY_INFO *)find_entry,
+ cifs_sb);
+ break;
+ }
- if (inum && (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM)) {
- fattr.cf_uniqueid = inum;
+ if (de.ino && (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM)) {
+ fattr.cf_uniqueid = de.ino;
} else {
fattr.cf_uniqueid = iunique(sb, ROOT_I);
cifs_autodisable_serverino(cifs_sb);
@@ -750,12 +687,12 @@ static int cifs_filldir(char *pfindEntry, struct file *file, filldir_t filldir,
fattr.cf_flags |= CIFS_FATTR_NEED_REVAL;
ino = cifs_uniqueid_to_ino_t(fattr.cf_uniqueid);
- tmp_dentry = cifs_readdir_lookup(file->f_dentry, &qstring, &fattr);
+ dentry = cifs_readdir_lookup(file->f_dentry, &name, &fattr);
- rc = filldir(direntry, qstring.name, qstring.len, file->f_pos,
- ino, fattr.cf_dtype);
+ rc = filldir(dirent, name.name, name.len, file->f_pos, ino,
+ fattr.cf_dtype);
- dput(tmp_dentry);
+ dput(dentry);
return rc;
}
diff --git a/fs/cifs/smbencrypt.c b/fs/cifs/smbencrypt.c
index 1c5b770c3141..42b9fff48751 100644
--- a/fs/cifs/smbencrypt.c
+++ b/fs/cifs/smbencrypt.c
@@ -157,8 +157,14 @@ mdfour(unsigned char *md4_hash, unsigned char *link_str, int link_len)
cERROR(1, "%s: Could not init md4 shash\n", __func__);
goto mdfour_err;
}
- crypto_shash_update(&sdescmd4->shash, link_str, link_len);
+ rc = crypto_shash_update(&sdescmd4->shash, link_str, link_len);
+ if (rc) {
+ cERROR(1, "%s: Could not update with link_str\n", __func__);
+ goto mdfour_err;
+ }
rc = crypto_shash_final(&sdescmd4->shash, md4_hash);
+ if (rc)
+ cERROR(1, "%s: Could not genereate md4 hash\n", __func__);
mdfour_err:
crypto_free_shash(md4);
diff --git a/fs/dcache.c b/fs/dcache.c
index be18598c7fd7..b05aac3a8cfc 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -2138,8 +2138,9 @@ static void dentry_unlock_parents_for_move(struct dentry *dentry,
* @target: new dentry
*
* Update the dcache to reflect the move of a file name. Negative
- * dcache entries should not be moved in this way. Caller hold
- * rename_lock.
+ * dcache entries should not be moved in this way. Caller must hold
+ * rename_lock, the i_mutex of the source and target directories,
+ * and the sb->s_vfs_rename_mutex if they differ. See lock_rename().
*/
static void __d_move(struct dentry * dentry, struct dentry * target)
{
@@ -2202,7 +2203,8 @@ static void __d_move(struct dentry * dentry, struct dentry * target)
* @target: new dentry
*
* Update the dcache to reflect the move of a file name. Negative
- * dcache entries should not be moved in this way.
+ * dcache entries should not be moved in this way. See the locking
+ * requirements for __d_move.
*/
void d_move(struct dentry *dentry, struct dentry *target)
{
@@ -2320,7 +2322,8 @@ static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon)
* @inode: inode to bind to the dentry, to which aliases may be attached
*
* Introduces an dentry into the tree, substituting an extant disconnected
- * root directory alias in its place if there is one
+ * root directory alias in its place if there is one. Caller must hold the
+ * i_mutex of the parent directory.
*/
struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode)
{
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 01d2d9ef609c..44a360ca8046 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -35,7 +35,7 @@
#include <linux/buffer_head.h>
#include <linux/rwsem.h>
#include <linux/uio.h>
-#include <asm/atomic.h>
+#include <linux/atomic.h>
/*
* How many user pages to map in one call to get_user_pages(). This determines
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index f9cfd168fbe2..fe047d966dc5 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -37,7 +37,7 @@
#include <asm/system.h>
#include <asm/io.h>
#include <asm/mman.h>
-#include <asm/atomic.h>
+#include <linux/atomic.h>
/*
* LOCKING:
diff --git a/fs/exec.c b/fs/exec.c
index 842d5700c155..da80612a35f4 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -181,14 +181,7 @@ static void acct_arg_size(struct linux_binprm *bprm, unsigned long pages)
return;
bprm->vma_pages = pages;
-
-#ifdef SPLIT_RSS_COUNTING
- add_mm_counter(mm, MM_ANONPAGES, diff);
-#else
- spin_lock(&mm->page_table_lock);
add_mm_counter(mm, MM_ANONPAGES, diff);
- spin_unlock(&mm->page_table_lock);
-#endif
}
static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
@@ -277,7 +270,7 @@ static int __bprm_mm_init(struct linux_binprm *bprm)
* use STACK_TOP because that can depend on attributes which aren't
* configured yet.
*/
- BUG_ON(VM_STACK_FLAGS & VM_STACK_INCOMPLETE_SETUP);
+ BUILD_BUG_ON(VM_STACK_FLAGS & VM_STACK_INCOMPLETE_SETUP);
vma->vm_end = STACK_TOP_MAX;
vma->vm_start = vma->vm_end - PAGE_SIZE;
vma->vm_flags = VM_STACK_FLAGS | VM_STACK_INCOMPLETE_SETUP;
@@ -1430,9 +1423,9 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs)
}
}
read_unlock(&binfmt_lock);
+#ifdef CONFIG_MODULES
if (retval != -ENOEXEC || bprm->mm == NULL) {
break;
-#ifdef CONFIG_MODULES
} else {
#define printable(c) (((c)=='\t') || ((c)=='\n') || (0x20<=(c) && (c)<=0x7e))
if (printable(bprm->buf[0]) &&
@@ -1440,9 +1433,13 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs)
printable(bprm->buf[2]) &&
printable(bprm->buf[3]))
break; /* -ENOEXEC */
+ if (try)
+ break; /* -ENOEXEC */
request_module("binfmt-%04x", *(unsigned short *)(&bprm->buf[2]));
-#endif
}
+#else
+ break;
+#endif
}
return retval;
}
@@ -1649,15 +1646,26 @@ expand_fail:
return ret;
}
+static void cn_escape(char *str)
+{
+ for (; *str; str++)
+ if (*str == '/')
+ *str = '!';
+}
+
static int cn_print_exe_file(struct core_name *cn)
{
struct file *exe_file;
- char *pathbuf, *path, *p;
+ char *pathbuf, *path;
int ret;
exe_file = get_mm_exe_file(current->mm);
- if (!exe_file)
- return cn_printf(cn, "(unknown)");
+ if (!exe_file) {
+ char *commstart = cn->corename + cn->used;
+ ret = cn_printf(cn, "%s (path unknown)", current->comm);
+ cn_escape(commstart);
+ return ret;
+ }
pathbuf = kmalloc(PATH_MAX, GFP_TEMPORARY);
if (!pathbuf) {
@@ -1671,9 +1679,7 @@ static int cn_print_exe_file(struct core_name *cn)
goto free_buf;
}
- for (p = path; *p; p++)
- if (*p == '/')
- *p = '!';
+ cn_escape(path);
ret = cn_printf(cn, "%s", path);
@@ -1745,16 +1751,22 @@ static int format_corename(struct core_name *cn, long signr)
break;
}
/* hostname */
- case 'h':
+ case 'h': {
+ char *namestart = cn->corename + cn->used;
down_read(&uts_sem);
err = cn_printf(cn, "%s",
utsname()->nodename);
up_read(&uts_sem);
+ cn_escape(namestart);
break;
+ }
/* executable */
- case 'e':
+ case 'e': {
+ char *commstart = cn->corename + cn->used;
err = cn_printf(cn, "%s", current->comm);
+ cn_escape(commstart);
break;
+ }
case 'E':
err = cn_print_exe_file(cn);
break;
@@ -2118,16 +2130,16 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
ispipe = format_corename(&cn, signr);
- if (ispipe == -ENOMEM) {
- printk(KERN_WARNING "format_corename failed\n");
- printk(KERN_WARNING "Aborting core\n");
- goto fail_corename;
- }
-
if (ispipe) {
int dump_count;
char **helper_argv;
+ if (ispipe < 0) {
+ printk(KERN_WARNING "format_corename failed\n");
+ printk(KERN_WARNING "Aborting core\n");
+ goto fail_corename;
+ }
+
if (cprm.limit == 1) {
/*
* Normally core limits are irrelevant to pipes, since
diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c
index 529970617a21..d27b71f1d183 100644
--- a/fs/ext2/xattr.c
+++ b/fs/ext2/xattr.c
@@ -161,6 +161,10 @@ ext2_xattr_get(struct inode *inode, int name_index, const char *name,
if (name == NULL)
return -EINVAL;
+ name_len = strlen(name);
+ if (name_len > 255)
+ return -ERANGE;
+
down_read(&EXT2_I(inode)->xattr_sem);
error = -ENODATA;
if (!EXT2_I(inode)->i_file_acl)
@@ -181,12 +185,8 @@ bad_block: ext2_error(inode->i_sb, "ext2_xattr_get",
error = -EIO;
goto cleanup;
}
- /* find named attribute */
- name_len = strlen(name);
- error = -ERANGE;
- if (name_len > 255)
- goto cleanup;
+ /* find named attribute */
entry = FIRST_ENTRY(bh);
while (!IS_LAST_ENTRY(entry)) {
struct ext2_xattr_entry *next =
diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c
index fe52297e31ad..6386d76f44a7 100644
--- a/fs/ext3/balloc.c
+++ b/fs/ext3/balloc.c
@@ -21,6 +21,7 @@
#include <linux/quotaops.h>
#include <linux/buffer_head.h>
#include <linux/blkdev.h>
+#include <trace/events/ext3.h>
/*
* balloc.c contains the blocks allocation and deallocation routines
@@ -161,6 +162,7 @@ read_block_bitmap(struct super_block *sb, unsigned int block_group)
desc = ext3_get_group_desc(sb, block_group, NULL);
if (!desc)
return NULL;
+ trace_ext3_read_block_bitmap(sb, block_group);
bitmap_blk = le32_to_cpu(desc->bg_block_bitmap);
bh = sb_getblk(sb, bitmap_blk);
if (unlikely(!bh)) {
@@ -351,6 +353,7 @@ void ext3_rsv_window_add(struct super_block *sb,
struct rb_node * parent = NULL;
struct ext3_reserve_window_node *this;
+ trace_ext3_rsv_window_add(sb, rsv);
while (*p)
{
parent = *p;
@@ -476,8 +479,10 @@ void ext3_discard_reservation(struct inode *inode)
rsv = &block_i->rsv_window_node;
if (!rsv_is_empty(&rsv->rsv_window)) {
spin_lock(rsv_lock);
- if (!rsv_is_empty(&rsv->rsv_window))
+ if (!rsv_is_empty(&rsv->rsv_window)) {
+ trace_ext3_discard_reservation(inode, rsv);
rsv_window_remove(inode->i_sb, rsv);
+ }
spin_unlock(rsv_lock);
}
}
@@ -683,14 +688,10 @@ error_return:
void ext3_free_blocks(handle_t *handle, struct inode *inode,
ext3_fsblk_t block, unsigned long count)
{
- struct super_block * sb;
+ struct super_block *sb = inode->i_sb;
unsigned long dquot_freed_blocks;
- sb = inode->i_sb;
- if (!sb) {
- printk ("ext3_free_blocks: nonexistent device");
- return;
- }
+ trace_ext3_free_blocks(inode, block, count);
ext3_free_blocks_sb(handle, sb, block, count, &dquot_freed_blocks);
if (dquot_freed_blocks)
dquot_free_block(inode, dquot_freed_blocks);
@@ -1136,6 +1137,7 @@ static int alloc_new_reservation(struct ext3_reserve_window_node *my_rsv,
else
start_block = grp_goal + group_first_block;
+ trace_ext3_alloc_new_reservation(sb, start_block);
size = my_rsv->rsv_goal_size;
if (!rsv_is_empty(&my_rsv->rsv_window)) {
@@ -1230,8 +1232,11 @@ retry:
* check if the first free block is within the
* free space we just reserved
*/
- if (start_block >= my_rsv->rsv_start && start_block <= my_rsv->rsv_end)
+ if (start_block >= my_rsv->rsv_start &&
+ start_block <= my_rsv->rsv_end) {
+ trace_ext3_reserved(sb, start_block, my_rsv);
return 0; /* success */
+ }
/*
* if the first free bit we found is out of the reservable space
* continue search for next reservable space,
@@ -1514,10 +1519,6 @@ ext3_fsblk_t ext3_new_blocks(handle_t *handle, struct inode *inode,
*errp = -ENOSPC;
sb = inode->i_sb;
- if (!sb) {
- printk("ext3_new_block: nonexistent device");
- return 0;
- }
/*
* Check quota for allocation of this block.
@@ -1528,8 +1529,10 @@ ext3_fsblk_t ext3_new_blocks(handle_t *handle, struct inode *inode,
return 0;
}
+ trace_ext3_request_blocks(inode, goal, num);
+
sbi = EXT3_SB(sb);
- es = EXT3_SB(sb)->s_es;
+ es = sbi->s_es;
ext3_debug("goal=%lu.\n", goal);
/*
* Allocate a block from reservation only when
@@ -1742,6 +1745,10 @@ allocated:
brelse(bitmap_bh);
dquot_free_block(inode, *count-num);
*count = num;
+
+ trace_ext3_allocate_blocks(inode, goal, num,
+ (unsigned long long)ret_block);
+
return ret_block;
io_error:
@@ -1996,6 +2003,7 @@ ext3_grpblk_t ext3_trim_all_free(struct super_block *sb, unsigned int group,
if ((next - start) < minblocks)
goto free_extent;
+ trace_ext3_discard_blocks(sb, discard_block, next - start);
/* Send the TRIM command down to the device */
err = sb_issue_discard(sb, discard_block, next - start,
GFP_NOFS, 0);
@@ -2100,7 +2108,7 @@ int ext3_trim_fs(struct super_block *sb, struct fstrim_range *range)
if (unlikely(minlen > EXT3_BLOCKS_PER_GROUP(sb)))
return -EINVAL;
if (start >= max_blks)
- goto out;
+ return -EINVAL;
if (start + len > max_blks)
len = max_blks - start;
@@ -2148,8 +2156,6 @@ int ext3_trim_fs(struct super_block *sb, struct fstrim_range *range)
if (ret >= 0)
ret = 0;
-
-out:
range->len = trimmed * sb->s_blocksize;
return ret;
diff --git a/fs/ext3/file.c b/fs/ext3/file.c
index 2be5b99097f1..724df69847dc 100644
--- a/fs/ext3/file.c
+++ b/fs/ext3/file.c
@@ -71,7 +71,6 @@ const struct file_operations ext3_file_operations = {
};
const struct inode_operations ext3_file_inode_operations = {
- .truncate = ext3_truncate,
.setattr = ext3_setattr,
#ifdef CONFIG_EXT3_FS_XATTR
.setxattr = generic_setxattr,
diff --git a/fs/ext3/fsync.c b/fs/ext3/fsync.c
index 0bcf63adb80a..d494c554c6e6 100644
--- a/fs/ext3/fsync.c
+++ b/fs/ext3/fsync.c
@@ -30,6 +30,7 @@
#include <linux/jbd.h>
#include <linux/ext3_fs.h>
#include <linux/ext3_jbd.h>
+#include <trace/events/ext3.h>
/*
* akpm: A new design for ext3_sync_file().
@@ -51,12 +52,14 @@ int ext3_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
int ret, needs_barrier = 0;
tid_t commit_tid;
+ trace_ext3_sync_file_enter(file, datasync);
+
if (inode->i_sb->s_flags & MS_RDONLY)
return 0;
ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
if (ret)
- return ret;
+ goto out;
/*
* Taking the mutex here just to keep consistent with how fsync was
@@ -83,7 +86,8 @@ int ext3_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
*/
if (ext3_should_journal_data(inode)) {
mutex_unlock(&inode->i_mutex);
- return ext3_force_commit(inode->i_sb);
+ ret = ext3_force_commit(inode->i_sb);
+ goto out;
}
if (datasync)
@@ -104,6 +108,9 @@ int ext3_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
*/
if (needs_barrier)
blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
+
mutex_unlock(&inode->i_mutex);
+out:
+ trace_ext3_sync_file_exit(inode, ret);
return ret;
}
diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c
index bfc2dc43681d..bf09cbf938cc 100644
--- a/fs/ext3/ialloc.c
+++ b/fs/ext3/ialloc.c
@@ -23,6 +23,7 @@
#include <linux/buffer_head.h>
#include <linux/random.h>
#include <linux/bitops.h>
+#include <trace/events/ext3.h>
#include <asm/byteorder.h>
@@ -118,6 +119,7 @@ void ext3_free_inode (handle_t *handle, struct inode * inode)
ino = inode->i_ino;
ext3_debug ("freeing inode %lu\n", ino);
+ trace_ext3_free_inode(inode);
is_directory = S_ISDIR(inode->i_mode);
@@ -426,6 +428,7 @@ struct inode *ext3_new_inode(handle_t *handle, struct inode * dir,
return ERR_PTR(-EPERM);
sb = dir->i_sb;
+ trace_ext3_request_inode(dir, mode);
inode = new_inode(sb);
if (!inode)
return ERR_PTR(-ENOMEM);
@@ -601,6 +604,7 @@ got:
}
ext3_debug("allocating inode %lu\n", inode->i_ino);
+ trace_ext3_allocate_inode(inode, dir, mode);
goto really_out;
fail:
ext3_std_error(sb, err);
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 2978a2a17a59..04da6acde85d 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -38,10 +38,12 @@
#include <linux/bio.h>
#include <linux/fiemap.h>
#include <linux/namei.h>
+#include <trace/events/ext3.h>
#include "xattr.h"
#include "acl.h"
static int ext3_writepage_trans_blocks(struct inode *inode);
+static int ext3_block_truncate_page(struct inode *inode, loff_t from);
/*
* Test whether an inode is a fast symlink.
@@ -70,6 +72,7 @@ int ext3_forget(handle_t *handle, int is_metadata, struct inode *inode,
might_sleep();
+ trace_ext3_forget(inode, is_metadata, blocknr);
BUFFER_TRACE(bh, "enter");
jbd_debug(4, "forgetting bh %p: is_metadata = %d, mode %o, "
@@ -194,20 +197,47 @@ static int truncate_restart_transaction(handle_t *handle, struct inode *inode)
*/
void ext3_evict_inode (struct inode *inode)
{
+ struct ext3_inode_info *ei = EXT3_I(inode);
struct ext3_block_alloc_info *rsv;
handle_t *handle;
int want_delete = 0;
+ trace_ext3_evict_inode(inode);
if (!inode->i_nlink && !is_bad_inode(inode)) {
dquot_initialize(inode);
want_delete = 1;
}
+ /*
+ * When journalling data dirty buffers are tracked only in the journal.
+ * So although mm thinks everything is clean and ready for reaping the
+ * inode might still have some pages to write in the running
+ * transaction or waiting to be checkpointed. Thus calling
+ * journal_invalidatepage() (via truncate_inode_pages()) to discard
+ * these buffers can cause data loss. Also even if we did not discard
+ * these buffers, we would have no way to find them after the inode
+ * is reaped and thus user could see stale data if he tries to read
+ * them before the transaction is checkpointed. So be careful and
+ * force everything to disk here... We use ei->i_datasync_tid to
+ * store the newest transaction containing inode's data.
+ *
+ * Note that directories do not have this problem because they don't
+ * use page cache.
+ */
+ if (inode->i_nlink && ext3_should_journal_data(inode) &&
+ (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode))) {
+ tid_t commit_tid = atomic_read(&ei->i_datasync_tid);
+ journal_t *journal = EXT3_SB(inode->i_sb)->s_journal;
+
+ log_start_commit(journal, commit_tid);
+ log_wait_commit(journal, commit_tid);
+ filemap_write_and_wait(&inode->i_data);
+ }
truncate_inode_pages(&inode->i_data, 0);
ext3_discard_reservation(inode);
- rsv = EXT3_I(inode)->i_block_alloc_info;
- EXT3_I(inode)->i_block_alloc_info = NULL;
+ rsv = ei->i_block_alloc_info;
+ ei->i_block_alloc_info = NULL;
if (unlikely(rsv))
kfree(rsv);
@@ -231,15 +261,13 @@ void ext3_evict_inode (struct inode *inode)
if (inode->i_blocks)
ext3_truncate(inode);
/*
- * Kill off the orphan record which ext3_truncate created.
- * AKPM: I think this can be inside the above `if'.
- * Note that ext3_orphan_del() has to be able to cope with the
- * deletion of a non-existent orphan - this is because we don't
- * know if ext3_truncate() actually created an orphan record.
- * (Well, we could do this if we need to, but heck - it works)
+ * Kill off the orphan record created when the inode lost the last
+ * link. Note that ext3_orphan_del() has to be able to cope with the
+ * deletion of a non-existent orphan - ext3_truncate() could
+ * have removed the record.
*/
ext3_orphan_del(handle, inode);
- EXT3_I(inode)->i_dtime = get_seconds();
+ ei->i_dtime = get_seconds();
/*
* One subtle ordering requirement: if anything has gone wrong
@@ -842,6 +870,7 @@ int ext3_get_blocks_handle(handle_t *handle, struct inode *inode,
ext3_fsblk_t first_block = 0;
+ trace_ext3_get_blocks_enter(inode, iblock, maxblocks, create);
J_ASSERT(handle != NULL || create == 0);
depth = ext3_block_to_path(inode,iblock,offsets,&blocks_to_boundary);
@@ -886,6 +915,9 @@ int ext3_get_blocks_handle(handle_t *handle, struct inode *inode,
if (!create || err == -EIO)
goto cleanup;
+ /*
+ * Block out ext3_truncate while we alter the tree
+ */
mutex_lock(&ei->truncate_mutex);
/*
@@ -934,9 +966,6 @@ int ext3_get_blocks_handle(handle_t *handle, struct inode *inode,
*/
count = ext3_blks_to_allocate(partial, indirect_blks,
maxblocks, blocks_to_boundary);
- /*
- * Block out ext3_truncate while we alter the tree
- */
err = ext3_alloc_branch(handle, inode, indirect_blks, &count, goal,
offsets + (partial - chain), partial);
@@ -970,6 +999,9 @@ cleanup:
}
BUFFER_TRACE(bh_result, "returned");
out:
+ trace_ext3_get_blocks_exit(inode, iblock,
+ depth ? le32_to_cpu(chain[depth-1].key) : 0,
+ count, err);
return err;
}
@@ -1202,6 +1234,16 @@ static void ext3_truncate_failed_write(struct inode *inode)
ext3_truncate(inode);
}
+/*
+ * Truncate blocks that were not used by direct IO write. We have to zero out
+ * the last file block as well because direct IO might have written to it.
+ */
+static void ext3_truncate_failed_direct_write(struct inode *inode)
+{
+ ext3_block_truncate_page(inode, inode->i_size);
+ ext3_truncate(inode);
+}
+
static int ext3_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
struct page **pagep, void **fsdata)
@@ -1217,6 +1259,8 @@ static int ext3_write_begin(struct file *file, struct address_space *mapping,
* we allocate blocks but write fails for some reason */
int needed_blocks = ext3_writepage_trans_blocks(inode) + 1;
+ trace_ext3_write_begin(inode, pos, len, flags);
+
index = pos >> PAGE_CACHE_SHIFT;
from = pos & (PAGE_CACHE_SIZE - 1);
to = from + len;
@@ -1332,6 +1376,7 @@ static int ext3_ordered_write_end(struct file *file,
unsigned from, to;
int ret = 0, ret2;
+ trace_ext3_ordered_write_end(inode, pos, len, copied);
copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
from = pos & (PAGE_CACHE_SIZE - 1);
@@ -1367,6 +1412,7 @@ static int ext3_writeback_write_end(struct file *file,
struct inode *inode = file->f_mapping->host;
int ret;
+ trace_ext3_writeback_write_end(inode, pos, len, copied);
copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
update_file_sizes(inode, pos, copied);
/*
@@ -1391,10 +1437,12 @@ static int ext3_journalled_write_end(struct file *file,
{
handle_t *handle = ext3_journal_current_handle();
struct inode *inode = mapping->host;
+ struct ext3_inode_info *ei = EXT3_I(inode);
int ret = 0, ret2;
int partial = 0;
unsigned from, to;
+ trace_ext3_journalled_write_end(inode, pos, len, copied);
from = pos & (PAGE_CACHE_SIZE - 1);
to = from + len;
@@ -1419,8 +1467,9 @@ static int ext3_journalled_write_end(struct file *file,
if (pos + len > inode->i_size && ext3_can_truncate(inode))
ext3_orphan_add(handle, inode);
ext3_set_inode_state(inode, EXT3_STATE_JDATA);
- if (inode->i_size > EXT3_I(inode)->i_disksize) {
- EXT3_I(inode)->i_disksize = inode->i_size;
+ atomic_set(&ei->i_datasync_tid, handle->h_transaction->t_tid);
+ if (inode->i_size > ei->i_disksize) {
+ ei->i_disksize = inode->i_size;
ret2 = ext3_mark_inode_dirty(handle, inode);
if (!ret)
ret = ret2;
@@ -1577,6 +1626,7 @@ static int ext3_ordered_writepage(struct page *page,
if (ext3_journal_current_handle())
goto out_fail;
+ trace_ext3_ordered_writepage(page);
if (!page_has_buffers(page)) {
create_empty_buffers(page, inode->i_sb->s_blocksize,
(1 << BH_Dirty)|(1 << BH_Uptodate));
@@ -1647,6 +1697,7 @@ static int ext3_writeback_writepage(struct page *page,
if (ext3_journal_current_handle())
goto out_fail;
+ trace_ext3_writeback_writepage(page);
if (page_has_buffers(page)) {
if (!walk_page_buffers(NULL, page_buffers(page), 0,
PAGE_CACHE_SIZE, NULL, buffer_unmapped)) {
@@ -1689,6 +1740,7 @@ static int ext3_journalled_writepage(struct page *page,
if (ext3_journal_current_handle())
goto no_write;
+ trace_ext3_journalled_writepage(page);
handle = ext3_journal_start(inode, ext3_writepage_trans_blocks(inode));
if (IS_ERR(handle)) {
ret = PTR_ERR(handle);
@@ -1715,6 +1767,8 @@ static int ext3_journalled_writepage(struct page *page,
if (ret == 0)
ret = err;
ext3_set_inode_state(inode, EXT3_STATE_JDATA);
+ atomic_set(&EXT3_I(inode)->i_datasync_tid,
+ handle->h_transaction->t_tid);
unlock_page(page);
} else {
/*
@@ -1739,6 +1793,7 @@ out_unlock:
static int ext3_readpage(struct file *file, struct page *page)
{
+ trace_ext3_readpage(page);
return mpage_readpage(page, ext3_get_block);
}
@@ -1753,6 +1808,8 @@ static void ext3_invalidatepage(struct page *page, unsigned long offset)
{
journal_t *journal = EXT3_JOURNAL(page->mapping->host);
+ trace_ext3_invalidatepage(page, offset);
+
/*
* If it's a full truncate we just forget about the pending dirtying
*/
@@ -1766,6 +1823,7 @@ static int ext3_releasepage(struct page *page, gfp_t wait)
{
journal_t *journal = EXT3_JOURNAL(page->mapping->host);
+ trace_ext3_releasepage(page);
WARN_ON(PageChecked(page));
if (!page_has_buffers(page))
return 0;
@@ -1794,6 +1852,8 @@ static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb,
size_t count = iov_length(iov, nr_segs);
int retries = 0;
+ trace_ext3_direct_IO_enter(inode, offset, iov_length(iov, nr_segs), rw);
+
if (rw == WRITE) {
loff_t final_size = offset + count;
@@ -1827,7 +1887,7 @@ retry:
loff_t end = offset + iov_length(iov, nr_segs);
if (end > isize)
- vmtruncate(inode, isize);
+ ext3_truncate_failed_direct_write(inode);
}
if (ret == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries))
goto retry;
@@ -1841,7 +1901,7 @@ retry:
/* This is really bad luck. We've written the data
* but cannot extend i_size. Truncate allocated blocks
* and pretend the write failed... */
- ext3_truncate(inode);
+ ext3_truncate_failed_direct_write(inode);
ret = PTR_ERR(handle);
goto out;
}
@@ -1867,6 +1927,8 @@ retry:
ret = err;
}
out:
+ trace_ext3_direct_IO_exit(inode, offset,
+ iov_length(iov, nr_segs), rw, ret);
return ret;
}
@@ -1949,17 +2011,24 @@ void ext3_set_aops(struct inode *inode)
* This required during truncate. We need to physically zero the tail end
* of that block so it doesn't yield old data if the file is later grown.
*/
-static int ext3_block_truncate_page(handle_t *handle, struct page *page,
- struct address_space *mapping, loff_t from)
+static int ext3_block_truncate_page(struct inode *inode, loff_t from)
{
ext3_fsblk_t index = from >> PAGE_CACHE_SHIFT;
- unsigned offset = from & (PAGE_CACHE_SIZE-1);
+ unsigned offset = from & (PAGE_CACHE_SIZE - 1);
unsigned blocksize, iblock, length, pos;
- struct inode *inode = mapping->host;
+ struct page *page;
+ handle_t *handle = NULL;
struct buffer_head *bh;
int err = 0;
+ /* Truncated on block boundary - nothing to do */
blocksize = inode->i_sb->s_blocksize;
+ if ((from & (blocksize - 1)) == 0)
+ return 0;
+
+ page = grab_cache_page(inode->i_mapping, index);
+ if (!page)
+ return -ENOMEM;
length = blocksize - (offset & (blocksize - 1));
iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
@@ -2004,11 +2073,23 @@ static int ext3_block_truncate_page(handle_t *handle, struct page *page,
goto unlock;
}
+ /* data=writeback mode doesn't need transaction to zero-out data */
+ if (!ext3_should_writeback_data(inode)) {
+ /* We journal at most one block */
+ handle = ext3_journal_start(inode, 1);
+ if (IS_ERR(handle)) {
+ clear_highpage(page);
+ flush_dcache_page(page);
+ err = PTR_ERR(handle);
+ goto unlock;
+ }
+ }
+
if (ext3_should_journal_data(inode)) {
BUFFER_TRACE(bh, "get write access");
err = ext3_journal_get_write_access(handle, bh);
if (err)
- goto unlock;
+ goto stop;
}
zero_user(page, offset, length);
@@ -2022,6 +2103,9 @@ static int ext3_block_truncate_page(handle_t *handle, struct page *page,
err = ext3_journal_dirty_data(handle, bh);
mark_buffer_dirty(bh);
}
+stop:
+ if (handle)
+ ext3_journal_stop(handle);
unlock:
unlock_page(page);
@@ -2390,8 +2474,6 @@ static void ext3_free_branches(handle_t *handle, struct inode *inode,
int ext3_can_truncate(struct inode *inode)
{
- if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
- return 0;
if (S_ISREG(inode->i_mode))
return 1;
if (S_ISDIR(inode->i_mode))
@@ -2435,7 +2517,6 @@ void ext3_truncate(struct inode *inode)
struct ext3_inode_info *ei = EXT3_I(inode);
__le32 *i_data = ei->i_data;
int addr_per_block = EXT3_ADDR_PER_BLOCK(inode->i_sb);
- struct address_space *mapping = inode->i_mapping;
int offsets[4];
Indirect chain[4];
Indirect *partial;
@@ -2443,7 +2524,8 @@ void ext3_truncate(struct inode *inode)
int n;
long last_block;
unsigned blocksize = inode->i_sb->s_blocksize;
- struct page *page;
+
+ trace_ext3_truncate_enter(inode);
if (!ext3_can_truncate(inode))
goto out_notrans;
@@ -2451,37 +2533,12 @@ void ext3_truncate(struct inode *inode)
if (inode->i_size == 0 && ext3_should_writeback_data(inode))
ext3_set_inode_state(inode, EXT3_STATE_FLUSH_ON_CLOSE);
- /*
- * We have to lock the EOF page here, because lock_page() nests
- * outside journal_start().
- */
- if ((inode->i_size & (blocksize - 1)) == 0) {
- /* Block boundary? Nothing to do */
- page = NULL;
- } else {
- page = grab_cache_page(mapping,
- inode->i_size >> PAGE_CACHE_SHIFT);
- if (!page)
- goto out_notrans;
- }
-
handle = start_transaction(inode);
- if (IS_ERR(handle)) {
- if (page) {
- clear_highpage(page);
- flush_dcache_page(page);
- unlock_page(page);
- page_cache_release(page);
- }
+ if (IS_ERR(handle))
goto out_notrans;
- }
last_block = (inode->i_size + blocksize-1)
>> EXT3_BLOCK_SIZE_BITS(inode->i_sb);
-
- if (page)
- ext3_block_truncate_page(handle, page, mapping, inode->i_size);
-
n = ext3_block_to_path(inode, last_block, offsets, NULL);
if (n == 0)
goto out_stop; /* error */
@@ -2596,6 +2653,7 @@ out_stop:
ext3_orphan_del(handle, inode);
ext3_journal_stop(handle);
+ trace_ext3_truncate_exit(inode);
return;
out_notrans:
/*
@@ -2604,6 +2662,7 @@ out_notrans:
*/
if (inode->i_nlink)
ext3_orphan_del(NULL, inode);
+ trace_ext3_truncate_exit(inode);
}
static ext3_fsblk_t ext3_get_inode_block(struct super_block *sb,
@@ -2745,6 +2804,7 @@ make_io:
* has in-inode xattrs, or we don't have this inode in memory.
* Read the block from disk.
*/
+ trace_ext3_load_inode(inode);
get_bh(bh);
bh->b_end_io = end_buffer_read_sync;
submit_bh(READ_META, bh);
@@ -3229,18 +3289,36 @@ int ext3_setattr(struct dentry *dentry, struct iattr *attr)
}
error = ext3_orphan_add(handle, inode);
+ if (error) {
+ ext3_journal_stop(handle);
+ goto err_out;
+ }
EXT3_I(inode)->i_disksize = attr->ia_size;
- rc = ext3_mark_inode_dirty(handle, inode);
- if (!error)
- error = rc;
+ error = ext3_mark_inode_dirty(handle, inode);
ext3_journal_stop(handle);
+ if (error) {
+ /* Some hard fs error must have happened. Bail out. */
+ ext3_orphan_del(NULL, inode);
+ goto err_out;
+ }
+ rc = ext3_block_truncate_page(inode, attr->ia_size);
+ if (rc) {
+ /* Cleanup orphan list and exit */
+ handle = ext3_journal_start(inode, 3);
+ if (IS_ERR(handle)) {
+ ext3_orphan_del(NULL, inode);
+ goto err_out;
+ }
+ ext3_orphan_del(handle, inode);
+ ext3_journal_stop(handle);
+ goto err_out;
+ }
}
if ((attr->ia_valid & ATTR_SIZE) &&
attr->ia_size != i_size_read(inode)) {
- rc = vmtruncate(inode, attr->ia_size);
- if (rc)
- goto err_out;
+ truncate_setsize(inode, attr->ia_size);
+ ext3_truncate(inode);
}
setattr_copy(inode, attr);
@@ -3374,6 +3452,7 @@ int ext3_mark_inode_dirty(handle_t *handle, struct inode *inode)
int err;
might_sleep();
+ trace_ext3_mark_inode_dirty(inode, _RET_IP_);
err = ext3_reserve_inode_write(handle, inode, &iloc);
if (!err)
err = ext3_mark_iloc_dirty(handle, inode, &iloc);
diff --git a/fs/ext3/ioctl.c b/fs/ext3/ioctl.c
index f4090bd2f345..c7f43944f160 100644
--- a/fs/ext3/ioctl.c
+++ b/fs/ext3/ioctl.c
@@ -285,7 +285,7 @@ group_add_out:
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- if (copy_from_user(&range, (struct fstrim_range *)arg,
+ if (copy_from_user(&range, (struct fstrim_range __user *)arg,
sizeof(range)))
return -EFAULT;
@@ -293,7 +293,7 @@ group_add_out:
if (ret < 0)
return ret;
- if (copy_to_user((struct fstrim_range *)arg, &range,
+ if (copy_to_user((struct fstrim_range __user *)arg, &range,
sizeof(range)))
return -EFAULT;
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 3b57230a17bb..6e18a0b7750d 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -36,6 +36,7 @@
#include <linux/quotaops.h>
#include <linux/buffer_head.h>
#include <linux/bio.h>
+#include <trace/events/ext3.h>
#include "namei.h"
#include "xattr.h"
@@ -287,7 +288,7 @@ static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct ext3_dir_ent
while (len--) printk("%c", *name++);
ext3fs_dirhash(de->name, de->name_len, &h);
printk(":%x.%u ", h.hash,
- ((char *) de - base));
+ (unsigned) ((char *) de - base));
}
space += EXT3_DIR_REC_LEN(de->name_len);
names++;
@@ -1013,7 +1014,7 @@ static struct buffer_head * ext3_dx_find_entry(struct inode *dir,
*err = -ENOENT;
errout:
- dxtrace(printk("%s not found\n", name));
+ dxtrace(printk("%s not found\n", entry->name));
dx_release (frames);
return NULL;
}
@@ -2140,6 +2141,7 @@ static int ext3_unlink(struct inode * dir, struct dentry *dentry)
struct ext3_dir_entry_2 * de;
handle_t *handle;
+ trace_ext3_unlink_enter(dir, dentry);
/* Initialize quotas before so that eventual writes go
* in separate transaction */
dquot_initialize(dir);
@@ -2185,6 +2187,7 @@ static int ext3_unlink(struct inode * dir, struct dentry *dentry)
end_unlink:
ext3_journal_stop(handle);
brelse (bh);
+ trace_ext3_unlink_exit(dentry, retval);
return retval;
}
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index b57ea2f91269..7beb69ae0015 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -44,6 +44,9 @@
#include "acl.h"
#include "namei.h"
+#define CREATE_TRACE_POINTS
+#include <trace/events/ext3.h>
+
#ifdef CONFIG_EXT3_DEFAULTS_TO_ORDERED
#define EXT3_MOUNT_DEFAULT_DATA_MODE EXT3_MOUNT_ORDERED_DATA
#else
@@ -497,6 +500,14 @@ static struct inode *ext3_alloc_inode(struct super_block *sb)
return &ei->vfs_inode;
}
+static int ext3_drop_inode(struct inode *inode)
+{
+ int drop = generic_drop_inode(inode);
+
+ trace_ext3_drop_inode(inode, drop);
+ return drop;
+}
+
static void ext3_i_callback(struct rcu_head *head)
{
struct inode *inode = container_of(head, struct inode, i_rcu);
@@ -788,6 +799,7 @@ static const struct super_operations ext3_sops = {
.destroy_inode = ext3_destroy_inode,
.write_inode = ext3_write_inode,
.dirty_inode = ext3_dirty_inode,
+ .drop_inode = ext3_drop_inode,
.evict_inode = ext3_evict_inode,
.put_super = ext3_put_super,
.sync_fs = ext3_sync_fs,
@@ -2509,6 +2521,7 @@ static int ext3_sync_fs(struct super_block *sb, int wait)
{
tid_t target;
+ trace_ext3_sync_fs(sb, wait);
if (journal_start_commit(EXT3_SB(sb)->s_journal, &target)) {
if (wait)
log_wait_commit(EXT3_SB(sb)->s_journal, target);
diff --git a/fs/ext3/xattr.c b/fs/ext3/xattr.c
index 32e6cc23bd9a..d565759d82ee 100644
--- a/fs/ext3/xattr.c
+++ b/fs/ext3/xattr.c
@@ -803,8 +803,16 @@ inserted:
/* We need to allocate a new block */
ext3_fsblk_t goal = ext3_group_first_block_no(sb,
EXT3_I(inode)->i_block_group);
- ext3_fsblk_t block = ext3_new_block(handle, inode,
- goal, &error);
+ ext3_fsblk_t block;
+
+ /*
+ * Protect us agaist concurrent allocations to the
+ * same inode from ext3_..._writepage(). Reservation
+ * code does not expect racing allocations.
+ */
+ mutex_lock(&EXT3_I(inode)->truncate_mutex);
+ block = ext3_new_block(handle, inode, goal, &error);
+ mutex_unlock(&EXT3_I(inode)->truncate_mutex);
if (error)
goto cleanup;
ea_idebug(inode, "creating block %d", block);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 678cde834f19..3e5191f9f398 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2741,7 +2741,7 @@ static int write_cache_pages_da(struct address_space *mapping,
index = wbc->range_start >> PAGE_CACHE_SHIFT;
end = wbc->range_end >> PAGE_CACHE_SHIFT;
- if (wbc->sync_mode == WB_SYNC_ALL)
+ if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
tag = PAGECACHE_TAG_TOWRITE;
else
tag = PAGECACHE_TAG_DIRTY;
@@ -2973,7 +2973,7 @@ static int ext4_da_writepages(struct address_space *mapping,
}
retry:
- if (wbc->sync_mode == WB_SYNC_ALL)
+ if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
tag_pages_for_writeback(mapping, index, end);
while (!ret && wbc->nr_to_write > 0) {
diff --git a/fs/file_table.c b/fs/file_table.c
index 01e4c1e8e6b6..c322794f7360 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -25,7 +25,7 @@
#include <linux/percpu.h>
#include <linux/ima.h>
-#include <asm/atomic.h>
+#include <linux/atomic.h>
#include "internal.h"
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index b8c507ca42f7..1599aa985fe2 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -35,7 +35,9 @@
struct wb_writeback_work {
long nr_pages;
struct super_block *sb;
+ unsigned long *older_than_this;
enum writeback_sync_modes sync_mode;
+ unsigned int tagged_writepages:1;
unsigned int for_kupdate:1;
unsigned int range_cyclic:1;
unsigned int for_background:1;
@@ -180,12 +182,13 @@ void bdi_start_background_writeback(struct backing_dev_info *bdi)
*/
void inode_wb_list_del(struct inode *inode)
{
- spin_lock(&inode_wb_list_lock);
+ struct backing_dev_info *bdi = inode_to_bdi(inode);
+
+ spin_lock(&bdi->wb.list_lock);
list_del_init(&inode->i_wb_list);
- spin_unlock(&inode_wb_list_lock);
+ spin_unlock(&bdi->wb.list_lock);
}
-
/*
* Redirty an inode: set its when-it-was dirtied timestamp and move it to the
* furthest end of its superblock's dirty-inode list.
@@ -195,11 +198,9 @@ void inode_wb_list_del(struct inode *inode)
* the case then the inode must have been redirtied while it was being written
* out and we don't reset its dirtied_when.
*/
-static void redirty_tail(struct inode *inode)
+static void redirty_tail(struct inode *inode, struct bdi_writeback *wb)
{
- struct bdi_writeback *wb = &inode_to_bdi(inode)->wb;
-
- assert_spin_locked(&inode_wb_list_lock);
+ assert_spin_locked(&wb->list_lock);
if (!list_empty(&wb->b_dirty)) {
struct inode *tail;
@@ -213,11 +214,9 @@ static void redirty_tail(struct inode *inode)
/*
* requeue inode for re-scanning after bdi->b_io list is exhausted.
*/
-static void requeue_io(struct inode *inode)
+static void requeue_io(struct inode *inode, struct bdi_writeback *wb)
{
- struct bdi_writeback *wb = &inode_to_bdi(inode)->wb;
-
- assert_spin_locked(&inode_wb_list_lock);
+ assert_spin_locked(&wb->list_lock);
list_move(&inode->i_wb_list, &wb->b_more_io);
}
@@ -225,7 +224,7 @@ static void inode_sync_complete(struct inode *inode)
{
/*
* Prevent speculative execution through
- * spin_unlock(&inode_wb_list_lock);
+ * spin_unlock(&wb->list_lock);
*/
smp_mb();
@@ -250,15 +249,16 @@ static bool inode_dirtied_after(struct inode *inode, unsigned long t)
/*
* Move expired dirty inodes from @delaying_queue to @dispatch_queue.
*/
-static void move_expired_inodes(struct list_head *delaying_queue,
+static int move_expired_inodes(struct list_head *delaying_queue,
struct list_head *dispatch_queue,
- unsigned long *older_than_this)
+ unsigned long *older_than_this)
{
LIST_HEAD(tmp);
struct list_head *pos, *node;
struct super_block *sb = NULL;
struct inode *inode;
int do_sb_sort = 0;
+ int moved = 0;
while (!list_empty(delaying_queue)) {
inode = wb_inode(delaying_queue->prev);
@@ -269,12 +269,13 @@ static void move_expired_inodes(struct list_head *delaying_queue,
do_sb_sort = 1;
sb = inode->i_sb;
list_move(&inode->i_wb_list, &tmp);
+ moved++;
}
/* just one sb in list, splice to dispatch_queue and we're done */
if (!do_sb_sort) {
list_splice(&tmp, dispatch_queue);
- return;
+ goto out;
}
/* Move inodes from one superblock together */
@@ -286,6 +287,8 @@ static void move_expired_inodes(struct list_head *delaying_queue,
list_move(&inode->i_wb_list, dispatch_queue);
}
}
+out:
+ return moved;
}
/*
@@ -301,9 +304,11 @@ static void move_expired_inodes(struct list_head *delaying_queue,
*/
static void queue_io(struct bdi_writeback *wb, unsigned long *older_than_this)
{
- assert_spin_locked(&inode_wb_list_lock);
+ int moved;
+ assert_spin_locked(&wb->list_lock);
list_splice_init(&wb->b_more_io, &wb->b_io);
- move_expired_inodes(&wb->b_dirty, &wb->b_io, older_than_this);
+ moved = move_expired_inodes(&wb->b_dirty, &wb->b_io, older_than_this);
+ trace_writeback_queue_io(wb, older_than_this, moved);
}
static int write_inode(struct inode *inode, struct writeback_control *wbc)
@@ -316,7 +321,8 @@ static int write_inode(struct inode *inode, struct writeback_control *wbc)
/*
* Wait for writeback on an inode to complete.
*/
-static void inode_wait_for_writeback(struct inode *inode)
+static void inode_wait_for_writeback(struct inode *inode,
+ struct bdi_writeback *wb)
{
DEFINE_WAIT_BIT(wq, &inode->i_state, __I_SYNC);
wait_queue_head_t *wqh;
@@ -324,15 +330,15 @@ static void inode_wait_for_writeback(struct inode *inode)
wqh = bit_waitqueue(&inode->i_state, __I_SYNC);
while (inode->i_state & I_SYNC) {
spin_unlock(&inode->i_lock);
- spin_unlock(&inode_wb_list_lock);
+ spin_unlock(&wb->list_lock);
__wait_on_bit(wqh, &wq, inode_wait, TASK_UNINTERRUPTIBLE);
- spin_lock(&inode_wb_list_lock);
+ spin_lock(&wb->list_lock);
spin_lock(&inode->i_lock);
}
}
/*
- * Write out an inode's dirty pages. Called under inode_wb_list_lock and
+ * Write out an inode's dirty pages. Called under wb->list_lock and
* inode->i_lock. Either the caller has an active reference on the inode or
* the inode has I_WILL_FREE set.
*
@@ -343,13 +349,15 @@ static void inode_wait_for_writeback(struct inode *inode)
* livelocks, etc.
*/
static int
-writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
+writeback_single_inode(struct inode *inode, struct bdi_writeback *wb,
+ struct writeback_control *wbc)
{
struct address_space *mapping = inode->i_mapping;
+ long nr_to_write = wbc->nr_to_write;
unsigned dirty;
int ret;
- assert_spin_locked(&inode_wb_list_lock);
+ assert_spin_locked(&wb->list_lock);
assert_spin_locked(&inode->i_lock);
if (!atomic_read(&inode->i_count))
@@ -367,14 +375,16 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
* completed a full scan of b_io.
*/
if (wbc->sync_mode != WB_SYNC_ALL) {
- requeue_io(inode);
+ requeue_io(inode, wb);
+ trace_writeback_single_inode_requeue(inode, wbc,
+ nr_to_write);
return 0;
}
/*
* It's a data-integrity sync. We must wait.
*/
- inode_wait_for_writeback(inode);
+ inode_wait_for_writeback(inode, wb);
}
BUG_ON(inode->i_state & I_SYNC);
@@ -383,7 +393,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
inode->i_state |= I_SYNC;
inode->i_state &= ~I_DIRTY_PAGES;
spin_unlock(&inode->i_lock);
- spin_unlock(&inode_wb_list_lock);
+ spin_unlock(&wb->list_lock);
ret = do_writepages(mapping, wbc);
@@ -414,10 +424,19 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
ret = err;
}
- spin_lock(&inode_wb_list_lock);
+ spin_lock(&wb->list_lock);
spin_lock(&inode->i_lock);
inode->i_state &= ~I_SYNC;
if (!(inode->i_state & I_FREEING)) {
+ /*
+ * Sync livelock prevention. Each inode is tagged and synced in
+ * one shot. If still dirty, it will be redirty_tail()'ed below.
+ * Update the dirty time to prevent enqueue and sync it again.
+ */
+ if ((inode->i_state & I_DIRTY) &&
+ (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages))
+ inode->dirtied_when = jiffies;
+
if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
/*
* We didn't write back all the pages. nfs_writepages()
@@ -428,7 +447,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
/*
* slice used up: queue for next turn
*/
- requeue_io(inode);
+ requeue_io(inode, wb);
} else {
/*
* Writeback blocked by something other than
@@ -437,7 +456,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
* retrying writeback of the dirty page/inode
* that cannot be performed immediately.
*/
- redirty_tail(inode);
+ redirty_tail(inode, wb);
}
} else if (inode->i_state & I_DIRTY) {
/*
@@ -446,7 +465,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
* submission or metadata updates after data IO
* completion.
*/
- redirty_tail(inode);
+ redirty_tail(inode, wb);
} else {
/*
* The inode is clean. At this point we either have
@@ -457,9 +476,41 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
}
}
inode_sync_complete(inode);
+ trace_writeback_single_inode(inode, wbc, nr_to_write);
return ret;
}
+static long writeback_chunk_size(struct backing_dev_info *bdi,
+ struct wb_writeback_work *work)
+{
+ long pages;
+
+ /*
+ * WB_SYNC_ALL mode does livelock avoidance by syncing dirty
+ * inodes/pages in one big loop. Setting wbc.nr_to_write=LONG_MAX
+ * here avoids calling into writeback_inodes_wb() more than once.
+ *
+ * The intended call sequence for WB_SYNC_ALL writeback is:
+ *
+ * wb_writeback()
+ * writeback_sb_inodes() <== called only once
+ * write_cache_pages() <== called once for each inode
+ * (quickly) tag currently dirty pages
+ * (maybe slowly) sync all tagged pages
+ */
+ if (work->sync_mode == WB_SYNC_ALL || work->tagged_writepages)
+ pages = LONG_MAX;
+ else {
+ pages = min(bdi->avg_write_bandwidth / 2,
+ global_dirty_limit / DIRTY_SCOPE);
+ pages = min(pages, work->nr_pages);
+ pages = round_down(pages + MIN_WRITEBACK_PAGES,
+ MIN_WRITEBACK_PAGES);
+ }
+
+ return pages;
+}
+
/*
* Write a portion of b_io inodes which belong to @sb.
*
@@ -467,24 +518,36 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
* inodes. Otherwise write only ones which go sequentially
* in reverse order.
*
- * Return 1, if the caller writeback routine should be
- * interrupted. Otherwise return 0.
+ * Return the number of pages and/or inodes written.
*/
-static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb,
- struct writeback_control *wbc, bool only_this_sb)
+static long writeback_sb_inodes(struct super_block *sb,
+ struct bdi_writeback *wb,
+ struct wb_writeback_work *work)
{
+ struct writeback_control wbc = {
+ .sync_mode = work->sync_mode,
+ .tagged_writepages = work->tagged_writepages,
+ .for_kupdate = work->for_kupdate,
+ .for_background = work->for_background,
+ .range_cyclic = work->range_cyclic,
+ .range_start = 0,
+ .range_end = LLONG_MAX,
+ };
+ unsigned long start_time = jiffies;
+ long write_chunk;
+ long wrote = 0; /* count both pages and inodes */
+
while (!list_empty(&wb->b_io)) {
- long pages_skipped;
struct inode *inode = wb_inode(wb->b_io.prev);
if (inode->i_sb != sb) {
- if (only_this_sb) {
+ if (work->sb) {
/*
* We only want to write back data for this
* superblock, move all inodes not belonging
* to it back onto the dirty list.
*/
- redirty_tail(inode);
+ redirty_tail(inode, wb);
continue;
}
@@ -493,7 +556,7 @@ static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb,
* Bounce back to the caller to unpin this and
* pin the next superblock.
*/
- return 0;
+ break;
}
/*
@@ -504,95 +567,91 @@ static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb,
spin_lock(&inode->i_lock);
if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) {
spin_unlock(&inode->i_lock);
- requeue_io(inode);
+ redirty_tail(inode, wb);
continue;
}
-
- /*
- * Was this inode dirtied after sync_sb_inodes was called?
- * This keeps sync from extra jobs and livelock.
- */
- if (inode_dirtied_after(inode, wbc->wb_start)) {
- spin_unlock(&inode->i_lock);
- return 1;
- }
-
__iget(inode);
+ write_chunk = writeback_chunk_size(wb->bdi, work);
+ wbc.nr_to_write = write_chunk;
+ wbc.pages_skipped = 0;
- pages_skipped = wbc->pages_skipped;
- writeback_single_inode(inode, wbc);
- if (wbc->pages_skipped != pages_skipped) {
+ writeback_single_inode(inode, wb, &wbc);
+
+ work->nr_pages -= write_chunk - wbc.nr_to_write;
+ wrote += write_chunk - wbc.nr_to_write;
+ if (!(inode->i_state & I_DIRTY))
+ wrote++;
+ if (wbc.pages_skipped) {
/*
* writeback is not making progress due to locked
* buffers. Skip this inode for now.
*/
- redirty_tail(inode);
+ redirty_tail(inode, wb);
}
spin_unlock(&inode->i_lock);
- spin_unlock(&inode_wb_list_lock);
+ spin_unlock(&wb->list_lock);
iput(inode);
cond_resched();
- spin_lock(&inode_wb_list_lock);
- if (wbc->nr_to_write <= 0) {
- wbc->more_io = 1;
- return 1;
+ spin_lock(&wb->list_lock);
+ /*
+ * bail out to wb_writeback() often enough to check
+ * background threshold and other termination conditions.
+ */
+ if (wrote) {
+ if (time_is_before_jiffies(start_time + HZ / 10UL))
+ break;
+ if (work->nr_pages <= 0)
+ break;
}
- if (!list_empty(&wb->b_more_io))
- wbc->more_io = 1;
}
- /* b_io is empty */
- return 1;
+ return wrote;
}
-void writeback_inodes_wb(struct bdi_writeback *wb,
- struct writeback_control *wbc)
+static long __writeback_inodes_wb(struct bdi_writeback *wb,
+ struct wb_writeback_work *work)
{
- int ret = 0;
-
- if (!wbc->wb_start)
- wbc->wb_start = jiffies; /* livelock avoidance */
- spin_lock(&inode_wb_list_lock);
- if (!wbc->for_kupdate || list_empty(&wb->b_io))
- queue_io(wb, wbc->older_than_this);
+ unsigned long start_time = jiffies;
+ long wrote = 0;
while (!list_empty(&wb->b_io)) {
struct inode *inode = wb_inode(wb->b_io.prev);
struct super_block *sb = inode->i_sb;
if (!grab_super_passive(sb)) {
- requeue_io(inode);
+ requeue_io(inode, wb);
continue;
}
- ret = writeback_sb_inodes(sb, wb, wbc, false);
+ wrote += writeback_sb_inodes(sb, wb, work);
drop_super(sb);
- if (ret)
- break;
+ /* refer to the same tests at the end of writeback_sb_inodes */
+ if (wrote) {
+ if (time_is_before_jiffies(start_time + HZ / 10UL))
+ break;
+ if (work->nr_pages <= 0)
+ break;
+ }
}
- spin_unlock(&inode_wb_list_lock);
/* Leave any unwritten inodes on b_io */
+ return wrote;
}
-static void __writeback_inodes_sb(struct super_block *sb,
- struct bdi_writeback *wb, struct writeback_control *wbc)
+long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages)
{
- WARN_ON(!rwsem_is_locked(&sb->s_umount));
+ struct wb_writeback_work work = {
+ .nr_pages = nr_pages,
+ .sync_mode = WB_SYNC_NONE,
+ .range_cyclic = 1,
+ };
- spin_lock(&inode_wb_list_lock);
- if (!wbc->for_kupdate || list_empty(&wb->b_io))
- queue_io(wb, wbc->older_than_this);
- writeback_sb_inodes(sb, wb, wbc, true);
- spin_unlock(&inode_wb_list_lock);
-}
+ spin_lock(&wb->list_lock);
+ if (list_empty(&wb->b_io))
+ queue_io(wb, NULL);
+ __writeback_inodes_wb(wb, &work);
+ spin_unlock(&wb->list_lock);
-/*
- * The maximum number of pages to writeout in a single bdi flush/kupdate
- * operation. We do this so we don't hold I_SYNC against an inode for
- * enormous amounts of time, which would block a userspace task which has
- * been forced to throttle against that inode. Also, the code reevaluates
- * the dirty each time it has written this many pages.
- */
-#define MAX_WRITEBACK_PAGES 1024
+ return nr_pages - work.nr_pages;
+}
static inline bool over_bground_thresh(void)
{
@@ -605,6 +664,16 @@ static inline bool over_bground_thresh(void)
}
/*
+ * Called under wb->list_lock. If there are multiple wb per bdi,
+ * only the flusher working on the first wb should do it.
+ */
+static void wb_update_bandwidth(struct bdi_writeback *wb,
+ unsigned long start_time)
+{
+ __bdi_update_bandwidth(wb->bdi, 0, 0, 0, 0, start_time);
+}
+
+/*
* Explicit flushing or periodic writeback of "old" data.
*
* Define "old": the first time one of an inode's pages is dirtied, we mark the
@@ -622,47 +691,16 @@ static inline bool over_bground_thresh(void)
static long wb_writeback(struct bdi_writeback *wb,
struct wb_writeback_work *work)
{
- struct writeback_control wbc = {
- .sync_mode = work->sync_mode,
- .older_than_this = NULL,
- .for_kupdate = work->for_kupdate,
- .for_background = work->for_background,
- .range_cyclic = work->range_cyclic,
- };
+ unsigned long wb_start = jiffies;
+ long nr_pages = work->nr_pages;
unsigned long oldest_jif;
- long wrote = 0;
- long write_chunk;
struct inode *inode;
+ long progress;
- if (wbc.for_kupdate) {
- wbc.older_than_this = &oldest_jif;
- oldest_jif = jiffies -
- msecs_to_jiffies(dirty_expire_interval * 10);
- }
- if (!wbc.range_cyclic) {
- wbc.range_start = 0;
- wbc.range_end = LLONG_MAX;
- }
+ oldest_jif = jiffies;
+ work->older_than_this = &oldest_jif;
- /*
- * WB_SYNC_ALL mode does livelock avoidance by syncing dirty
- * inodes/pages in one big loop. Setting wbc.nr_to_write=LONG_MAX
- * here avoids calling into writeback_inodes_wb() more than once.
- *
- * The intended call sequence for WB_SYNC_ALL writeback is:
- *
- * wb_writeback()
- * __writeback_inodes_sb() <== called only once
- * write_cache_pages() <== called once for each inode
- * (quickly) tag currently dirty pages
- * (maybe slowly) sync all tagged pages
- */
- if (wbc.sync_mode == WB_SYNC_NONE)
- write_chunk = MAX_WRITEBACK_PAGES;
- else
- write_chunk = LONG_MAX;
-
- wbc.wb_start = jiffies; /* livelock avoidance */
+ spin_lock(&wb->list_lock);
for (;;) {
/*
* Stop writeback when nr_pages has been consumed
@@ -687,52 +725,54 @@ static long wb_writeback(struct bdi_writeback *wb,
if (work->for_background && !over_bground_thresh())
break;
- wbc.more_io = 0;
- wbc.nr_to_write = write_chunk;
- wbc.pages_skipped = 0;
+ if (work->for_kupdate) {
+ oldest_jif = jiffies -
+ msecs_to_jiffies(dirty_expire_interval * 10);
+ work->older_than_this = &oldest_jif;
+ }
- trace_wbc_writeback_start(&wbc, wb->bdi);
+ trace_writeback_start(wb->bdi, work);
+ if (list_empty(&wb->b_io))
+ queue_io(wb, work->older_than_this);
if (work->sb)
- __writeback_inodes_sb(work->sb, wb, &wbc);
+ progress = writeback_sb_inodes(work->sb, wb, work);
else
- writeback_inodes_wb(wb, &wbc);
- trace_wbc_writeback_written(&wbc, wb->bdi);
+ progress = __writeback_inodes_wb(wb, work);
+ trace_writeback_written(wb->bdi, work);
- work->nr_pages -= write_chunk - wbc.nr_to_write;
- wrote += write_chunk - wbc.nr_to_write;
+ wb_update_bandwidth(wb, wb_start);
/*
- * If we consumed everything, see if we have more
+ * Did we write something? Try for more
+ *
+ * Dirty inodes are moved to b_io for writeback in batches.
+ * The completion of the current batch does not necessarily
+ * mean the overall work is done. So we keep looping as long
+ * as made some progress on cleaning pages or inodes.
*/
- if (wbc.nr_to_write <= 0)
+ if (progress)
continue;
/*
- * Didn't write everything and we don't have more IO, bail
+ * No more inodes for IO, bail
*/
- if (!wbc.more_io)
+ if (list_empty(&wb->b_more_io))
break;
/*
- * Did we write something? Try for more
- */
- if (wbc.nr_to_write < write_chunk)
- continue;
- /*
* Nothing written. Wait for some inode to
* become available for writeback. Otherwise
* we'll just busyloop.
*/
- spin_lock(&inode_wb_list_lock);
if (!list_empty(&wb->b_more_io)) {
+ trace_writeback_wait(wb->bdi, work);
inode = wb_inode(wb->b_more_io.prev);
- trace_wbc_writeback_wait(&wbc, wb->bdi);
spin_lock(&inode->i_lock);
- inode_wait_for_writeback(inode);
+ inode_wait_for_writeback(inode, wb);
spin_unlock(&inode->i_lock);
}
- spin_unlock(&inode_wb_list_lock);
}
+ spin_unlock(&wb->list_lock);
- return wrote;
+ return nr_pages - work->nr_pages;
}
/*
@@ -1063,10 +1103,10 @@ void __mark_inode_dirty(struct inode *inode, int flags)
}
spin_unlock(&inode->i_lock);
- spin_lock(&inode_wb_list_lock);
+ spin_lock(&bdi->wb.list_lock);
inode->dirtied_when = jiffies;
list_move(&inode->i_wb_list, &bdi->wb.b_dirty);
- spin_unlock(&inode_wb_list_lock);
+ spin_unlock(&bdi->wb.list_lock);
if (wakeup_bdi)
bdi_wakeup_thread_delayed(bdi);
@@ -1162,10 +1202,11 @@ void writeback_inodes_sb_nr(struct super_block *sb, unsigned long nr)
{
DECLARE_COMPLETION_ONSTACK(done);
struct wb_writeback_work work = {
- .sb = sb,
- .sync_mode = WB_SYNC_NONE,
- .done = &done,
- .nr_pages = nr,
+ .sb = sb,
+ .sync_mode = WB_SYNC_NONE,
+ .tagged_writepages = 1,
+ .done = &done,
+ .nr_pages = nr,
};
WARN_ON(!rwsem_is_locked(&sb->s_umount));
@@ -1267,6 +1308,7 @@ EXPORT_SYMBOL(sync_inodes_sb);
*/
int write_inode_now(struct inode *inode, int sync)
{
+ struct bdi_writeback *wb = &inode_to_bdi(inode)->wb;
int ret;
struct writeback_control wbc = {
.nr_to_write = LONG_MAX,
@@ -1279,11 +1321,11 @@ int write_inode_now(struct inode *inode, int sync)
wbc.nr_to_write = 0;
might_sleep();
- spin_lock(&inode_wb_list_lock);
+ spin_lock(&wb->list_lock);
spin_lock(&inode->i_lock);
- ret = writeback_single_inode(inode, &wbc);
+ ret = writeback_single_inode(inode, wb, &wbc);
spin_unlock(&inode->i_lock);
- spin_unlock(&inode_wb_list_lock);
+ spin_unlock(&wb->list_lock);
if (sync)
inode_sync_wait(inode);
return ret;
@@ -1303,13 +1345,14 @@ EXPORT_SYMBOL(write_inode_now);
*/
int sync_inode(struct inode *inode, struct writeback_control *wbc)
{
+ struct bdi_writeback *wb = &inode_to_bdi(inode)->wb;
int ret;
- spin_lock(&inode_wb_list_lock);
+ spin_lock(&wb->list_lock);
spin_lock(&inode->i_lock);
- ret = writeback_single_inode(inode, wbc);
+ ret = writeback_single_inode(inode, wb, wbc);
spin_unlock(&inode->i_lock);
- spin_unlock(&inode_wb_list_lock);
+ spin_unlock(&wb->list_lock);
return ret;
}
EXPORT_SYMBOL(sync_inode);
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index 29e1ace7953d..8a139ff1919f 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -16,7 +16,7 @@
#include <linux/gfs2_ondisk.h>
#include <linux/rcupdate.h>
#include <linux/rculist_bl.h>
-#include <asm/atomic.h>
+#include <linux/atomic.h>
#include "gfs2.h"
#include "incore.h"
diff --git a/fs/inode.c b/fs/inode.c
index 96c77b81167c..d0c72ff6b30e 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -37,7 +37,7 @@
* inode->i_sb->s_inode_lru, inode->i_lru
* inode_sb_list_lock protects:
* sb->s_inodes, inode->i_sb_list
- * inode_wb_list_lock protects:
+ * bdi->wb.list_lock protects:
* bdi->wb.b_{dirty,io,more_io}, inode->i_wb_list
* inode_hash_lock protects:
* inode_hashtable, inode->i_hash
@@ -48,7 +48,7 @@
* inode->i_lock
* inode->i_sb->s_inode_lru_lock
*
- * inode_wb_list_lock
+ * bdi->wb.list_lock
* inode->i_lock
*
* inode_hash_lock
@@ -65,7 +65,6 @@ static struct hlist_head *inode_hashtable __read_mostly;
static __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_hash_lock);
__cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_sb_list_lock);
-__cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_wb_list_lock);
/*
* Empty aops. Can be used for the cases where the user does not
@@ -362,9 +361,11 @@ EXPORT_SYMBOL_GPL(inode_sb_list_add);
static inline void inode_sb_list_del(struct inode *inode)
{
- spin_lock(&inode_sb_list_lock);
- list_del_init(&inode->i_sb_list);
- spin_unlock(&inode_sb_list_lock);
+ if (!list_empty(&inode->i_sb_list)) {
+ spin_lock(&inode_sb_list_lock);
+ list_del_init(&inode->i_sb_list);
+ spin_unlock(&inode_sb_list_lock);
+ }
}
static unsigned long hash(struct super_block *sb, unsigned long hashval)
@@ -797,6 +798,29 @@ unsigned int get_next_ino(void)
EXPORT_SYMBOL(get_next_ino);
/**
+ * new_inode_pseudo - obtain an inode
+ * @sb: superblock
+ *
+ * Allocates a new inode for given superblock.
+ * Inode wont be chained in superblock s_inodes list
+ * This means :
+ * - fs can't be unmount
+ * - quotas, fsnotify, writeback can't work
+ */
+struct inode *new_inode_pseudo(struct super_block *sb)
+{
+ struct inode *inode = alloc_inode(sb);
+
+ if (inode) {
+ spin_lock(&inode->i_lock);
+ inode->i_state = 0;
+ spin_unlock(&inode->i_lock);
+ INIT_LIST_HEAD(&inode->i_sb_list);
+ }
+ return inode;
+}
+
+/**
* new_inode - obtain an inode
* @sb: superblock
*
@@ -814,13 +838,9 @@ struct inode *new_inode(struct super_block *sb)
spin_lock_prefetch(&inode_sb_list_lock);
- inode = alloc_inode(sb);
- if (inode) {
- spin_lock(&inode->i_lock);
- inode->i_state = 0;
- spin_unlock(&inode->i_lock);
+ inode = new_inode_pseudo(sb);
+ if (inode)
inode_sb_list_add(inode);
- }
return inode;
}
EXPORT_SYMBOL(new_inode);
diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c
index e4b87bc1fa56..f94fc48ff3a0 100644
--- a/fs/jbd/checkpoint.c
+++ b/fs/jbd/checkpoint.c
@@ -22,6 +22,8 @@
#include <linux/jbd.h>
#include <linux/errno.h>
#include <linux/slab.h>
+#include <linux/blkdev.h>
+#include <trace/events/jbd.h>
/*
* Unlink a buffer from a transaction checkpoint list.
@@ -95,10 +97,14 @@ static int __try_to_free_cp_buf(struct journal_head *jh)
if (jh->b_jlist == BJ_None && !buffer_locked(bh) &&
!buffer_dirty(bh) && !buffer_write_io_error(bh)) {
+ /*
+ * Get our reference so that bh cannot be freed before
+ * we unlock it
+ */
+ get_bh(bh);
JBUFFER_TRACE(jh, "remove from checkpoint list");
ret = __journal_remove_checkpoint(jh) + 1;
jbd_unlock_bh_state(bh);
- journal_remove_journal_head(bh);
BUFFER_TRACE(bh, "release");
__brelse(bh);
} else {
@@ -220,8 +226,8 @@ restart:
spin_lock(&journal->j_list_lock);
goto restart;
}
+ get_bh(bh);
if (buffer_locked(bh)) {
- get_bh(bh);
spin_unlock(&journal->j_list_lock);
jbd_unlock_bh_state(bh);
wait_on_buffer(bh);
@@ -240,7 +246,6 @@ restart:
*/
released = __journal_remove_checkpoint(jh);
jbd_unlock_bh_state(bh);
- journal_remove_journal_head(bh);
__brelse(bh);
}
@@ -253,9 +258,12 @@ static void
__flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count)
{
int i;
+ struct blk_plug plug;
+ blk_start_plug(&plug);
for (i = 0; i < *batch_count; i++)
- write_dirty_buffer(bhs[i], WRITE);
+ write_dirty_buffer(bhs[i], WRITE_SYNC);
+ blk_finish_plug(&plug);
for (i = 0; i < *batch_count; i++) {
struct buffer_head *bh = bhs[i];
@@ -304,12 +312,12 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
ret = 1;
if (unlikely(buffer_write_io_error(bh)))
ret = -EIO;
+ get_bh(bh);
J_ASSERT_JH(jh, !buffer_jbddirty(bh));
BUFFER_TRACE(bh, "remove from checkpoint");
__journal_remove_checkpoint(jh);
spin_unlock(&journal->j_list_lock);
jbd_unlock_bh_state(bh);
- journal_remove_journal_head(bh);
__brelse(bh);
} else {
/*
@@ -358,6 +366,7 @@ int log_do_checkpoint(journal_t *journal)
* journal straight away.
*/
result = cleanup_journal_tail(journal);
+ trace_jbd_checkpoint(journal, result);
jbd_debug(1, "cleanup_journal_tail returned %d\n", result);
if (result <= 0)
return result;
@@ -503,6 +512,7 @@ int cleanup_journal_tail(journal_t *journal)
if (blocknr < journal->j_tail)
freed = freed + journal->j_last - journal->j_first;
+ trace_jbd_cleanup_journal_tail(journal, first_tid, blocknr, freed);
jbd_debug(1,
"Cleaning journal tail from %d to %d (offset %u), "
"freeing %u\n",
@@ -523,9 +533,9 @@ int cleanup_journal_tail(journal_t *journal)
/*
* journal_clean_one_cp_list
*
- * Find all the written-back checkpoint buffers in the given list and release them.
+ * Find all the written-back checkpoint buffers in the given list and release
+ * them.
*
- * Called with the journal locked.
* Called with j_list_lock held.
* Returns number of bufers reaped (for debug)
*/
@@ -632,8 +642,8 @@ out:
* checkpoint lists.
*
* The function returns 1 if it frees the transaction, 0 otherwise.
+ * The function can free jh and bh.
*
- * This function is called with the journal locked.
* This function is called with j_list_lock held.
* This function is called with jbd_lock_bh_state(jh2bh(jh))
*/
@@ -652,13 +662,14 @@ int __journal_remove_checkpoint(struct journal_head *jh)
}
journal = transaction->t_journal;
+ JBUFFER_TRACE(jh, "removing from transaction");
__buffer_unlink(jh);
jh->b_cp_transaction = NULL;
+ journal_put_journal_head(jh);
if (transaction->t_checkpoint_list != NULL ||
transaction->t_checkpoint_io_list != NULL)
goto out;
- JBUFFER_TRACE(jh, "transaction has no more buffers");
/*
* There is one special case to worry about: if we have just pulled the
@@ -669,10 +680,8 @@ int __journal_remove_checkpoint(struct journal_head *jh)
* The locking here around t_state is a bit sleazy.
* See the comment at the end of journal_commit_transaction().
*/
- if (transaction->t_state != T_FINISHED) {
- JBUFFER_TRACE(jh, "belongs to running/committing transaction");
+ if (transaction->t_state != T_FINISHED)
goto out;
- }
/* OK, that was the last buffer for the transaction: we can now
safely remove this transaction from the log */
@@ -684,7 +693,6 @@ int __journal_remove_checkpoint(struct journal_head *jh)
wake_up(&journal->j_wait_logspace);
ret = 1;
out:
- JBUFFER_TRACE(jh, "exit");
return ret;
}
@@ -703,6 +711,8 @@ void __journal_insert_checkpoint(struct journal_head *jh,
J_ASSERT_JH(jh, buffer_dirty(jh2bh(jh)) || buffer_jbddirty(jh2bh(jh)));
J_ASSERT_JH(jh, jh->b_cp_transaction == NULL);
+ /* Get reference for checkpointing transaction */
+ journal_grab_journal_head(jh2bh(jh));
jh->b_cp_transaction = transaction;
if (!transaction->t_checkpoint_list) {
@@ -752,6 +762,7 @@ void __journal_drop_transaction(journal_t *journal, transaction_t *transaction)
J_ASSERT(journal->j_committing_transaction != transaction);
J_ASSERT(journal->j_running_transaction != transaction);
+ trace_jbd_drop_transaction(journal, transaction);
jbd_debug(1, "Dropping transaction %d, all done\n", transaction->t_tid);
kfree(transaction);
}
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index 72ffa974b0b8..8799207df058 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -21,6 +21,7 @@
#include <linux/pagemap.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
+#include <trace/events/jbd.h>
/*
* Default IO end handler for temporary BJ_IO buffer_heads.
@@ -204,6 +205,8 @@ write_out_data:
if (!trylock_buffer(bh)) {
BUFFER_TRACE(bh, "needs blocking lock");
spin_unlock(&journal->j_list_lock);
+ trace_jbd_do_submit_data(journal,
+ commit_transaction);
/* Write out all data to prevent deadlocks */
journal_do_submit_data(wbuf, bufs, write_op);
bufs = 0;
@@ -236,6 +239,8 @@ write_out_data:
jbd_unlock_bh_state(bh);
if (bufs == journal->j_wbufsize) {
spin_unlock(&journal->j_list_lock);
+ trace_jbd_do_submit_data(journal,
+ commit_transaction);
journal_do_submit_data(wbuf, bufs, write_op);
bufs = 0;
goto write_out_data;
@@ -253,10 +258,6 @@ write_out_data:
jbd_unlock_bh_state(bh);
if (locked)
unlock_buffer(bh);
- journal_remove_journal_head(bh);
- /* One for our safety reference, other for
- * journal_remove_journal_head() */
- put_bh(bh);
release_data_buffer(bh);
}
@@ -266,6 +267,7 @@ write_out_data:
}
}
spin_unlock(&journal->j_list_lock);
+ trace_jbd_do_submit_data(journal, commit_transaction);
journal_do_submit_data(wbuf, bufs, write_op);
return err;
@@ -316,12 +318,14 @@ void journal_commit_transaction(journal_t *journal)
commit_transaction = journal->j_running_transaction;
J_ASSERT(commit_transaction->t_state == T_RUNNING);
+ trace_jbd_start_commit(journal, commit_transaction);
jbd_debug(1, "JBD: starting commit of transaction %d\n",
commit_transaction->t_tid);
spin_lock(&journal->j_state_lock);
commit_transaction->t_state = T_LOCKED;
+ trace_jbd_commit_locking(journal, commit_transaction);
spin_lock(&commit_transaction->t_handle_lock);
while (commit_transaction->t_updates) {
DEFINE_WAIT(wait);
@@ -392,6 +396,7 @@ void journal_commit_transaction(journal_t *journal)
*/
journal_switch_revoke_table(journal);
+ trace_jbd_commit_flushing(journal, commit_transaction);
commit_transaction->t_state = T_FLUSH;
journal->j_committing_transaction = commit_transaction;
journal->j_running_transaction = NULL;
@@ -446,14 +451,9 @@ void journal_commit_transaction(journal_t *journal)
}
if (buffer_jbd(bh) && bh2jh(bh) == jh &&
jh->b_transaction == commit_transaction &&
- jh->b_jlist == BJ_Locked) {
+ jh->b_jlist == BJ_Locked)
__journal_unfile_buffer(jh);
- jbd_unlock_bh_state(bh);
- journal_remove_journal_head(bh);
- put_bh(bh);
- } else {
- jbd_unlock_bh_state(bh);
- }
+ jbd_unlock_bh_state(bh);
release_data_buffer(bh);
cond_resched_lock(&journal->j_list_lock);
}
@@ -493,6 +493,7 @@ void journal_commit_transaction(journal_t *journal)
commit_transaction->t_state = T_COMMIT;
spin_unlock(&journal->j_state_lock);
+ trace_jbd_commit_logging(journal, commit_transaction);
J_ASSERT(commit_transaction->t_nr_buffers <=
commit_transaction->t_outstanding_credits);
@@ -797,10 +798,16 @@ restart_loop:
while (commit_transaction->t_forget) {
transaction_t *cp_transaction;
struct buffer_head *bh;
+ int try_to_free = 0;
jh = commit_transaction->t_forget;
spin_unlock(&journal->j_list_lock);
bh = jh2bh(jh);
+ /*
+ * Get a reference so that bh cannot be freed before we are
+ * done with it.
+ */
+ get_bh(bh);
jbd_lock_bh_state(bh);
J_ASSERT_JH(jh, jh->b_transaction == commit_transaction ||
jh->b_transaction == journal->j_running_transaction);
@@ -858,28 +865,27 @@ restart_loop:
__journal_insert_checkpoint(jh, commit_transaction);
if (is_journal_aborted(journal))
clear_buffer_jbddirty(bh);
- JBUFFER_TRACE(jh, "refile for checkpoint writeback");
- __journal_refile_buffer(jh);
- jbd_unlock_bh_state(bh);
} else {
J_ASSERT_BH(bh, !buffer_dirty(bh));
- /* The buffer on BJ_Forget list and not jbddirty means
+ /*
+ * The buffer on BJ_Forget list and not jbddirty means
* it has been freed by this transaction and hence it
* could not have been reallocated until this
* transaction has committed. *BUT* it could be
* reallocated once we have written all the data to
* disk and before we process the buffer on BJ_Forget
- * list. */
- JBUFFER_TRACE(jh, "refile or unfile freed buffer");
- __journal_refile_buffer(jh);
- if (!jh->b_transaction) {
- jbd_unlock_bh_state(bh);
- /* needs a brelse */
- journal_remove_journal_head(bh);
- release_buffer_page(bh);
- } else
- jbd_unlock_bh_state(bh);
+ * list.
+ */
+ if (!jh->b_next_transaction)
+ try_to_free = 1;
}
+ JBUFFER_TRACE(jh, "refile or unfile freed buffer");
+ __journal_refile_buffer(jh);
+ jbd_unlock_bh_state(bh);
+ if (try_to_free)
+ release_buffer_page(bh);
+ else
+ __brelse(bh);
cond_resched_lock(&journal->j_list_lock);
}
spin_unlock(&journal->j_list_lock);
@@ -946,6 +952,7 @@ restart_loop:
}
spin_unlock(&journal->j_list_lock);
+ trace_jbd_end_commit(journal, commit_transaction);
jbd_debug(1, "JBD: commit %d complete, head %d\n",
journal->j_commit_sequence, journal->j_tail_sequence);
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index e2d4285fbe90..9fe061fb8779 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -38,6 +38,9 @@
#include <linux/debugfs.h>
#include <linux/ratelimit.h>
+#define CREATE_TRACE_POINTS
+#include <trace/events/jbd.h>
+
#include <asm/uaccess.h>
#include <asm/page.h>
@@ -1065,6 +1068,7 @@ void journal_update_superblock(journal_t *journal, int wait)
} else
write_dirty_buffer(bh, WRITE);
+ trace_jbd_update_superblock_end(journal, wait);
out:
/* If we have just flushed the log (by marking s_start==0), then
* any future commit will have to be careful to update the
@@ -1799,10 +1803,9 @@ static void journal_free_journal_head(struct journal_head *jh)
* When a buffer has its BH_JBD bit set it is immune from being released by
* core kernel code, mainly via ->b_count.
*
- * A journal_head may be detached from its buffer_head when the journal_head's
- * b_transaction, b_cp_transaction and b_next_transaction pointers are NULL.
- * Various places in JBD call journal_remove_journal_head() to indicate that the
- * journal_head can be dropped if needed.
+ * A journal_head is detached from its buffer_head when the journal_head's
+ * b_jcount reaches zero. Running transaction (b_transaction) and checkpoint
+ * transaction (b_cp_transaction) hold their references to b_jcount.
*
* Various places in the kernel want to attach a journal_head to a buffer_head
* _before_ attaching the journal_head to a transaction. To protect the
@@ -1815,17 +1818,16 @@ static void journal_free_journal_head(struct journal_head *jh)
* (Attach a journal_head if needed. Increments b_jcount)
* struct journal_head *jh = journal_add_journal_head(bh);
* ...
- * jh->b_transaction = xxx;
- * journal_put_journal_head(jh);
- *
- * Now, the journal_head's b_jcount is zero, but it is safe from being released
- * because it has a non-zero b_transaction.
+ * (Get another reference for transaction)
+ * journal_grab_journal_head(bh);
+ * jh->b_transaction = xxx;
+ * (Put original reference)
+ * journal_put_journal_head(jh);
*/
/*
* Give a buffer_head a journal_head.
*
- * Doesn't need the journal lock.
* May sleep.
*/
struct journal_head *journal_add_journal_head(struct buffer_head *bh)
@@ -1889,61 +1891,29 @@ static void __journal_remove_journal_head(struct buffer_head *bh)
struct journal_head *jh = bh2jh(bh);
J_ASSERT_JH(jh, jh->b_jcount >= 0);
-
- get_bh(bh);
- if (jh->b_jcount == 0) {
- if (jh->b_transaction == NULL &&
- jh->b_next_transaction == NULL &&
- jh->b_cp_transaction == NULL) {
- J_ASSERT_JH(jh, jh->b_jlist == BJ_None);
- J_ASSERT_BH(bh, buffer_jbd(bh));
- J_ASSERT_BH(bh, jh2bh(jh) == bh);
- BUFFER_TRACE(bh, "remove journal_head");
- if (jh->b_frozen_data) {
- printk(KERN_WARNING "%s: freeing "
- "b_frozen_data\n",
- __func__);
- jbd_free(jh->b_frozen_data, bh->b_size);
- }
- if (jh->b_committed_data) {
- printk(KERN_WARNING "%s: freeing "
- "b_committed_data\n",
- __func__);
- jbd_free(jh->b_committed_data, bh->b_size);
- }
- bh->b_private = NULL;
- jh->b_bh = NULL; /* debug, really */
- clear_buffer_jbd(bh);
- __brelse(bh);
- journal_free_journal_head(jh);
- } else {
- BUFFER_TRACE(bh, "journal_head was locked");
- }
+ J_ASSERT_JH(jh, jh->b_transaction == NULL);
+ J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
+ J_ASSERT_JH(jh, jh->b_cp_transaction == NULL);
+ J_ASSERT_JH(jh, jh->b_jlist == BJ_None);
+ J_ASSERT_BH(bh, buffer_jbd(bh));
+ J_ASSERT_BH(bh, jh2bh(jh) == bh);
+ BUFFER_TRACE(bh, "remove journal_head");
+ if (jh->b_frozen_data) {
+ printk(KERN_WARNING "%s: freeing b_frozen_data\n", __func__);
+ jbd_free(jh->b_frozen_data, bh->b_size);
}
+ if (jh->b_committed_data) {
+ printk(KERN_WARNING "%s: freeing b_committed_data\n", __func__);
+ jbd_free(jh->b_committed_data, bh->b_size);
+ }
+ bh->b_private = NULL;
+ jh->b_bh = NULL; /* debug, really */
+ clear_buffer_jbd(bh);
+ journal_free_journal_head(jh);
}
/*
- * journal_remove_journal_head(): if the buffer isn't attached to a transaction
- * and has a zero b_jcount then remove and release its journal_head. If we did
- * see that the buffer is not used by any transaction we also "logically"
- * decrement ->b_count.
- *
- * We in fact take an additional increment on ->b_count as a convenience,
- * because the caller usually wants to do additional things with the bh
- * after calling here.
- * The caller of journal_remove_journal_head() *must* run __brelse(bh) at some
- * time. Once the caller has run __brelse(), the buffer is eligible for
- * reaping by try_to_free_buffers().
- */
-void journal_remove_journal_head(struct buffer_head *bh)
-{
- jbd_lock_bh_journal_head(bh);
- __journal_remove_journal_head(bh);
- jbd_unlock_bh_journal_head(bh);
-}
-
-/*
- * Drop a reference on the passed journal_head. If it fell to zero then try to
+ * Drop a reference on the passed journal_head. If it fell to zero then
* release the journal_head from the buffer_head.
*/
void journal_put_journal_head(struct journal_head *jh)
@@ -1953,11 +1923,12 @@ void journal_put_journal_head(struct journal_head *jh)
jbd_lock_bh_journal_head(bh);
J_ASSERT_JH(jh, jh->b_jcount > 0);
--jh->b_jcount;
- if (!jh->b_jcount && !jh->b_transaction) {
+ if (!jh->b_jcount) {
__journal_remove_journal_head(bh);
+ jbd_unlock_bh_journal_head(bh);
__brelse(bh);
- }
- jbd_unlock_bh_journal_head(bh);
+ } else
+ jbd_unlock_bh_journal_head(bh);
}
/*
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index f7ee81a065da..7e59c6e66f9b 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -26,6 +26,7 @@
#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/hrtimer.h>
+#include <linux/backing-dev.h>
static void __journal_temp_unlink_buffer(struct journal_head *jh);
@@ -99,11 +100,10 @@ static int start_this_handle(journal_t *journal, handle_t *handle)
alloc_transaction:
if (!journal->j_running_transaction) {
- new_transaction = kzalloc(sizeof(*new_transaction),
- GFP_NOFS|__GFP_NOFAIL);
+ new_transaction = kzalloc(sizeof(*new_transaction), GFP_NOFS);
if (!new_transaction) {
- ret = -ENOMEM;
- goto out;
+ congestion_wait(BLK_RW_ASYNC, HZ/50);
+ goto alloc_transaction;
}
}
@@ -696,7 +696,6 @@ repeat:
if (!jh->b_transaction) {
JBUFFER_TRACE(jh, "no transaction");
J_ASSERT_JH(jh, !jh->b_next_transaction);
- jh->b_transaction = transaction;
JBUFFER_TRACE(jh, "file as BJ_Reserved");
spin_lock(&journal->j_list_lock);
__journal_file_buffer(jh, transaction, BJ_Reserved);
@@ -818,7 +817,6 @@ int journal_get_create_access(handle_t *handle, struct buffer_head *bh)
* committed and so it's safe to clear the dirty bit.
*/
clear_buffer_dirty(jh2bh(jh));
- jh->b_transaction = transaction;
/* first access by this transaction */
jh->b_modified = 0;
@@ -844,8 +842,8 @@ int journal_get_create_access(handle_t *handle, struct buffer_head *bh)
*/
JBUFFER_TRACE(jh, "cancelling revoke");
journal_cancel_revoke(handle, jh);
- journal_put_journal_head(jh);
out:
+ journal_put_journal_head(jh);
return err;
}
@@ -1069,8 +1067,9 @@ int journal_dirty_data(handle_t *handle, struct buffer_head *bh)
ret = -EIO;
goto no_journal;
}
-
- if (jh->b_transaction != NULL) {
+ /* We might have slept so buffer could be refiled now */
+ if (jh->b_transaction != NULL &&
+ jh->b_transaction != handle->h_transaction) {
JBUFFER_TRACE(jh, "unfile from commit");
__journal_temp_unlink_buffer(jh);
/* It still points to the committing
@@ -1091,8 +1090,6 @@ int journal_dirty_data(handle_t *handle, struct buffer_head *bh)
if (jh->b_jlist != BJ_SyncData && jh->b_jlist != BJ_Locked) {
JBUFFER_TRACE(jh, "not on correct data list: unfile");
J_ASSERT_JH(jh, jh->b_jlist != BJ_Shadow);
- __journal_temp_unlink_buffer(jh);
- jh->b_transaction = handle->h_transaction;
JBUFFER_TRACE(jh, "file as data");
__journal_file_buffer(jh, handle->h_transaction,
BJ_SyncData);
@@ -1300,8 +1297,6 @@ int journal_forget (handle_t *handle, struct buffer_head *bh)
__journal_file_buffer(jh, transaction, BJ_Forget);
} else {
__journal_unfile_buffer(jh);
- journal_remove_journal_head(bh);
- __brelse(bh);
if (!buffer_jbd(bh)) {
spin_unlock(&journal->j_list_lock);
jbd_unlock_bh_state(bh);
@@ -1622,19 +1617,32 @@ static void __journal_temp_unlink_buffer(struct journal_head *jh)
mark_buffer_dirty(bh); /* Expose it to the VM */
}
+/*
+ * Remove buffer from all transactions.
+ *
+ * Called with bh_state lock and j_list_lock
+ *
+ * jh and bh may be already freed when this function returns.
+ */
void __journal_unfile_buffer(struct journal_head *jh)
{
__journal_temp_unlink_buffer(jh);
jh->b_transaction = NULL;
+ journal_put_journal_head(jh);
}
void journal_unfile_buffer(journal_t *journal, struct journal_head *jh)
{
- jbd_lock_bh_state(jh2bh(jh));
+ struct buffer_head *bh = jh2bh(jh);
+
+ /* Get reference so that buffer cannot be freed before we unlock it */
+ get_bh(bh);
+ jbd_lock_bh_state(bh);
spin_lock(&journal->j_list_lock);
__journal_unfile_buffer(jh);
spin_unlock(&journal->j_list_lock);
- jbd_unlock_bh_state(jh2bh(jh));
+ jbd_unlock_bh_state(bh);
+ __brelse(bh);
}
/*
@@ -1661,16 +1669,12 @@ __journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh)
/* A written-back ordered data buffer */
JBUFFER_TRACE(jh, "release data");
__journal_unfile_buffer(jh);
- journal_remove_journal_head(bh);
- __brelse(bh);
}
} else if (jh->b_cp_transaction != NULL && jh->b_transaction == NULL) {
/* written-back checkpointed metadata buffer */
if (jh->b_jlist == BJ_None) {
JBUFFER_TRACE(jh, "remove from checkpoint list");
__journal_remove_checkpoint(jh);
- journal_remove_journal_head(bh);
- __brelse(bh);
}
}
spin_unlock(&journal->j_list_lock);
@@ -1733,7 +1737,7 @@ int journal_try_to_free_buffers(journal_t *journal,
/*
* We take our own ref against the journal_head here to avoid
* having to add tons of locking around each instance of
- * journal_remove_journal_head() and journal_put_journal_head().
+ * journal_put_journal_head().
*/
jh = journal_grab_journal_head(bh);
if (!jh)
@@ -1770,10 +1774,9 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction)
int may_free = 1;
struct buffer_head *bh = jh2bh(jh);
- __journal_unfile_buffer(jh);
-
if (jh->b_cp_transaction) {
JBUFFER_TRACE(jh, "on running+cp transaction");
+ __journal_temp_unlink_buffer(jh);
/*
* We don't want to write the buffer anymore, clear the
* bit so that we don't confuse checks in
@@ -1784,8 +1787,7 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction)
may_free = 0;
} else {
JBUFFER_TRACE(jh, "on running transaction");
- journal_remove_journal_head(bh);
- __brelse(bh);
+ __journal_unfile_buffer(jh);
}
return may_free;
}
@@ -2070,6 +2072,8 @@ void __journal_file_buffer(struct journal_head *jh,
if (jh->b_transaction)
__journal_temp_unlink_buffer(jh);
+ else
+ journal_grab_journal_head(bh);
jh->b_transaction = transaction;
switch (jlist) {
@@ -2127,9 +2131,10 @@ void journal_file_buffer(struct journal_head *jh,
* already started to be used by a subsequent transaction, refile the
* buffer on that transaction's metadata list.
*
- * Called under journal->j_list_lock
- *
+ * Called under j_list_lock
* Called under jbd_lock_bh_state(jh2bh(jh))
+ *
+ * jh and bh may be already free when this function returns
*/
void __journal_refile_buffer(struct journal_head *jh)
{
@@ -2153,6 +2158,11 @@ void __journal_refile_buffer(struct journal_head *jh)
was_dirty = test_clear_buffer_jbddirty(bh);
__journal_temp_unlink_buffer(jh);
+ /*
+ * We set b_transaction here because b_next_transaction will inherit
+ * our jh reference and thus __journal_file_buffer() must not take a
+ * new one.
+ */
jh->b_transaction = jh->b_next_transaction;
jh->b_next_transaction = NULL;
if (buffer_freed(bh))
@@ -2169,30 +2179,21 @@ void __journal_refile_buffer(struct journal_head *jh)
}
/*
- * For the unlocked version of this call, also make sure that any
- * hanging journal_head is cleaned up if necessary.
- *
- * __journal_refile_buffer is usually called as part of a single locked
- * operation on a buffer_head, in which the caller is probably going to
- * be hooking the journal_head onto other lists. In that case it is up
- * to the caller to remove the journal_head if necessary. For the
- * unlocked journal_refile_buffer call, the caller isn't going to be
- * doing anything else to the buffer so we need to do the cleanup
- * ourselves to avoid a jh leak.
- *
- * *** The journal_head may be freed by this call! ***
+ * __journal_refile_buffer() with necessary locking added. We take our bh
+ * reference so that we can safely unlock bh.
+ *
+ * The jh and bh may be freed by this call.
*/
void journal_refile_buffer(journal_t *journal, struct journal_head *jh)
{
struct buffer_head *bh = jh2bh(jh);
+ /* Get reference so that buffer cannot be freed before we unlock it */
+ get_bh(bh);
jbd_lock_bh_state(bh);
spin_lock(&journal->j_list_lock);
-
__journal_refile_buffer(jh);
jbd_unlock_bh_state(bh);
- journal_remove_journal_head(bh);
-
spin_unlock(&journal->j_list_lock);
__brelse(bh);
}
diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c
index eeead33d8ef0..b81b35ddf4e4 100644
--- a/fs/jffs2/fs.c
+++ b/fs/jffs2/fs.c
@@ -80,7 +80,7 @@ int jffs2_do_setattr (struct inode *inode, struct iattr *iattr)
ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE);
if (ret) {
jffs2_free_raw_inode(ri);
- if (S_ISLNK(inode->i_mode & S_IFMT))
+ if (S_ISLNK(inode->i_mode))
kfree(mdata);
return ret;
}
diff --git a/fs/nfs/cache_lib.h b/fs/nfs/cache_lib.h
index 76f856e284e4..7cf6cafcc007 100644
--- a/fs/nfs/cache_lib.h
+++ b/fs/nfs/cache_lib.h
@@ -6,7 +6,7 @@
#include <linux/completion.h>
#include <linux/sunrpc/cache.h>
-#include <asm/atomic.h>
+#include <linux/atomic.h>
/*
* Deferred request handling
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index b35d25b98da6..1940f1a56a5f 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -53,7 +53,7 @@
#include <asm/system.h>
#include <asm/uaccess.h>
-#include <asm/atomic.h>
+#include <linux/atomic.h>
#include "internal.h"
#include "iostat.h"
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 08579312c57b..00e37501fa3b 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1566,8 +1566,7 @@ int nfs_write_inode(struct inode *inode, struct writeback_control *wbc)
int status;
bool sync = true;
- if (wbc->sync_mode == WB_SYNC_NONE || wbc->nonblocking ||
- wbc->for_background)
+ if (wbc->sync_mode == WB_SYNC_NONE)
sync = false;
status = pnfs_layoutcommit_inode(inode, sync);
diff --git a/fs/notify/group.c b/fs/notify/group.c
index d309f38449cb..63fc294a4692 100644
--- a/fs/notify/group.c
+++ b/fs/notify/group.c
@@ -26,7 +26,7 @@
#include <linux/fsnotify_backend.h>
#include "fsnotify.h"
-#include <asm/atomic.h>
+#include <linux/atomic.h>
/*
* Final freeing of a group
diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c
index 07ea8d3e6ea2..b13c00ac48eb 100644
--- a/fs/notify/inode_mark.c
+++ b/fs/notify/inode_mark.c
@@ -23,7 +23,7 @@
#include <linux/mutex.h>
#include <linux/spinlock.h>
-#include <asm/atomic.h>
+#include <linux/atomic.h>
#include <linux/fsnotify_backend.h>
#include "fsnotify.h"
diff --git a/fs/notify/mark.c b/fs/notify/mark.c
index 252ab1f6452b..e14587d55689 100644
--- a/fs/notify/mark.c
+++ b/fs/notify/mark.c
@@ -92,7 +92,7 @@
#include <linux/spinlock.h>
#include <linux/srcu.h>
-#include <asm/atomic.h>
+#include <linux/atomic.h>
#include <linux/fsnotify_backend.h>
#include "fsnotify.h"
diff --git a/fs/notify/notification.c b/fs/notify/notification.c
index f39260f8f865..ee188158a224 100644
--- a/fs/notify/notification.c
+++ b/fs/notify/notification.c
@@ -43,7 +43,7 @@
#include <linux/slab.h>
#include <linux/spinlock.h>
-#include <asm/atomic.h>
+#include <linux/atomic.h>
#include <linux/fsnotify_backend.h>
#include "fsnotify.h"
diff --git a/fs/notify/vfsmount_mark.c b/fs/notify/vfsmount_mark.c
index e86577d6c5c3..778fe6cae3b0 100644
--- a/fs/notify/vfsmount_mark.c
+++ b/fs/notify/vfsmount_mark.c
@@ -24,7 +24,7 @@
#include <linux/mutex.h>
#include <linux/spinlock.h>
-#include <asm/atomic.h>
+#include <linux/atomic.h>
#include <linux/fsnotify_backend.h>
#include "fsnotify.h"
diff --git a/fs/ntfs/inode.h b/fs/ntfs/inode.h
index 2dabf813456c..fe8e7e928889 100644
--- a/fs/ntfs/inode.h
+++ b/fs/ntfs/inode.h
@@ -24,7 +24,7 @@
#ifndef _LINUX_NTFS_INODE_H
#define _LINUX_NTFS_INODE_H
-#include <asm/atomic.h>
+#include <linux/atomic.h>
#include <linux/fs.h>
#include <linux/list.h>
diff --git a/fs/omfs/dir.c b/fs/omfs/dir.c
index 3b8d3979e03b..98e544274390 100644
--- a/fs/omfs/dir.c
+++ b/fs/omfs/dir.c
@@ -93,7 +93,7 @@ int omfs_make_empty(struct inode *inode, struct super_block *sb)
memset(bh->b_data, 0, sizeof(struct omfs_inode));
- if (inode->i_mode & S_IFDIR) {
+ if (S_ISDIR(inode->i_mode)) {
memset(&bh->b_data[OMFS_DIR_START], 0xff,
sbi->s_sys_blocksize - OMFS_DIR_START);
} else
diff --git a/fs/open.c b/fs/open.c
index 739b751aa73e..f71192109457 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -446,74 +446,52 @@ out:
return error;
}
-SYSCALL_DEFINE2(fchmod, unsigned int, fd, mode_t, mode)
+static int chmod_common(struct path *path, umode_t mode)
{
- struct inode * inode;
- struct dentry * dentry;
- struct file * file;
- int err = -EBADF;
+ struct inode *inode = path->dentry->d_inode;
struct iattr newattrs;
+ int error;
- file = fget(fd);
- if (!file)
- goto out;
-
- dentry = file->f_path.dentry;
- inode = dentry->d_inode;
-
- audit_inode(NULL, dentry);
-
- err = mnt_want_write_file(file);
- if (err)
- goto out_putf;
+ error = mnt_want_write(path->mnt);
+ if (error)
+ return error;
mutex_lock(&inode->i_mutex);
- err = security_path_chmod(dentry, file->f_vfsmnt, mode);
- if (err)
+ error = security_path_chmod(path->dentry, path->mnt, mode);
+ if (error)
goto out_unlock;
- if (mode == (mode_t) -1)
- mode = inode->i_mode;
newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO);
newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
- err = notify_change(dentry, &newattrs);
+ error = notify_change(path->dentry, &newattrs);
out_unlock:
mutex_unlock(&inode->i_mutex);
- mnt_drop_write(file->f_path.mnt);
-out_putf:
- fput(file);
-out:
+ mnt_drop_write(path->mnt);
+ return error;
+}
+
+SYSCALL_DEFINE2(fchmod, unsigned int, fd, mode_t, mode)
+{
+ struct file * file;
+ int err = -EBADF;
+
+ file = fget(fd);
+ if (file) {
+ audit_inode(NULL, file->f_path.dentry);
+ err = chmod_common(&file->f_path, mode);
+ fput(file);
+ }
return err;
}
SYSCALL_DEFINE3(fchmodat, int, dfd, const char __user *, filename, mode_t, mode)
{
struct path path;
- struct inode *inode;
int error;
- struct iattr newattrs;
error = user_path_at(dfd, filename, LOOKUP_FOLLOW, &path);
- if (error)
- goto out;
- inode = path.dentry->d_inode;
-
- error = mnt_want_write(path.mnt);
- if (error)
- goto dput_and_out;
- mutex_lock(&inode->i_mutex);
- error = security_path_chmod(path.dentry, path.mnt, mode);
- if (error)
- goto out_unlock;
- if (mode == (mode_t) -1)
- mode = inode->i_mode;
- newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO);
- newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
- error = notify_change(path.dentry, &newattrs);
-out_unlock:
- mutex_unlock(&inode->i_mutex);
- mnt_drop_write(path.mnt);
-dput_and_out:
- path_put(&path);
-out:
+ if (!error) {
+ error = chmod_common(&path, mode);
+ path_put(&path);
+ }
return error;
}
diff --git a/fs/pipe.c b/fs/pipe.c
index 1b7f9af67ccf..0e0be1dc0f8e 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -948,7 +948,7 @@ static const struct dentry_operations pipefs_dentry_operations = {
static struct inode * get_pipe_inode(void)
{
- struct inode *inode = new_inode(pipe_mnt->mnt_sb);
+ struct inode *inode = new_inode_pseudo(pipe_mnt->mnt_sb);
struct pipe_inode_info *pipe;
if (!inode)
diff --git a/fs/posix_acl.c b/fs/posix_acl.c
index a6227d219e93..d43729a760e2 100644
--- a/fs/posix_acl.c
+++ b/fs/posix_acl.c
@@ -14,7 +14,7 @@
#include <linux/kernel.h>
#include <linux/slab.h>
-#include <asm/atomic.h>
+#include <linux/atomic.h>
#include <linux/fs.h>
#include <linux/sched.h>
#include <linux/posix_acl.h>
diff --git a/fs/proc/base.c b/fs/proc/base.c
index c9e3f650f23c..08e3eccf9a12 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2706,9 +2706,16 @@ static int do_io_accounting(struct task_struct *task, char *buffer, int whole)
{
struct task_io_accounting acct = task->ioac;
unsigned long flags;
+ int result;
- if (!ptrace_may_access(task, PTRACE_MODE_READ))
- return -EACCES;
+ result = mutex_lock_killable(&task->signal->cred_guard_mutex);
+ if (result)
+ return result;
+
+ if (!ptrace_may_access(task, PTRACE_MODE_READ)) {
+ result = -EACCES;
+ goto out_unlock;
+ }
if (whole && lock_task_sighand(task, &flags)) {
struct task_struct *t = task;
@@ -2719,7 +2726,7 @@ static int do_io_accounting(struct task_struct *task, char *buffer, int whole)
unlock_task_sighand(task, &flags);
}
- return sprintf(buffer,
+ result = sprintf(buffer,
"rchar: %llu\n"
"wchar: %llu\n"
"syscr: %llu\n"
@@ -2734,6 +2741,9 @@ static int do_io_accounting(struct task_struct *task, char *buffer, int whole)
(unsigned long long)acct.read_bytes,
(unsigned long long)acct.write_bytes,
(unsigned long long)acct.cancelled_write_bytes);
+out_unlock:
+ mutex_unlock(&task->signal->cred_guard_mutex);
+ return result;
}
static int proc_tid_io_accounting(struct task_struct *task, char *buffer)
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 74b48cfa1bb2..7ed72d6c1c6f 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -319,7 +319,7 @@ static int proc_reg_open(struct inode *inode, struct file *file)
if (!pde->proc_fops) {
spin_unlock(&pde->pde_unload_lock);
kfree(pdeo);
- return -EINVAL;
+ return -ENOENT;
}
pde->pde_users++;
open = pde->proc_fops->open;
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index ed257d141568..586174168e2a 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -10,7 +10,7 @@
#include <linux/seq_file.h>
#include <linux/swap.h>
#include <linux/vmstat.h>
-#include <asm/atomic.h>
+#include <linux/atomic.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include "internal.h"
diff --git a/fs/read_write.c b/fs/read_write.c
index 5907b49e4d7e..179f1c33ea57 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -166,8 +166,10 @@ loff_t default_llseek(struct file *file, loff_t offset, int origin)
* long as offset isn't at the end of the file then the
* offset is data.
*/
- if (offset >= inode->i_size)
- return -ENXIO;
+ if (offset >= inode->i_size) {
+ retval = -ENXIO;
+ goto out;
+ }
break;
case SEEK_HOLE:
/*
@@ -175,8 +177,10 @@ loff_t default_llseek(struct file *file, loff_t offset, int origin)
* as long as offset isn't i_size or larger, return
* i_size.
*/
- if (offset >= inode->i_size)
- return -ENXIO;
+ if (offset >= inode->i_size) {
+ retval = -ENXIO;
+ goto out;
+ }
offset = inode->i_size;
break;
}
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index acca2c5ca3fa..f7ce7debe14c 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -265,7 +265,7 @@ xfs_open_by_handle(
return PTR_ERR(filp);
}
- if (inode->i_mode & S_IFREG) {
+ if (S_ISREG(inode->i_mode)) {
filp->f_flags |= O_NOATIME;
filp->f_mode |= FMODE_NOCMTIME;
}
@@ -850,14 +850,14 @@ xfs_set_diflags(
di_flags |= XFS_DIFLAG_NODEFRAG;
if (xflags & XFS_XFLAG_FILESTREAM)
di_flags |= XFS_DIFLAG_FILESTREAM;
- if ((ip->i_d.di_mode & S_IFMT) == S_IFDIR) {
+ if (S_ISDIR(ip->i_d.di_mode)) {
if (xflags & XFS_XFLAG_RTINHERIT)
di_flags |= XFS_DIFLAG_RTINHERIT;
if (xflags & XFS_XFLAG_NOSYMLINKS)
di_flags |= XFS_DIFLAG_NOSYMLINKS;
if (xflags & XFS_XFLAG_EXTSZINHERIT)
di_flags |= XFS_DIFLAG_EXTSZINHERIT;
- } else if ((ip->i_d.di_mode & S_IFMT) == S_IFREG) {
+ } else if (S_ISREG(ip->i_d.di_mode)) {
if (xflags & XFS_XFLAG_REALTIME)
di_flags |= XFS_DIFLAG_REALTIME;
if (xflags & XFS_XFLAG_EXTSIZE)
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index c51a3f903633..ab3e5c6c4642 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -414,7 +414,7 @@ xfs_bmap_add_attrfork_local(
if (ip->i_df.if_bytes <= XFS_IFORK_DSIZE(ip))
return 0;
- if ((ip->i_d.di_mode & S_IFMT) == S_IFDIR) {
+ if (S_ISDIR(ip->i_d.di_mode)) {
mp = ip->i_mount;
memset(&dargs, 0, sizeof(dargs));
dargs.dp = ip;
@@ -3344,8 +3344,7 @@ xfs_bmap_local_to_extents(
* We don't want to deal with the case of keeping inode data inline yet.
* So sending the data fork of a regular inode is invalid.
*/
- ASSERT(!((ip->i_d.di_mode & S_IFMT) == S_IFREG &&
- whichfork == XFS_DATA_FORK));
+ ASSERT(!(S_ISREG(ip->i_d.di_mode) && whichfork == XFS_DATA_FORK));
ifp = XFS_IFORK_PTR(ip, whichfork);
ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL);
flags = 0;
@@ -4052,7 +4051,7 @@ xfs_bmap_one_block(
#ifndef DEBUG
if (whichfork == XFS_DATA_FORK) {
- return ((ip->i_d.di_mode & S_IFMT) == S_IFREG) ?
+ return S_ISREG(ip->i_d.di_mode) ?
(ip->i_size == ip->i_mount->m_sb.sb_blocksize) :
(ip->i_d.di_size == ip->i_mount->m_sb.sb_blocksize);
}
diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c
index 4580ce00aeb4..a2e27010c7fb 100644
--- a/fs/xfs/xfs_dir2.c
+++ b/fs/xfs/xfs_dir2.c
@@ -121,7 +121,7 @@ xfs_dir_isempty(
{
xfs_dir2_sf_hdr_t *sfp;
- ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR);
+ ASSERT(S_ISDIR(dp->i_d.di_mode));
if (dp->i_d.di_size == 0) /* might happen during shutdown. */
return 1;
if (dp->i_d.di_size > XFS_IFORK_DSIZE(dp))
@@ -179,7 +179,7 @@ xfs_dir_init(
memset((char *)&args, 0, sizeof(args));
args.dp = dp;
args.trans = tp;
- ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR);
+ ASSERT(S_ISDIR(dp->i_d.di_mode));
if ((error = xfs_dir_ino_validate(tp->t_mountp, pdp->i_ino)))
return error;
return xfs_dir2_sf_create(&args, pdp->i_ino);
@@ -202,7 +202,7 @@ xfs_dir_createname(
int rval;
int v; /* type-checking value */
- ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR);
+ ASSERT(S_ISDIR(dp->i_d.di_mode));
if ((rval = xfs_dir_ino_validate(tp->t_mountp, inum)))
return rval;
XFS_STATS_INC(xs_dir_create);
@@ -278,7 +278,7 @@ xfs_dir_lookup(
int rval;
int v; /* type-checking value */
- ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR);
+ ASSERT(S_ISDIR(dp->i_d.di_mode));
XFS_STATS_INC(xs_dir_lookup);
memset(&args, 0, sizeof(xfs_da_args_t));
@@ -333,7 +333,7 @@ xfs_dir_removename(
int rval;
int v; /* type-checking value */
- ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR);
+ ASSERT(S_ISDIR(dp->i_d.di_mode));
XFS_STATS_INC(xs_dir_remove);
memset(&args, 0, sizeof(xfs_da_args_t));
@@ -382,7 +382,7 @@ xfs_readdir(
if (XFS_FORCED_SHUTDOWN(dp->i_mount))
return XFS_ERROR(EIO);
- ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR);
+ ASSERT(S_ISDIR(dp->i_d.di_mode));
XFS_STATS_INC(xs_dir_getdents);
if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
@@ -414,7 +414,7 @@ xfs_dir_replace(
int rval;
int v; /* type-checking value */
- ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR);
+ ASSERT(S_ISDIR(dp->i_d.di_mode));
if ((rval = xfs_dir_ino_validate(tp->t_mountp, inum)))
return rval;
@@ -464,7 +464,7 @@ xfs_dir_canenter(
if (resblks)
return 0;
- ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR);
+ ASSERT(S_ISDIR(dp->i_d.di_mode));
memset(&args, 0, sizeof(xfs_da_args_t));
args.name = name->name;
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c
index 9124425b7f2f..3ff3d9e23ded 100644
--- a/fs/xfs/xfs_filestream.c
+++ b/fs/xfs/xfs_filestream.c
@@ -344,9 +344,9 @@ _xfs_filestream_update_ag(
* Either ip is a regular file and pip is a directory, or ip is a
* directory and pip is NULL.
*/
- ASSERT(ip && (((ip->i_d.di_mode & S_IFREG) && pip &&
- (pip->i_d.di_mode & S_IFDIR)) ||
- ((ip->i_d.di_mode & S_IFDIR) && !pip)));
+ ASSERT(ip && ((S_ISREG(ip->i_d.di_mode) && pip &&
+ S_ISDIR(pip->i_d.di_mode)) ||
+ (S_ISDIR(ip->i_d.di_mode) && !pip)));
mp = ip->i_mount;
cache = mp->m_filestream;
@@ -537,7 +537,7 @@ xfs_filestream_lookup_ag(
xfs_agnumber_t ag;
int ref;
- if (!(ip->i_d.di_mode & (S_IFREG | S_IFDIR))) {
+ if (!S_ISREG(ip->i_d.di_mode) && !S_ISDIR(ip->i_d.di_mode)) {
ASSERT(0);
return NULLAGNUMBER;
}
@@ -579,9 +579,9 @@ xfs_filestream_associate(
xfs_agnumber_t ag, rotorstep, startag;
int err = 0;
- ASSERT(pip->i_d.di_mode & S_IFDIR);
- ASSERT(ip->i_d.di_mode & S_IFREG);
- if (!(pip->i_d.di_mode & S_IFDIR) || !(ip->i_d.di_mode & S_IFREG))
+ ASSERT(S_ISDIR(pip->i_d.di_mode));
+ ASSERT(S_ISREG(ip->i_d.di_mode));
+ if (!S_ISDIR(pip->i_d.di_mode) || !S_ISREG(ip->i_d.di_mode))
return -EINVAL;
mp = pip->i_mount;
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 3cc21ddf9f7e..2fcca4b03ed3 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -368,7 +368,7 @@ xfs_iformat(
/*
* no local regular files yet
*/
- if (unlikely((be16_to_cpu(dip->di_mode) & S_IFMT) == S_IFREG)) {
+ if (unlikely(S_ISREG(be16_to_cpu(dip->di_mode)))) {
xfs_warn(ip->i_mount,
"corrupt inode %Lu (local format for regular file).",
(unsigned long long) ip->i_ino);
@@ -1040,7 +1040,7 @@ xfs_ialloc(
if (pip && XFS_INHERIT_GID(pip)) {
ip->i_d.di_gid = pip->i_d.di_gid;
- if ((pip->i_d.di_mode & S_ISGID) && (mode & S_IFMT) == S_IFDIR) {
+ if ((pip->i_d.di_mode & S_ISGID) && S_ISDIR(mode)) {
ip->i_d.di_mode |= S_ISGID;
}
}
@@ -1097,14 +1097,14 @@ xfs_ialloc(
if (pip && (pip->i_d.di_flags & XFS_DIFLAG_ANY)) {
uint di_flags = 0;
- if ((mode & S_IFMT) == S_IFDIR) {
+ if (S_ISDIR(mode)) {
if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT)
di_flags |= XFS_DIFLAG_RTINHERIT;
if (pip->i_d.di_flags & XFS_DIFLAG_EXTSZINHERIT) {
di_flags |= XFS_DIFLAG_EXTSZINHERIT;
ip->i_d.di_extsize = pip->i_d.di_extsize;
}
- } else if ((mode & S_IFMT) == S_IFREG) {
+ } else if (S_ISREG(mode)) {
if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT)
di_flags |= XFS_DIFLAG_REALTIME;
if (pip->i_d.di_flags & XFS_DIFLAG_EXTSZINHERIT) {
@@ -1188,7 +1188,7 @@ xfs_isize_check(
int nimaps;
xfs_bmbt_irec_t imaps[2];
- if ((ip->i_d.di_mode & S_IFMT) != S_IFREG)
+ if (!S_ISREG(ip->i_d.di_mode))
return;
if (XFS_IS_REALTIME_INODE(ip))
@@ -1828,7 +1828,7 @@ xfs_ifree(
ASSERT(ip->i_d.di_nextents == 0);
ASSERT(ip->i_d.di_anextents == 0);
ASSERT((ip->i_d.di_size == 0 && ip->i_size == 0) ||
- ((ip->i_d.di_mode & S_IFMT) != S_IFREG));
+ (!S_ISREG(ip->i_d.di_mode)));
ASSERT(ip->i_d.di_nblocks == 0);
/*
@@ -2671,7 +2671,7 @@ xfs_iflush_int(
__func__, ip->i_ino, ip, ip->i_d.di_magic);
goto corrupt_out;
}
- if ((ip->i_d.di_mode & S_IFMT) == S_IFREG) {
+ if (S_ISREG(ip->i_d.di_mode)) {
if (XFS_TEST_ERROR(
(ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS) &&
(ip->i_d.di_format != XFS_DINODE_FMT_BTREE),
@@ -2681,7 +2681,7 @@ xfs_iflush_int(
__func__, ip->i_ino, ip);
goto corrupt_out;
}
- } else if ((ip->i_d.di_mode & S_IFMT) == S_IFDIR) {
+ } else if (S_ISDIR(ip->i_d.di_mode)) {
if (XFS_TEST_ERROR(
(ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS) &&
(ip->i_d.di_format != XFS_DINODE_FMT_BTREE) &&
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index a97644ab945a..2380a4bcbece 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -263,7 +263,7 @@ typedef struct xfs_inode {
struct inode i_vnode; /* embedded VFS inode */
} xfs_inode_t;
-#define XFS_ISIZE(ip) (((ip)->i_d.di_mode & S_IFMT) == S_IFREG) ? \
+#define XFS_ISIZE(ip) S_ISREG((ip)->i_d.di_mode) ? \
(ip)->i_size : (ip)->i_d.di_size;
/* Convert from vfs inode to xfs inode */
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 8fe4206de057..052a2c0ec5fb 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -2283,7 +2283,7 @@ xlog_recover_inode_pass2(
/* Take the opportunity to reset the flush iteration count */
dicp->di_flushiter = 0;
- if (unlikely((dicp->di_mode & S_IFMT) == S_IFREG)) {
+ if (unlikely(S_ISREG(dicp->di_mode))) {
if ((dicp->di_format != XFS_DINODE_FMT_EXTENTS) &&
(dicp->di_format != XFS_DINODE_FMT_BTREE)) {
XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(3)",
@@ -2296,7 +2296,7 @@ xlog_recover_inode_pass2(
error = EFSCORRUPTED;
goto error;
}
- } else if (unlikely((dicp->di_mode & S_IFMT) == S_IFDIR)) {
+ } else if (unlikely(S_ISDIR(dicp->di_mode))) {
if ((dicp->di_format != XFS_DINODE_FMT_EXTENTS) &&
(dicp->di_format != XFS_DINODE_FMT_BTREE) &&
(dicp->di_format != XFS_DINODE_FMT_LOCAL)) {
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 7f25245da289..092e16ae4d9d 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -1331,7 +1331,7 @@ xfs_mountfs(
ASSERT(rip != NULL);
- if (unlikely((rip->i_d.di_mode & S_IFMT) != S_IFDIR)) {
+ if (unlikely(!S_ISDIR(rip->i_d.di_mode))) {
xfs_warn(mp, "corrupted root inode %llu: not a directory",
(unsigned long long)rip->i_ino);
xfs_iunlock(rip, XFS_ILOCK_EXCL);
diff --git a/fs/xfs/xfs_rename.c b/fs/xfs/xfs_rename.c
index 77a59891734e..df78c297d1a1 100644
--- a/fs/xfs/xfs_rename.c
+++ b/fs/xfs/xfs_rename.c
@@ -116,7 +116,7 @@ xfs_rename(
trace_xfs_rename(src_dp, target_dp, src_name, target_name);
new_parent = (src_dp != target_dp);
- src_is_directory = ((src_ip->i_d.di_mode & S_IFMT) == S_IFDIR);
+ src_is_directory = S_ISDIR(src_ip->i_d.di_mode);
if (src_is_directory) {
/*
@@ -226,7 +226,7 @@ xfs_rename(
* target and source are directories and that target can be
* destroyed, or that neither is a directory.
*/
- if ((target_ip->i_d.di_mode & S_IFMT) == S_IFDIR) {
+ if (S_ISDIR(target_ip->i_d.di_mode)) {
/*
* Make sure target dir is empty.
*/
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 88d121486c52..9322e13f0c63 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -121,7 +121,7 @@ xfs_readlink(
xfs_ilock(ip, XFS_ILOCK_SHARED);
- ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFLNK);
+ ASSERT(S_ISLNK(ip->i_d.di_mode));
ASSERT(ip->i_d.di_size <= MAXPATHLEN);
pathlen = ip->i_d.di_size;
@@ -529,7 +529,7 @@ xfs_release(
if (ip->i_d.di_nlink == 0)
return 0;
- if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) &&
+ if ((S_ISREG(ip->i_d.di_mode) &&
((ip->i_size > 0) || (VN_CACHED(VFS_I(ip)) > 0 ||
ip->i_delayed_blks > 0)) &&
(ip->i_df.if_flags & XFS_IFEXTENTS)) &&
@@ -610,7 +610,7 @@ xfs_inactive(
truncate = ((ip->i_d.di_nlink == 0) &&
((ip->i_d.di_size != 0) || (ip->i_size != 0) ||
(ip->i_d.di_nextents > 0) || (ip->i_delayed_blks > 0)) &&
- ((ip->i_d.di_mode & S_IFMT) == S_IFREG));
+ S_ISREG(ip->i_d.di_mode));
mp = ip->i_mount;
@@ -621,7 +621,7 @@ xfs_inactive(
goto out;
if (ip->i_d.di_nlink != 0) {
- if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) &&
+ if ((S_ISREG(ip->i_d.di_mode) &&
((ip->i_size > 0) || (VN_CACHED(VFS_I(ip)) > 0 ||
ip->i_delayed_blks > 0)) &&
(ip->i_df.if_flags & XFS_IFEXTENTS) &&
@@ -669,7 +669,7 @@ xfs_inactive(
xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
return VN_INACTIVE_CACHE;
}
- } else if ((ip->i_d.di_mode & S_IFMT) == S_IFLNK) {
+ } else if (S_ISLNK(ip->i_d.di_mode)) {
/*
* If we get an error while cleaning up a