summaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorJiri Kosina <jkosina@suse.cz>2011-07-11 14:15:48 +0200
committerJiri Kosina <jkosina@suse.cz>2011-07-11 14:15:55 +0200
commitb7e9c223be8ce335e30f2cf6ba588e6a4092275c (patch)
tree2d1e3b75606abc18df7ad65e51ac3f90cd68b38d /fs
parentc172d82500a6cf3c32d1e650722a1055d72ce858 (diff)
parente3bbfa78bab125f58b831b5f7f45b5a305091d72 (diff)
downloadlinux-b7e9c223be8ce335e30f2cf6ba588e6a4092275c.tar.bz2
Merge branch 'master' into for-next
Sync with Linus' tree to be able to apply pending patches that are based on newer code already present upstream.
Diffstat (limited to 'fs')
-rw-r--r--fs/afs/dir.c8
-rw-r--r--fs/afs/fsclient.c3
-rw-r--r--fs/afs/inode.c10
-rw-r--r--fs/afs/super.c74
-rw-r--r--fs/afs/write.c21
-rw-r--r--fs/bad_inode.c3
-rw-r--r--fs/binfmt_elf_fdpic.c1
-rw-r--r--fs/block_dev.c14
-rw-r--r--fs/btrfs/ctree.c10
-rw-r--r--fs/btrfs/ctree.h21
-rw-r--r--fs/btrfs/delayed-inode.c136
-rw-r--r--fs/btrfs/delayed-inode.h6
-rw-r--r--fs/btrfs/disk-io.c17
-rw-r--r--fs/btrfs/extent-tree.c63
-rw-r--r--fs/btrfs/extent_io.h2
-rw-r--r--fs/btrfs/free-space-cache.c172
-rw-r--r--fs/btrfs/inode.c30
-rw-r--r--fs/btrfs/ioctl.c25
-rw-r--r--fs/btrfs/relocation.c30
-rw-r--r--fs/btrfs/scrub.c69
-rw-r--r--fs/btrfs/super.c6
-rw-r--r--fs/btrfs/sysfs.c146
-rw-r--r--fs/btrfs/transaction.c121
-rw-r--r--fs/btrfs/tree-log.c2
-rw-r--r--fs/btrfs/volumes.c11
-rw-r--r--fs/buffer.c4
-rw-r--r--fs/ceph/addr.c2
-rw-r--r--fs/ceph/caps.c10
-rw-r--r--fs/ceph/dir.c11
-rw-r--r--fs/ceph/export.c4
-rw-r--r--fs/ceph/file.c47
-rw-r--r--fs/ceph/inode.c18
-rw-r--r--fs/ceph/ioctl.c6
-rw-r--r--fs/ceph/locks.c29
-rw-r--r--fs/ceph/snap.c2
-rw-r--r--fs/ceph/xattr.c6
-rw-r--r--fs/cifs/Kconfig2
-rw-r--r--fs/cifs/cache.c6
-rw-r--r--fs/cifs/cifs_fs_sb.h1
-rw-r--r--fs/cifs/cifsfs.c192
-rw-r--r--fs/cifs/cifsfs.h2
-rw-r--r--fs/cifs/cifsproto.h8
-rw-r--r--fs/cifs/connect.c127
-rw-r--r--fs/cifs/fscache.c52
-rw-r--r--fs/cifs/smbencrypt.c6
-rw-r--r--fs/coda/pioctl.c2
-rw-r--r--fs/dcookies.c3
-rw-r--r--fs/exec.c2
-rw-r--r--fs/ext4/ext4_extents.h9
-rw-r--r--fs/ext4/extents.c42
-rw-r--r--fs/ext4/inode.c2
-rw-r--r--fs/ext4/mballoc.c8
-rw-r--r--fs/ext4/move_extent.c10
-rw-r--r--fs/ext4/super.c15
-rw-r--r--fs/fscache/page.c44
-rw-r--r--fs/hfsplus/super.c2
-rw-r--r--fs/hfsplus/wrapper.c7
-rw-r--r--fs/inode.c7
-rw-r--r--fs/isofs/inode.c3
-rw-r--r--fs/jbd2/checkpoint.c28
-rw-r--r--fs/jbd2/commit.c33
-rw-r--r--fs/jbd2/journal.c91
-rw-r--r--fs/jbd2/transaction.c69
-rw-r--r--fs/jfs/file.c6
-rw-r--r--fs/jfs/jfs_imap.c12
-rw-r--r--fs/jfs/jfs_incore.h3
-rw-r--r--fs/jfs/resize.c2
-rw-r--r--fs/lockd/clntproc.c8
-rw-r--r--fs/locks.c30
-rw-r--r--fs/logfs/dir.c8
-rw-r--r--fs/namei.c34
-rw-r--r--fs/nfs/fscache.c8
-rw-r--r--fs/nfs/inode.c6
-rw-r--r--fs/nfs/internal.h11
-rw-r--r--fs/nfs/nfs4filelayout.c21
-rw-r--r--fs/nfs/nfs4proc.c45
-rw-r--r--fs/nfs/nfs4xdr.c26
-rw-r--r--fs/nfs/objlayout/objio_osd.c4
-rw-r--r--fs/nfs/objlayout/objlayout.c2
-rw-r--r--fs/nfs/pagelist.c3
-rw-r--r--fs/nfs/pnfs.c44
-rw-r--r--fs/nfs/pnfs.h1
-rw-r--r--fs/nfs/pnfs_dev.c17
-rw-r--r--fs/nfsd/Kconfig1
-rw-r--r--fs/nfsd/nfsctl.c19
-rw-r--r--fs/nfsd/vfs.c19
-rw-r--r--fs/nilfs2/btree.c39
-rw-r--r--fs/nilfs2/inode.c7
-rw-r--r--fs/nilfs2/segment.c2
-rw-r--r--fs/omfs/file.c1
-rw-r--r--fs/proc/base.c13
-rw-r--r--fs/proc/namespaces.c9
-rw-r--r--fs/proc/proc_sysctl.c3
-rw-r--r--fs/proc/root.c11
-rw-r--r--fs/reiserfs/xattr.c2
-rw-r--r--fs/romfs/mmap-nommu.c8
-rw-r--r--fs/sysfs/mount.c37
-rw-r--r--fs/sysfs/sysfs.h2
-rw-r--r--fs/timerfd.c5
-rw-r--r--fs/ubifs/super.c136
-rw-r--r--fs/xfs/linux-2.6/xfs_file.c50
-rw-r--r--fs/xfs/linux-2.6/xfs_iops.c2
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c75
-rw-r--r--fs/xfs/xfs_attr.c7
-rw-r--r--fs/xfs/xfs_iget.c13
-rw-r--r--fs/xfs/xfs_inode.h10
-rw-r--r--fs/xfs/xfs_inode_item.c14
-rw-r--r--fs/xfs/xfs_log.c11
-rw-r--r--fs/xfs/xfs_trans.c4
-rw-r--r--fs/xfs/xfs_vnodeops.c7
110 files changed, 1517 insertions, 1194 deletions
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 20c106f24927..1b0b19550015 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -584,11 +584,11 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry,
success:
d_add(dentry, inode);
- _leave(" = 0 { vn=%u u=%u } -> { ino=%lu v=%llu }",
+ _leave(" = 0 { vn=%u u=%u } -> { ino=%lu v=%u }",
fid.vnode,
fid.unique,
dentry->d_inode->i_ino,
- (unsigned long long)dentry->d_inode->i_version);
+ dentry->d_inode->i_generation);
return NULL;
}
@@ -671,10 +671,10 @@ static int afs_d_revalidate(struct dentry *dentry, struct nameidata *nd)
* been deleted and replaced, and the original vnode ID has
* been reused */
if (fid.unique != vnode->fid.unique) {
- _debug("%s: file deleted (uq %u -> %u I:%llu)",
+ _debug("%s: file deleted (uq %u -> %u I:%u)",
dentry->d_name.name, fid.unique,
vnode->fid.unique,
- (unsigned long long)dentry->d_inode->i_version);
+ dentry->d_inode->i_generation);
spin_lock(&vnode->lock);
set_bit(AFS_VNODE_DELETED, &vnode->flags);
spin_unlock(&vnode->lock);
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index 4bd0218473a9..346e3289abd7 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -89,7 +89,7 @@ static void xdr_decode_AFSFetchStatus(const __be32 **_bp,
i_size_write(&vnode->vfs_inode, size);
vnode->vfs_inode.i_uid = status->owner;
vnode->vfs_inode.i_gid = status->group;
- vnode->vfs_inode.i_version = vnode->fid.unique;
+ vnode->vfs_inode.i_generation = vnode->fid.unique;
vnode->vfs_inode.i_nlink = status->nlink;
mode = vnode->vfs_inode.i_mode;
@@ -102,6 +102,7 @@ static void xdr_decode_AFSFetchStatus(const __be32 **_bp,
vnode->vfs_inode.i_ctime.tv_sec = status->mtime_server;
vnode->vfs_inode.i_mtime = vnode->vfs_inode.i_ctime;
vnode->vfs_inode.i_atime = vnode->vfs_inode.i_ctime;
+ vnode->vfs_inode.i_version = data_version;
}
expected_version = status->data_version;
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index db66c5201474..0fdab6e03d87 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -75,7 +75,8 @@ static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key)
inode->i_ctime.tv_nsec = 0;
inode->i_atime = inode->i_mtime = inode->i_ctime;
inode->i_blocks = 0;
- inode->i_version = vnode->fid.unique;
+ inode->i_generation = vnode->fid.unique;
+ inode->i_version = vnode->status.data_version;
inode->i_mapping->a_ops = &afs_fs_aops;
/* check to see whether a symbolic link is really a mountpoint */
@@ -100,7 +101,7 @@ static int afs_iget5_test(struct inode *inode, void *opaque)
struct afs_iget_data *data = opaque;
return inode->i_ino == data->fid.vnode &&
- inode->i_version == data->fid.unique;
+ inode->i_generation == data->fid.unique;
}
/*
@@ -122,7 +123,7 @@ static int afs_iget5_set(struct inode *inode, void *opaque)
struct afs_vnode *vnode = AFS_FS_I(inode);
inode->i_ino = data->fid.vnode;
- inode->i_version = data->fid.unique;
+ inode->i_generation = data->fid.unique;
vnode->fid = data->fid;
vnode->volume = data->volume;
@@ -380,8 +381,7 @@ int afs_getattr(struct vfsmount *mnt, struct dentry *dentry,
inode = dentry->d_inode;
- _enter("{ ino=%lu v=%llu }", inode->i_ino,
- (unsigned long long)inode->i_version);
+ _enter("{ ino=%lu v=%u }", inode->i_ino, inode->i_generation);
generic_fillattr(inode, stat);
return 0;
diff --git a/fs/afs/super.c b/fs/afs/super.c
index fb240e8766d6..356dcf0929e8 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -31,8 +31,8 @@
static void afs_i_init_once(void *foo);
static struct dentry *afs_mount(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data);
+static void afs_kill_super(struct super_block *sb);
static struct inode *afs_alloc_inode(struct super_block *sb);
-static void afs_put_super(struct super_block *sb);
static void afs_destroy_inode(struct inode *inode);
static int afs_statfs(struct dentry *dentry, struct kstatfs *buf);
@@ -40,7 +40,7 @@ struct file_system_type afs_fs_type = {
.owner = THIS_MODULE,
.name = "afs",
.mount = afs_mount,
- .kill_sb = kill_anon_super,
+ .kill_sb = afs_kill_super,
.fs_flags = 0,
};
@@ -50,7 +50,6 @@ static const struct super_operations afs_super_ops = {
.drop_inode = afs_drop_inode,
.destroy_inode = afs_destroy_inode,
.evict_inode = afs_evict_inode,
- .put_super = afs_put_super,
.show_options = generic_show_options,
};
@@ -282,19 +281,25 @@ static int afs_parse_device_name(struct afs_mount_params *params,
*/
static int afs_test_super(struct super_block *sb, void *data)
{
- struct afs_mount_params *params = data;
+ struct afs_super_info *as1 = data;
struct afs_super_info *as = sb->s_fs_info;
- return as->volume == params->volume;
+ return as->volume == as1->volume;
+}
+
+static int afs_set_super(struct super_block *sb, void *data)
+{
+ sb->s_fs_info = data;
+ return set_anon_super(sb, NULL);
}
/*
* fill in the superblock
*/
-static int afs_fill_super(struct super_block *sb, void *data)
+static int afs_fill_super(struct super_block *sb,
+ struct afs_mount_params *params)
{
- struct afs_mount_params *params = data;
- struct afs_super_info *as = NULL;
+ struct afs_super_info *as = sb->s_fs_info;
struct afs_fid fid;
struct dentry *root = NULL;
struct inode *inode = NULL;
@@ -302,23 +307,13 @@ static int afs_fill_super(struct super_block *sb, void *data)
_enter("");
- /* allocate a superblock info record */
- as = kzalloc(sizeof(struct afs_super_info), GFP_KERNEL);
- if (!as) {
- _leave(" = -ENOMEM");
- return -ENOMEM;
- }
-
- afs_get_volume(params->volume);
- as->volume = params->volume;
-
/* fill in the superblock */
sb->s_blocksize = PAGE_CACHE_SIZE;
sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
sb->s_magic = AFS_FS_MAGIC;
sb->s_op = &afs_super_ops;
- sb->s_fs_info = as;
sb->s_bdi = &as->volume->bdi;
+ strlcpy(sb->s_id, as->volume->vlocation->vldb.name, sizeof(sb->s_id));
/* allocate the root inode and dentry */
fid.vid = as->volume->vid;
@@ -326,7 +321,7 @@ static int afs_fill_super(struct super_block *sb, void *data)
fid.unique = 1;
inode = afs_iget(sb, params->key, &fid, NULL, NULL);
if (IS_ERR(inode))
- goto error_inode;
+ return PTR_ERR(inode);
if (params->autocell)
set_bit(AFS_VNODE_AUTOCELL, &AFS_FS_I(inode)->flags);
@@ -342,16 +337,8 @@ static int afs_fill_super(struct super_block *sb, void *data)
_leave(" = 0");
return 0;
-error_inode:
- ret = PTR_ERR(inode);
- inode = NULL;
error:
iput(inode);
- afs_put_volume(as->volume);
- kfree(as);
-
- sb->s_fs_info = NULL;
-
_leave(" = %d", ret);
return ret;
}
@@ -367,6 +354,7 @@ static struct dentry *afs_mount(struct file_system_type *fs_type,
struct afs_volume *vol;
struct key *key;
char *new_opts = kstrdup(options, GFP_KERNEL);
+ struct afs_super_info *as;
int ret;
_enter(",,%s,%p", dev_name, options);
@@ -399,12 +387,22 @@ static struct dentry *afs_mount(struct file_system_type *fs_type,
ret = PTR_ERR(vol);
goto error;
}
- params.volume = vol;
+
+ /* allocate a superblock info record */
+ as = kzalloc(sizeof(struct afs_super_info), GFP_KERNEL);
+ if (!as) {
+ ret = -ENOMEM;
+ afs_put_volume(vol);
+ goto error;
+ }
+ as->volume = vol;
/* allocate a deviceless superblock */
- sb = sget(fs_type, afs_test_super, set_anon_super, &params);
+ sb = sget(fs_type, afs_test_super, afs_set_super, as);
if (IS_ERR(sb)) {
ret = PTR_ERR(sb);
+ afs_put_volume(vol);
+ kfree(as);
goto error;
}
@@ -422,16 +420,16 @@ static struct dentry *afs_mount(struct file_system_type *fs_type,
} else {
_debug("reuse");
ASSERTCMP(sb->s_flags, &, MS_ACTIVE);
+ afs_put_volume(vol);
+ kfree(as);
}
- afs_put_volume(params.volume);
afs_put_cell(params.cell);
kfree(new_opts);
_leave(" = 0 [%p]", sb);
return dget(sb->s_root);
error:
- afs_put_volume(params.volume);
afs_put_cell(params.cell);
key_put(params.key);
kfree(new_opts);
@@ -439,18 +437,12 @@ error:
return ERR_PTR(ret);
}
-/*
- * finish the unmounting process on the superblock
- */
-static void afs_put_super(struct super_block *sb)
+static void afs_kill_super(struct super_block *sb)
{
struct afs_super_info *as = sb->s_fs_info;
-
- _enter("");
-
+ kill_anon_super(sb);
afs_put_volume(as->volume);
-
- _leave("");
+ kfree(as);
}
/*
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 789b3afb3423..b806285ff853 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -84,23 +84,21 @@ void afs_put_writeback(struct afs_writeback *wb)
* partly or wholly fill a page that's under preparation for writing
*/
static int afs_fill_page(struct afs_vnode *vnode, struct key *key,
- loff_t pos, unsigned len, struct page *page)
+ loff_t pos, struct page *page)
{
loff_t i_size;
- unsigned eof;
int ret;
+ int len;
- _enter(",,%llu,%u", (unsigned long long)pos, len);
-
- ASSERTCMP(len, <=, PAGE_CACHE_SIZE);
+ _enter(",,%llu", (unsigned long long)pos);
i_size = i_size_read(&vnode->vfs_inode);
- if (pos + len > i_size)
- eof = i_size;
+ if (pos + PAGE_CACHE_SIZE > i_size)
+ len = i_size - pos;
else
- eof = PAGE_CACHE_SIZE;
+ len = PAGE_CACHE_SIZE;
- ret = afs_vnode_fetch_data(vnode, key, 0, eof, page);
+ ret = afs_vnode_fetch_data(vnode, key, pos, len, page);
if (ret < 0) {
if (ret == -ENOENT) {
_debug("got NOENT from server"
@@ -153,9 +151,8 @@ int afs_write_begin(struct file *file, struct address_space *mapping,
*pagep = page;
/* page won't leak in error case: it eventually gets cleaned off LRU */
- if (!PageUptodate(page)) {
- _debug("not up to date");
- ret = afs_fill_page(vnode, key, pos, len, page);
+ if (!PageUptodate(page) && len != PAGE_CACHE_SIZE) {
+ ret = afs_fill_page(vnode, key, index << PAGE_CACHE_SHIFT, page);
if (ret < 0) {
kfree(candidate);
_leave(" = %d [prep]", ret);
diff --git a/fs/bad_inode.c b/fs/bad_inode.c
index 9ad2369d9e35..bfcb18feb1df 100644
--- a/fs/bad_inode.c
+++ b/fs/bad_inode.c
@@ -231,9 +231,6 @@ static int bad_inode_readlink(struct dentry *dentry, char __user *buffer,
static int bad_inode_permission(struct inode *inode, int mask, unsigned int flags)
{
- if (flags & IPERM_FLAG_RCU)
- return -ECHILD;
-
return -EIO;
}
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 63039ed9576f..2bc5dc644b4c 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -1864,6 +1864,7 @@ cleanup:
kfree(psinfo);
kfree(notes);
kfree(fpu);
+ kfree(shdr4extnum);
#ifdef ELF_CORE_COPY_XFPREGS
kfree(xfpu);
#endif
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 1a2421f908f0..610e8e0b04b8 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -762,7 +762,19 @@ static struct block_device *bd_start_claiming(struct block_device *bdev,
if (!disk)
return ERR_PTR(-ENXIO);
- whole = bdget_disk(disk, 0);
+ /*
+ * Normally, @bdev should equal what's returned from bdget_disk()
+ * if partno is 0; however, some drivers (floppy) use multiple
+ * bdev's for the same physical device and @bdev may be one of the
+ * aliases. Keep @bdev if partno is 0. This means claimer
+ * tracking is broken for those devices but it has always been that
+ * way.
+ */
+ if (partno)
+ whole = bdget_disk(disk, 0);
+ else
+ whole = bdgrab(bdev);
+
module_put(disk->fops->owner);
put_disk(disk);
if (!whole)
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index d84089349c82..2e667868e0d2 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -1228,6 +1228,7 @@ static void reada_for_search(struct btrfs_root *root,
u32 nr;
u32 blocksize;
u32 nscan = 0;
+ bool map = true;
if (level != 1)
return;
@@ -1249,8 +1250,11 @@ static void reada_for_search(struct btrfs_root *root,
nritems = btrfs_header_nritems(node);
nr = slot;
+ if (node->map_token || path->skip_locking)
+ map = false;
+
while (1) {
- if (!node->map_token) {
+ if (map && !node->map_token) {
unsigned long offset = btrfs_node_key_ptr_offset(nr);
map_private_extent_buffer(node, offset,
sizeof(struct btrfs_key_ptr),
@@ -1277,7 +1281,7 @@ static void reada_for_search(struct btrfs_root *root,
if ((search <= target && target - search <= 65536) ||
(search > target && search - target <= 65536)) {
gen = btrfs_node_ptr_generation(node, nr);
- if (node->map_token) {
+ if (map && node->map_token) {
unmap_extent_buffer(node, node->map_token,
KM_USER1);
node->map_token = NULL;
@@ -1289,7 +1293,7 @@ static void reada_for_search(struct btrfs_root *root,
if ((nread > 65536 || nscan > 32))
break;
}
- if (node->map_token) {
+ if (map && node->map_token) {
unmap_extent_buffer(node, node->map_token, KM_USER1);
node->map_token = NULL;
}
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 378b5b4443f3..3b859a3e6a0e 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -19,7 +19,6 @@
#ifndef __BTRFS_CTREE__
#define __BTRFS_CTREE__
-#include <linux/version.h>
#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/fs.h>
@@ -967,6 +966,12 @@ struct btrfs_fs_info {
struct srcu_struct subvol_srcu;
spinlock_t trans_lock;
+ /*
+ * the reloc mutex goes with the trans lock, it is taken
+ * during commit to protect us from the relocation code
+ */
+ struct mutex reloc_mutex;
+
struct list_head trans_list;
struct list_head hashers;
struct list_head dead_roots;
@@ -1172,6 +1177,14 @@ struct btrfs_root {
u32 type;
u64 highest_objectid;
+
+ /* btrfs_record_root_in_trans is a multi-step process,
+ * and it can race with the balancing code. But the
+ * race is very small, and only the first time the root
+ * is added to each transaction. So in_trans_setup
+ * is used to tell us when more checks are required
+ */
+ unsigned long in_trans_setup;
int ref_cows;
int track_dirty;
int in_radix;
@@ -1181,7 +1194,6 @@ struct btrfs_root {
struct btrfs_key defrag_max;
int defrag_running;
char *name;
- int in_sysfs;
/* the dirty list is only used by non-reference counted roots */
struct list_head dirty_list;
@@ -1323,6 +1335,11 @@ struct btrfs_ioctl_defrag_range_args {
*/
#define BTRFS_STRING_ITEM_KEY 253
+/*
+ * Flags for mount options.
+ *
+ * Note: don't forget to add new options to btrfs_show_options()
+ */
#define BTRFS_MOUNT_NODATASUM (1 << 0)
#define BTRFS_MOUNT_NODATACOW (1 << 1)
#define BTRFS_MOUNT_NOBARRIER (1 << 2)
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 6462c29d2d37..98c68e658a9b 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -82,19 +82,16 @@ static inline struct btrfs_delayed_root *btrfs_get_delayed_root(
return root->fs_info->delayed_root;
}
-static struct btrfs_delayed_node *btrfs_get_or_create_delayed_node(
- struct inode *inode)
+static struct btrfs_delayed_node *btrfs_get_delayed_node(struct inode *inode)
{
- struct btrfs_delayed_node *node;
struct btrfs_inode *btrfs_inode = BTRFS_I(inode);
struct btrfs_root *root = btrfs_inode->root;
u64 ino = btrfs_ino(inode);
- int ret;
+ struct btrfs_delayed_node *node;
-again:
node = ACCESS_ONCE(btrfs_inode->delayed_node);
if (node) {
- atomic_inc(&node->refs); /* can be accessed */
+ atomic_inc(&node->refs);
return node;
}
@@ -102,8 +99,10 @@ again:
node = radix_tree_lookup(&root->delayed_nodes_tree, ino);
if (node) {
if (btrfs_inode->delayed_node) {
+ atomic_inc(&node->refs); /* can be accessed */
+ BUG_ON(btrfs_inode->delayed_node != node);
spin_unlock(&root->inode_lock);
- goto again;
+ return node;
}
btrfs_inode->delayed_node = node;
atomic_inc(&node->refs); /* can be accessed */
@@ -113,6 +112,23 @@ again:
}
spin_unlock(&root->inode_lock);
+ return NULL;
+}
+
+static struct btrfs_delayed_node *btrfs_get_or_create_delayed_node(
+ struct inode *inode)
+{
+ struct btrfs_delayed_node *node;
+ struct btrfs_inode *btrfs_inode = BTRFS_I(inode);
+ struct btrfs_root *root = btrfs_inode->root;
+ u64 ino = btrfs_ino(inode);
+ int ret;
+
+again:
+ node = btrfs_get_delayed_node(inode);
+ if (node)
+ return node;
+
node = kmem_cache_alloc(delayed_node_cache, GFP_NOFS);
if (!node)
return ERR_PTR(-ENOMEM);
@@ -297,7 +313,6 @@ struct btrfs_delayed_item *btrfs_alloc_delayed_item(u32 data_len)
item->data_len = data_len;
item->ins_or_del = 0;
item->bytes_reserved = 0;
- item->block_rsv = NULL;
item->delayed_node = NULL;
atomic_set(&item->refs, 1);
}
@@ -549,19 +564,6 @@ struct btrfs_delayed_item *__btrfs_next_delayed_item(
return next;
}
-static inline struct btrfs_delayed_node *btrfs_get_delayed_node(
- struct inode *inode)
-{
- struct btrfs_inode *btrfs_inode = BTRFS_I(inode);
- struct btrfs_delayed_node *delayed_node;
-
- delayed_node = btrfs_inode->delayed_node;
- if (delayed_node)
- atomic_inc(&delayed_node->refs);
-
- return delayed_node;
-}
-
static inline struct btrfs_root *btrfs_get_fs_root(struct btrfs_root *root,
u64 root_id)
{
@@ -593,10 +595,8 @@ static int btrfs_delayed_item_reserve_metadata(struct btrfs_trans_handle *trans,
num_bytes = btrfs_calc_trans_metadata_size(root, 1);
ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes);
- if (!ret) {
+ if (!ret)
item->bytes_reserved = num_bytes;
- item->block_rsv = dst_rsv;
- }
return ret;
}
@@ -604,10 +604,13 @@ static int btrfs_delayed_item_reserve_metadata(struct btrfs_trans_handle *trans,
static void btrfs_delayed_item_release_metadata(struct btrfs_root *root,
struct btrfs_delayed_item *item)
{
+ struct btrfs_block_rsv *rsv;
+
if (!item->bytes_reserved)
return;
- btrfs_block_rsv_release(root, item->block_rsv,
+ rsv = &root->fs_info->global_block_rsv;
+ btrfs_block_rsv_release(root, rsv,
item->bytes_reserved);
}
@@ -1014,6 +1017,7 @@ int btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
struct btrfs_delayed_root *delayed_root;
struct btrfs_delayed_node *curr_node, *prev_node;
struct btrfs_path *path;
+ struct btrfs_block_rsv *block_rsv;
int ret = 0;
path = btrfs_alloc_path();
@@ -1021,6 +1025,9 @@ int btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
return -ENOMEM;
path->leave_spinning = 1;
+ block_rsv = trans->block_rsv;
+ trans->block_rsv = &root->fs_info->global_block_rsv;
+
delayed_root = btrfs_get_delayed_root(root);
curr_node = btrfs_first_delayed_node(delayed_root);
@@ -1045,6 +1052,7 @@ int btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
}
btrfs_free_path(path);
+ trans->block_rsv = block_rsv;
return ret;
}
@@ -1052,6 +1060,7 @@ static int __btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
struct btrfs_delayed_node *node)
{
struct btrfs_path *path;
+ struct btrfs_block_rsv *block_rsv;
int ret;
path = btrfs_alloc_path();
@@ -1059,6 +1068,9 @@ static int __btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
return -ENOMEM;
path->leave_spinning = 1;
+ block_rsv = trans->block_rsv;
+ trans->block_rsv = &node->root->fs_info->global_block_rsv;
+
ret = btrfs_insert_delayed_items(trans, path, node->root, node);
if (!ret)
ret = btrfs_delete_delayed_items(trans, path, node->root, node);
@@ -1066,6 +1078,7 @@ static int __btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
ret = btrfs_update_delayed_inode(trans, node->root, path, node);
btrfs_free_path(path);
+ trans->block_rsv = block_rsv;
return ret;
}
@@ -1116,6 +1129,7 @@ static void btrfs_async_run_delayed_node_done(struct btrfs_work *work)
struct btrfs_path *path;
struct btrfs_delayed_node *delayed_node = NULL;
struct btrfs_root *root;
+ struct btrfs_block_rsv *block_rsv;
unsigned long nr = 0;
int need_requeue = 0;
int ret;
@@ -1134,6 +1148,9 @@ static void btrfs_async_run_delayed_node_done(struct btrfs_work *work)
if (IS_ERR(trans))
goto free_path;
+ block_rsv = trans->block_rsv;
+ trans->block_rsv = &root->fs_info->global_block_rsv;
+
ret = btrfs_insert_delayed_items(trans, path, root, delayed_node);
if (!ret)
ret = btrfs_delete_delayed_items(trans, path, root,
@@ -1176,6 +1193,7 @@ static void btrfs_async_run_delayed_node_done(struct btrfs_work *work)
nr = trans->blocks_used;
+ trans->block_rsv = block_rsv;
btrfs_end_transaction_dmeta(trans, root);
__btrfs_btree_balance_dirty(root, nr);
free_path:
@@ -1222,6 +1240,13 @@ again:
return 0;
}
+void btrfs_assert_delayed_root_empty(struct btrfs_root *root)
+{
+ struct btrfs_delayed_root *delayed_root;
+ delayed_root = btrfs_get_delayed_root(root);
+ WARN_ON(btrfs_first_delayed_node(delayed_root));
+}
+
void btrfs_balance_delayed_items(struct btrfs_root *root)
{
struct btrfs_delayed_root *delayed_root;
@@ -1382,8 +1407,7 @@ end:
int btrfs_inode_delayed_dir_index_count(struct inode *inode)
{
- struct btrfs_delayed_node *delayed_node = BTRFS_I(inode)->delayed_node;
- int ret = 0;
+ struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode);
if (!delayed_node)
return -ENOENT;
@@ -1393,11 +1417,14 @@ int btrfs_inode_delayed_dir_index_count(struct inode *inode)
* a new directory index is added into the delayed node and index_cnt
* is updated now. So we needn't lock the delayed node.
*/
- if (!delayed_node->index_cnt)
+ if (!delayed_node->index_cnt) {
+ btrfs_release_delayed_node(delayed_node);
return -EINVAL;
+ }
BTRFS_I(inode)->index_cnt = delayed_node->index_cnt;
- return ret;
+ btrfs_release_delayed_node(delayed_node);
+ return 0;
}
void btrfs_get_delayed_items(struct inode *inode, struct list_head *ins_list,
@@ -1591,6 +1618,57 @@ static void fill_stack_inode_item(struct btrfs_trans_handle *trans,
inode->i_ctime.tv_nsec);
}
+int btrfs_fill_inode(struct inode *inode, u32 *rdev)
+{
+ struct btrfs_delayed_node *delayed_node;
+ struct btrfs_inode_item *inode_item;
+ struct btrfs_timespec *tspec;
+
+ delayed_node = btrfs_get_delayed_node(inode);
+ if (!delayed_node)
+ return -ENOENT;
+
+ mutex_lock(&delayed_node->mutex);
+ if (!delayed_node->inode_dirty) {
+ mutex_unlock(&delayed_node->mutex);
+ btrfs_release_delayed_node(delayed_node);
+ return -ENOENT;
+ }
+
+ inode_item = &delayed_node->inode_item;
+
+ inode->i_uid = btrfs_stack_inode_uid(inode_item);
+ inode->i_gid = btrfs_stack_inode_gid(inode_item);
+ btrfs_i_size_write(inode, btrfs_stack_inode_size(inode_item));
+ inode->i_mode = btrfs_stack_inode_mode(inode_item);
+ inode->i_nlink = btrfs_stack_inode_nlink(inode_item);
+ inode_set_bytes(inode, btrfs_stack_inode_nbytes(inode_item));
+ BTRFS_I(inode)->generation = btrfs_stack_inode_generation(inode_item);
+ BTRFS_I(inode)->sequence = btrfs_stack_inode_sequence(inode_item);
+ inode->i_rdev = 0;
+ *rdev = btrfs_stack_inode_rdev(inode_item);
+ BTRFS_I(inode)->flags = btrfs_stack_inode_flags(inode_item);
+
+ tspec = btrfs_inode_atime(inode_item);
+ inode->i_atime.tv_sec = btrfs_stack_timespec_sec(tspec);
+ inode->i_atime.tv_nsec = btrfs_stack_timespec_nsec(tspec);
+
+ tspec = btrfs_inode_mtime(inode_item);
+ inode->i_mtime.tv_sec = btrfs_stack_timespec_sec(tspec);
+ inode->i_mtime.tv_nsec = btrfs_stack_timespec_nsec(tspec);
+
+ tspec = btrfs_inode_ctime(inode_item);
+ inode->i_ctime.tv_sec = btrfs_stack_timespec_sec(tspec);
+ inode->i_ctime.tv_nsec = btrfs_stack_timespec_nsec(tspec);
+
+ inode->i_generation = BTRFS_I(inode)->generation;
+ BTRFS_I(inode)->index_cnt = (u64)-1;
+
+ mutex_unlock(&delayed_node->mutex);
+ btrfs_release_delayed_node(delayed_node);
+ return 0;
+}
+
int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct inode *inode)
{
diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h
index eb7d240aa648..8d27af4bd8b9 100644
--- a/fs/btrfs/delayed-inode.h
+++ b/fs/btrfs/delayed-inode.h
@@ -75,7 +75,6 @@ struct btrfs_delayed_item {
struct list_head tree_list; /* used for batch insert/delete items */
struct list_head readdir_list; /* used for readdir items */
u64 bytes_reserved;
- struct btrfs_block_rsv *block_rsv;
struct btrfs_delayed_node *delayed_node;
atomic_t refs;
int ins_or_del;
@@ -120,6 +119,7 @@ void btrfs_kill_delayed_inode_items(struct inode *inode);
int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct inode *inode);
+int btrfs_fill_inode(struct inode *inode, u32 *rdev);
/* Used for drop dead root */
void btrfs_kill_all_delayed_nodes(struct btrfs_root *root);
@@ -138,4 +138,8 @@ int btrfs_readdir_delayed_dir_index(struct file *filp, void *dirent,
/* for init */
int __init btrfs_delayed_inode_init(void);
void btrfs_delayed_inode_exit(void);
+
+/* for debugging */
+void btrfs_assert_delayed_root_empty(struct btrfs_root *root);
+
#endif
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index a203d363184d..1ac8db5dc0a3 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1044,7 +1044,6 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
root->last_trans = 0;
root->highest_objectid = 0;
root->name = NULL;
- root->in_sysfs = 0;
root->inode_tree = RB_ROOT;
INIT_RADIX_TREE(&root->delayed_nodes_tree, GFP_ATOMIC);
root->block_rsv = NULL;
@@ -1300,19 +1299,21 @@ again:
return root;
root->free_ino_ctl = kzalloc(sizeof(*root->free_ino_ctl), GFP_NOFS);
- if (!root->free_ino_ctl)
- goto fail;
root->free_ino_pinned = kzalloc(sizeof(*root->free_ino_pinned),
GFP_NOFS);
- if (!root->free_ino_pinned)
+ if (!root->free_ino_pinned || !root->free_ino_ctl) {
+ ret = -ENOMEM;
goto fail;
+ }
btrfs_init_free_ino_ctl(root);
mutex_init(&root->fs_commit_mutex);
spin_lock_init(&root->cache_lock);
init_waitqueue_head(&root->cache_wait);
- set_anon_super(&root->anon_super, NULL);
+ ret = set_anon_super(&root->anon_super, NULL);
+ if (ret)
+ goto fail;
if (btrfs_root_refs(&root->root_item) == 0) {
ret = -ENOENT;
@@ -1618,6 +1619,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
spin_lock_init(&fs_info->fs_roots_radix_lock);
spin_lock_init(&fs_info->delayed_iput_lock);
spin_lock_init(&fs_info->defrag_inodes_lock);
+ mutex_init(&fs_info->reloc_mutex);
init_completion(&fs_info->kobj_unregister);
fs_info->tree_root = tree_root;
@@ -1668,8 +1670,6 @@ struct btrfs_root *open_ctree(struct super_block *sb,
init_waitqueue_head(&fs_info->scrub_pause_wait);
init_rwsem(&fs_info->scrub_super_lock);
fs_info->scrub_workers_refcnt = 0;
- btrfs_init_workers(&fs_info->scrub_workers, "scrub",
- fs_info->thread_pool_size, &fs_info->generic_worker);
sb->s_blocksize = 4096;
sb->s_blocksize_bits = blksize_bits(4096);
@@ -2911,9 +2911,8 @@ static int btrfs_destroy_delalloc_inodes(struct btrfs_root *root)
INIT_LIST_HEAD(&splice);
- list_splice_init(&root->fs_info->delalloc_inodes, &splice);
-
spin_lock(&root->fs_info->delalloc_lock);
+ list_splice_init(&root->fs_info->delalloc_inodes, &splice);
while (!list_empty(&splice)) {
btrfs_inode = list_entry(splice.next, struct btrfs_inode,
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 5b9b6b6df242..71cd456fdb60 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3089,6 +3089,13 @@ alloc:
}
goto again;
}
+
+ /*
+ * If we have less pinned bytes than we want to allocate then
+ * don't bother committing the transaction, it won't help us.
+ */
+ if (data_sinfo->bytes_pinned < bytes)
+ committed = 1;
spin_unlock(&data_sinfo->lock);
/* commit the current transaction and try again */
@@ -3307,10 +3314,6 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
if (reserved == 0)
return 0;
- /* nothing to shrink - nothing to reclaim */
- if (root->fs_info->delalloc_bytes == 0)
- return 0;
-
max_reclaim = min(reserved, to_reclaim);
while (loops < 1024) {
@@ -4839,7 +4842,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
u64 num_bytes, u64 empty_size,
u64 search_start, u64 search_end,
u64 hint_byte, struct btrfs_key *ins,
- int data)
+ u64 data)
{
int ret = 0;
struct btrfs_root *root = orig_root->fs_info->extent_root;
@@ -4866,7 +4869,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
space_info = __find_space_info(root->fs_info, data);
if (!space_info) {
- printk(KERN_ERR "No space info for %d\n", data);
+ printk(KERN_ERR "No space info for %llu\n", data);
return -ENOSPC;
}
@@ -5211,9 +5214,7 @@ loop:
* LOOP_NO_EMPTY_SIZE, set empty_size and empty_cluster to 0 and try
* again
*/
- if (!ins->objectid && loop < LOOP_NO_EMPTY_SIZE &&
- (found_uncached_bg || empty_size || empty_cluster ||
- allowed_chunk_alloc)) {
+ if (!ins->objectid && loop < LOOP_NO_EMPTY_SIZE) {
index = 0;
if (loop == LOOP_FIND_IDEAL && found_uncached_bg) {
found_uncached_bg = false;
@@ -5253,32 +5254,36 @@ loop:
goto search;
}
- if (loop < LOOP_CACHING_WAIT) {
- loop++;
- goto search;
- }
+ loop++;
if (loop == LOOP_ALLOC_CHUNK) {
- empty_size = 0;
- empty_cluster = 0;
- }
+ if (allowed_chunk_alloc) {
+ ret = do_chunk_alloc(trans, root, num_bytes +
+ 2 * 1024 * 1024, data,
+ CHUNK_ALLOC_LIMITED);
+ allowed_chunk_alloc = 0;
+ if (ret == 1)
+ done_chunk_alloc = 1;
+ } else if (!done_chunk_alloc &&
+ space_info->force_alloc ==
+ CHUNK_ALLOC_NO_FORCE) {
+ space_info->force_alloc = CHUNK_ALLOC_LIMITED;
+ }
- if (allowed_chunk_alloc) {
- ret = do_chunk_alloc(trans, root, num_bytes +
- 2 * 1024 * 1024, data,
- CHUNK_ALLOC_LIMITED);
- allowed_chunk_alloc = 0;
- done_chunk_alloc = 1;
- } else if (!done_chunk_alloc &&
- space_info->force_alloc == CHUNK_ALLOC_NO_FORCE) {
- space_info->force_alloc = CHUNK_ALLOC_LIMITED;
+ /*
+ * We didn't allocate a chunk, go ahead and drop the
+ * empty size and loop again.
+ */
+ if (!done_chunk_alloc)
+ loop = LOOP_NO_EMPTY_SIZE;
}
- if (loop < LOOP_NO_EMPTY_SIZE) {
- loop++;
- goto search;
+ if (loop == LOOP_NO_EMPTY_SIZE) {
+ empty_size = 0;
+ empty_cluster = 0;
}
- ret = -ENOSPC;
+
+ goto search;
} else if (!ins->objectid) {
ret = -ENOSPC;
} else if (ins->objectid) {
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 4e8445a4757c..a11a92ee2d30 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -126,9 +126,9 @@ struct extent_buffer {
unsigned long map_len;
struct page *first_page;
unsigned long bflags;
- atomic_t refs;
struct list_head leak_list;
struct rcu_head rcu_head;
+ atomic_t refs;
/* the spinlock is used to protect most operations */
spinlock_t lock;
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index ad144736a5fd..bf0d61567f3d 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -250,7 +250,7 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
pgoff_t index = 0;
unsigned long first_page_offset;
int num_checksums;
- int ret = 0, ret2;
+ int ret = 0;
INIT_LIST_HEAD(&bitmaps);
@@ -421,11 +421,10 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
goto free_cache;
}
spin_lock(&ctl->tree_lock);
- ret2 = link_free_space(ctl, e);
+ ret = link_free_space(ctl, e);
ctl->total_bitmaps++;
ctl->op->recalc_thresholds(ctl);
spin_unlock(&ctl->tree_lock);
- list_add_tail(&e->list, &bitmaps);
if (ret) {
printk(KERN_ERR "Duplicate entries in "
"free space cache, dumping\n");
@@ -434,6 +433,7 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
page_cache_release(page);
goto free_cache;
}
+ list_add_tail(&e->list, &bitmaps);
}
num_entries--;
@@ -1417,6 +1417,23 @@ again:
return 0;
}
+static u64 add_bytes_to_bitmap(struct btrfs_free_space_ctl *ctl,
+ struct btrfs_free_space *info, u64 offset,
+ u64 bytes)
+{
+ u64 bytes_to_set = 0;
+ u64 end;
+
+ end = info->offset + (u64)(BITS_PER_BITMAP * ctl->unit);
+
+ bytes_to_set = min(end - offset, bytes);
+
+ bitmap_set_bits(ctl, info, offset, bytes_to_set);
+
+ return bytes_to_set;
+
+}
+
static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
struct btrfs_free_space *info)
{
@@ -1453,12 +1470,18 @@ static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
return true;
}
+static struct btrfs_free_space_op free_space_op = {
+ .recalc_thresholds = recalculate_thresholds,
+ .use_bitmap = use_bitmap,
+};
+
static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl,
struct btrfs_free_space *info)
{
struct btrfs_free_space *bitmap_info;
+ struct btrfs_block_group_cache *block_group = NULL;
int added = 0;
- u64 bytes, offset, end;
+ u64 bytes, offset, bytes_added;
int ret;
bytes = info->bytes;
@@ -1467,7 +1490,49 @@ static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl,
if (!ctl->op->use_bitmap(ctl, info))
return 0;
+ if (ctl->op == &free_space_op)
+ block_group = ctl->private;
again:
+ /*
+ * Since we link bitmaps right into the cluster we need to see if we
+ * have a cluster here, and if so and it has our bitmap we need to add
+ * the free space to that bitmap.
+ */
+ if (block_group && !list_empty(&block_group->cluster_list)) {
+ struct btrfs_free_cluster *cluster;
+ struct rb_node *node;
+ struct btrfs_free_space *entry;
+
+ cluster = list_entry(block_group->cluster_list.next,
+ struct btrfs_free_cluster,
+ block_group_list);
+ spin_lock(&cluster->lock);
+ node = rb_first(&cluster->root);
+ if (!node) {
+ spin_unlock(&cluster->lock);
+ goto no_cluster_bitmap;
+ }
+
+ entry = rb_entry(node, struct btrfs_free_space, offset_index);
+ if (!entry->bitmap) {
+ spin_unlock(&cluster->lock);
+ goto no_cluster_bitmap;
+ }
+
+ if (entry->offset == offset_to_bitmap(ctl, offset)) {
+ bytes_added = add_bytes_to_bitmap(ctl, entry,
+ offset, bytes);
+ bytes -= bytes_added;
+ offset += bytes_added;
+ }
+ spin_unlock(&cluster->lock);
+ if (!bytes) {
+ ret = 1;
+ goto out;
+ }
+ }
+
+no_cluster_bitmap:
bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
1, 0);
if (!bitmap_info) {
@@ -1475,19 +1540,10 @@ again:
goto new_bitmap;
}
- end = bitmap_info->offset + (u64)(BITS_PER_BITMAP * ctl->unit);
-
- if (offset >= bitmap_info->offset && offset + bytes > end) {
- bitmap_set_bits(ctl, bitmap_info, offset, end - offset);
- bytes -= end - offset;
- offset = end;
- added = 0;
- } else if (offset >= bitmap_info->offset && offset + bytes <= end) {
- bitmap_set_bits(ctl, bitmap_info, offset, bytes);
- bytes = 0;
- } else {
- BUG();
- }
+ bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes);
+ bytes -= bytes_added;
+ offset += bytes_added;
+ added = 0;
if (!bytes) {
ret = 1;
@@ -1766,11 +1822,6 @@ void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
"\n", count);
}
-static struct btrfs_free_space_op free_space_op = {
- .recalc_thresholds = recalculate_thresholds,
- .use_bitmap = use_bitmap,
-};
-
void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group)
{
struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
@@ -1842,9 +1893,12 @@ void __btrfs_remove_free_space_cache_locked(struct btrfs_free_space_ctl *ctl)
while ((node = rb_last(&ctl->free_space_offset)) != NULL) {
info = rb_entry(node, struct btrfs_free_space, offset_index);
- unlink_free_space(ctl, info);
- kfree(info->bitmap);
- kmem_cache_free(btrfs_free_space_cachep, info);
+ if (!info->bitmap) {
+ unlink_free_space(ctl, info);
+ kmem_cache_free(btrfs_free_space_cachep, info);
+ } else {
+ free_bitmap(ctl, info);
+ }
if (need_resched()) {
spin_unlock(&ctl->tree_lock);
cond_resched();
@@ -2142,9 +2196,11 @@ again:
/*
* This searches the block group for just extents to fill the cluster with.
*/
-static int setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
- struct btrfs_free_cluster *cluster,
- u64 offset, u64 bytes, u64 min_bytes)
+static noinline int
+setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
+ struct btrfs_free_cluster *cluster,
+ struct list_head *bitmaps, u64 offset, u64 bytes,
+ u64 min_bytes)
{
struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
struct btrfs_free_space *first = NULL;
@@ -2166,6 +2222,8 @@ static int setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
* extent entry.
*/
while (entry->bitmap) {
+ if (list_empty(&entry->list))
+ list_add_tail(&entry->list, bitmaps);
node = rb_next(&entry->offset_index);
if (!node)
return -ENOSPC;
@@ -2185,8 +2243,12 @@ static int setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
return -ENOSPC;
entry = rb_entry(node, struct btrfs_free_space, offset_index);
- if (entry->bitmap)
+ if (entry->bitmap) {
+ if (list_empty(&entry->list))
+ list_add_tail(&entry->list, bitmaps);
continue;
+ }
+
/*
* we haven't filled the empty size and the window is
* very large. reset and try again
@@ -2238,9 +2300,11 @@ static int setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
* This specifically looks for bitmaps that may work in the cluster, we assume
* that we have already failed to find extents that will work.
*/
-static int setup_cluster_bitmap(struct btrfs_block_group_cache *block_group,
- struct btrfs_free_cluster *cluster,
- u64 offset, u64 bytes, u64 min_bytes)
+static noinline int
+setup_cluster_bitmap(struct btrfs_block_group_cache *block_group,
+ struct btrfs_free_cluster *cluster,
+ struct list_head *bitmaps, u64 offset, u64 bytes,
+ u64 min_bytes)
{
struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
struct btrfs_free_space *entry;
@@ -2250,10 +2314,39 @@ static int setup_cluster_bitmap(struct btrfs_block_group_cache *block_group,
if (ctl->total_bitmaps == 0)
return -ENOSPC;
+ /*
+ * First check our cached list of bitmaps and see if there is an entry
+ * here that will work.
+ */
+ list_for_each_entry(entry, bitmaps, list) {
+ if (entry->bytes < min_bytes)
+ continue;
+ ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset,
+ bytes, min_bytes);
+ if (!ret)
+ return 0;
+ }
+
+ /*
+ * If we do have entries on our list and we are here then we didn't find
+ * anything, so go ahead and get the next entry after the last entry in
+ * this list and start the search from there.
+ */
+ if (!list_empty(bitmaps)) {
+ entry = list_entry(bitmaps->prev, struct btrfs_free_space,
+ list);
+ node = rb_next(&entry->offset_index);
+ if (!node)
+ return -ENOSPC;
+ entry = rb_entry(node, struct btrfs_free_space, offset_index);
+ goto search;
+ }
+
entry = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 0, 1);
if (!entry)
return -ENOSPC;
+search:
node = &entry->offset_index;
do {
entry = rb_entry(node, struct btrfs_free_space, offset_index);
@@ -2284,6 +2377,8 @@ int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
u64 offset, u64 bytes, u64 empty_size)
{
struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
+ struct list_head bitmaps;
+ struct btrfs_free_space *entry, *tmp;
u64 min_bytes;
int ret;
@@ -2322,11 +2417,16 @@ int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
goto out;
}
- ret = setup_cluster_no_bitmap(block_group, cluster, offset, bytes,
- min_bytes);
+ INIT_LIST_HEAD(&bitmaps);
+ ret = setup_cluster_no_bitmap(block_group, cluster, &bitmaps, offset,
+ bytes, min_bytes);
if (ret)
- ret = setup_cluster_bitmap(block_group, cluster, offset,
- bytes, min_bytes);
+ ret = setup_cluster_bitmap(block_group, cluster, &bitmaps,
+ offset, bytes, min_bytes);
+
+ /* Clear our temporary list */
+ list_for_each_entry_safe(entry, tmp, &bitmaps, list)
+ list_del_init(&entry->list);
if (!ret) {
atomic_inc(&block_group->count);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index ebf95f7a44d6..3601f0aebddf 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1986,7 +1986,7 @@ static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end,
}
if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)
- return 0;
+ goto good;
if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID &&
test_range_bit(io_tree, start, end, EXTENT_NODATASUM, 1, NULL)) {
@@ -2509,6 +2509,11 @@ static void btrfs_read_locked_inode(struct inode *inode)
int maybe_acls;
u32 rdev;
int ret;
+ bool filled = false;
+
+ ret = btrfs_fill_inode(inode, &rdev);
+ if (!ret)
+ filled = true;
path = btrfs_alloc_path();
BUG_ON(!path);
@@ -2520,6 +2525,10 @@ static void btrfs_read_locked_inode(struct inode *inode)
goto make_bad;
leaf = path->nodes[0];
+
+ if (filled)
+ goto cache_acl;
+
inode_item = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_inode_item);
if (!leaf->map_token)
@@ -2556,7 +2565,7 @@ static void btrfs_read_locked_inode(struct inode *inode)
BTRFS_I(inode)->index_cnt = (u64)-1;
BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item);
-
+cache_acl:
/*
* try to precache a NULL acl entry for files that don't have
* any xattrs or acls
@@ -2572,7 +2581,6 @@ static void btrfs_read_locked_inode(struct inode *inode)
}
btrfs_free_path(path);
- inode_item = NULL;
switch (inode->i_mode & S_IFMT) {
case S_IFREG:
@@ -2670,12 +2678,14 @@ noinline int btrfs_update_inode(struct btrfs_trans_handle *trans,
int ret;
/*
- * If root is tree root, it means this inode is used to
- * store free space information. And these inodes are updated
- * when committing the transaction, so they needn't delaye to
- * be updated, or deadlock will occured.
+ * If the inode is a free space inode, we can deadlock during commit
+ * if we put it into the delayed code.
+ *
+ * The data relocation inode should also be directly updated
+ * without delay
*/
- if (!is_free_space_inode(root, inode)) {
+ if (!is_free_space_inode(root, inode)
+ && root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID) {
ret = btrfs_delayed_update_inode(trans, root, inode);
if (!ret)
btrfs_set_inode_last_trans(trans, inode);
@@ -3076,6 +3086,7 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
ret = btrfs_update_inode(trans, root, dir);
BUG_ON(ret);
+ btrfs_free_path(path);
return 0;
}
@@ -3646,7 +3657,7 @@ void btrfs_evict_inode(struct inode *inode)
btrfs_i_size_write(inode, 0);
while (1) {
- trans = btrfs_start_transaction(root, 0);
+ trans = btrfs_join_transaction(root);
BUG_ON(IS_ERR(trans));
trans->block_rsv = root->orphan_block_rsv;
@@ -4519,6 +4530,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
inode_tree_add(inode);
trace_btrfs_inode_new(inode);
+ btrfs_set_inode_last_trans(trans, inode);
return inode;
fail:
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index ac37040e426a..a3c4751e07db 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -482,8 +482,10 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
ret = btrfs_snap_reserve_metadata(trans, pending_snapshot);
BUG_ON(ret);
+ spin_lock(&root->fs_info->trans_lock);
list_add(&pending_snapshot->list,
&trans->transaction->pending_snapshots);
+ spin_unlock(&root->fs_info->trans_lock);
if (async_transid) {
*async_transid = trans->transid;
ret = btrfs_commit_transaction_async(trans,
@@ -2054,29 +2056,34 @@ static long btrfs_ioctl_rm_dev(struct btrfs_root *root, void __user *arg)
static long btrfs_ioctl_fs_info(struct btrfs_root *root, void __user *arg)
{
- struct btrfs_ioctl_fs_info_args fi_args;
+ struct btrfs_ioctl_fs_info_args *fi_args;
struct btrfs_device *device;
struct btrfs_device *next;
struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
+ int ret = 0;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- fi_args.num_devices = fs_devices->num_devices;
- fi_args.max_id = 0;
- memcpy(&fi_args.fsid, root->fs_info->fsid, sizeof(fi_args.fsid));
+ fi_args = kzalloc(sizeof(*fi_args), GFP_KERNEL);
+ if (!fi_args)
+ return -ENOMEM;
+
+ fi_args->num_devices = fs_devices->num_devices;
+ memcpy(&fi_args->fsid, root->fs_info->fsid, sizeof(fi_args->fsid));
mutex_lock(&fs_devices->device_list_mutex);
list_for_each_entry_safe(device, next, &fs_devices->devices, dev_list) {
- if (device->devid > fi_args.max_id)
- fi_args.max_id = device->devid;
+ if (device->devid > fi_args->max_id)
+ fi_args->max_id = device->devid;
}
mutex_unlock(&fs_devices->device_list_mutex);
- if (copy_to_user(arg, &fi_args, sizeof(fi_args)))
- return -EFAULT;
+ if (copy_to_user(arg, fi_args, sizeof(*fi_args)))
+ ret = -EFAULT;
- return 0;
+ kfree(fi_args);
+ return ret;
}
static long btrfs_ioctl_dev_info(struct btrfs_root *root, void __user *arg)
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index b1ef27cc673b..5e0a3dc79a45 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -1368,7 +1368,7 @@ int btrfs_update_reloc_root(struct btrfs_trans_handle *trans,
int ret;
if (!root->reloc_root)
- return 0;
+ goto out;
reloc_root = root->reloc_root;
root_item = &reloc_root->root_item;
@@ -1390,6 +1390,8 @@ int btrfs_update_reloc_root(struct btrfs_trans_handle *trans,
ret = btrfs_update_root(trans, root->fs_info->tree_root,
&reloc_root->root_key, root_item);
BUG_ON(ret);
+
+out:
return 0;
}
@@ -2142,10 +2144,11 @@ int prepare_to_merge(struct reloc_control *rc, int err)
u64 num_bytes = 0;
int ret;
- spin_lock(&root->fs_info->trans_lock);
+ mutex_lock(&root->fs_info->reloc_mutex);
rc->merging_rsv_size += root->nodesize * (BTRFS_MAX_LEVEL - 1) * 2;
rc->merging_rsv_size += rc->nodes_relocated * 2;
- spin_unlock(&root->fs_info->trans_lock);
+ mutex_unlock(&root->fs_info->reloc_mutex);
+
again:
if (!err) {
num_bytes = rc->merging_rsv_size;
@@ -2214,9 +2217,16 @@ int merge_reloc_roots(struct reloc_control *rc)
int ret;
again:
root = rc->extent_root;
- spin_lock(&root->fs_info->trans_lock);
+
+ /*
+ * this serializes us with btrfs_record_root_in_transaction,
+ * we have to make sure nobody is in the middle of
+ * adding their roots to the list while we are
+ * doing this splice
+ */
+ mutex_lock(&root->fs_info->reloc_mutex);
list_splice_init(&rc->reloc_roots, &reloc_roots);
- spin_unlock(&root->fs_info->trans_lock);
+ mutex_unlock(&root->fs_info->reloc_mutex);
while (!list_empty(&reloc_roots)) {
found = 1;
@@ -3590,17 +3600,19 @@ next:
static void set_reloc_control(struct reloc_control *rc)
{
struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
- spin_lock(&fs_info->trans_lock);
+
+ mutex_lock(&fs_info->reloc_mutex);
fs_info->reloc_ctl = rc;
- spin_unlock(&fs_info->trans_lock);
+ mutex_unlock(&fs_info->reloc_mutex);
}
static void unset_reloc_control(struct reloc_control *rc)
{
struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
- spin_lock(&fs_info->trans_lock);
+
+ mutex_lock(&fs_info->reloc_mutex);
fs_info->reloc_ctl = NULL;
- spin_unlock(&fs_info->trans_lock);
+ mutex_unlock(&fs_info->reloc_mutex);
}
static int check_extent_flags(u64 flags)
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index df50fd1eca8f..a8d03d5efb5d 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -16,13 +16,7 @@
* Boston, MA 021110-1307, USA.
*/
-#include <linux/sched.h>
-#include <linux/pagemap.h>
-#include <linux/writeback.h>
#include <linux/blkdev.h>
-#include <linux/rbtree.h>
-#include <linux/slab.h>
-#include <linux/workqueue.h>
#include "ctree.h"
#include "volumes.h"
#include "disk-io.h"
@@ -804,18 +798,12 @@ static noinline_for_stack int scrub_stripe(struct scrub_dev *sdev,
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
if (ret < 0)
- goto out;
-
- l = path->nodes[0];
- slot = path->slots[0];
- btrfs_item_key_to_cpu(l, &key, slot);
- if (key.objectid != logical) {
- ret = btrfs_previous_item(root, path, 0,
- BTRFS_EXTENT_ITEM_KEY);
- if (ret < 0)
- goto out;
- }
+ goto out_noplug;
+ /*
+ * we might miss half an extent here, but that doesn't matter,
+ * as it's only the prefetch
+ */
while (1) {
l = path->nodes[0];
slot = path->slots[0];
@@ -824,7 +812,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_dev *sdev,
if (ret == 0)
continue;
if (ret < 0)
- goto out;
+ goto out_noplug;
break;
}
@@ -906,15 +894,20 @@ again:
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
if (ret < 0)
goto out;
-
- l = path->nodes[0];
- slot = path->slots[0];
- btrfs_item_key_to_cpu(l, &key, slot);
- if (key.objectid != logical) {
+ if (ret > 0) {
ret = btrfs_previous_item(root, path, 0,
BTRFS_EXTENT_ITEM_KEY);
if (ret < 0)
goto out;
+ if (ret > 0) {
+ /* there's no smaller item, so stick with the
+ * larger one */
+ btrfs_release_path(path);
+ ret = btrfs_search_slot(NULL, root, &key,
+ path, 0, 0);
+ if (ret < 0)
+ goto out;
+ }
}
while (1) {
@@ -989,6 +982,7 @@ next:
out:
blk_finish_plug(&plug);
+out_noplug:
btrfs_free_path(path);
return ret < 0 ? ret : 0;
}
@@ -1064,8 +1058,15 @@ int scrub_enumerate_chunks(struct scrub_dev *sdev, u64 start, u64 end)
while (1) {
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
if (ret < 0)
- goto out;
- ret = 0;
+ break;
+ if (ret > 0) {
+ if (path->slots[0] >=
+ btrfs_header_nritems(path->nodes[0])) {
+ ret = btrfs_next_leaf(root, path);
+ if (ret)
+ break;
+ }
+ }
l = path->nodes[0];
slot = path->slots[0];
@@ -1075,7 +1076,7 @@ int scrub_enumerate_chunks(struct scrub_dev *sdev, u64 start, u64 end)
if (found_key.objectid != sdev->dev->devid)
break;
- if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY)
+ if (btrfs_key_type(&found_key) != BTRFS_DEV_EXTENT_KEY)
break;
if (found_key.offset >= end)
@@ -1104,7 +1105,7 @@ int scrub_enumerate_chunks(struct scrub_dev *sdev, u64 start, u64 end)
cache = btrfs_lookup_block_group(fs_info, chunk_offset);
if (!cache) {
ret = -ENOENT;
- goto out;
+ break;
}
ret = scrub_chunk(sdev, chunk_tree, chunk_objectid,
chunk_offset, length);
@@ -1116,9 +1117,13 @@ int scrub_enumerate_chunks(struct scrub_dev *sdev, u64 start, u64 end)
btrfs_release_path(path);
}
-out:
btrfs_free_path(path);
- return ret;
+
+ /*
+ * ret can still be 1 from search_slot or next_leaf,
+ * that's not an error
+ */
+ return ret < 0 ? ret : 0;
}
static noinline_for_stack int scrub_supers(struct scrub_dev *sdev)
@@ -1155,8 +1160,12 @@ static noinline_for_stack int scrub_workers_get(struct btrfs_root *root)
struct btrfs_fs_info *fs_info = root->fs_info;
mutex_lock(&fs_info->scrub_lock);
- if (fs_info->scrub_workers_refcnt == 0)
+ if (fs_info->scrub_workers_refcnt == 0) {
+ btrfs_init_workers(&fs_info->scrub_workers, "scrub",
+ fs_info->thread_pool_size, &fs_info->generic_worker);
+ fs_info->scrub_workers.idle_thresh = 4;
btrfs_start_workers(&fs_info->scrub_workers, 1);
+ }
++fs_info->scrub_workers_refcnt;
mutex_unlock(&fs_info->scrub_lock);
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 0bb4ebbb71b7..15634d4648d7 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -723,6 +723,12 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
seq_puts(seq, ",clear_cache");
if (btrfs_test_opt(root, USER_SUBVOL_RM_ALLOWED))
seq_puts(seq, ",user_subvol_rm_allowed");
+ if (btrfs_test_opt(root, ENOSPC_DEBUG))
+ seq_puts(seq, ",enospc_debug");
+ if (btrfs_test_opt(root, AUTO_DEFRAG))
+ seq_puts(seq, ",autodefrag");
+ if (btrfs_test_opt(root, INODE_MAP_CACHE))
+ seq_puts(seq, ",inode_cache");
return 0;
}
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index c3c223ae6691..daac9ae6d731 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -28,152 +28,6 @@
#include "disk-io.h"
#include "transaction.h"
-static ssize_t root_blocks_used_show(struct btrfs_root *root, char *buf)
-{
- return snprintf(buf, PAGE_SIZE, "%llu\n",
- (unsigned long long)btrfs_root_used(&root->root_item));
-}
-
-static ssize_t root_block_limit_show(struct btrfs_root *root, char *buf)
-{
- return snprintf(buf, PAGE_SIZE, "%llu\n",
- (unsigned long long)btrfs_root_limit(&root->root_item));
-}
-
-static ssize_t super_blocks_used_show(struct btrfs_fs_info *fs, char *buf)
-{
-
- return snprintf(buf, PAGE_SIZE, "%llu\n",
- (unsigned long long)btrfs_super_bytes_used(&fs->super_copy));
-}
-
-static ssize_t super_total_blocks_show(struct btrfs_fs_info *fs, char *buf)
-{
- return snprintf(buf, PAGE_SIZE, "%llu\n",
- (unsigned long long)btrfs_super_total_bytes(&fs->super_copy));
-}
-
-static ssize_t super_blocksize_show(struct btrfs_fs_info *fs, char *buf)
-{
- return snprintf(buf, PAGE_SIZE, "%llu\n",
- (unsigned long long)btrfs_super_sectorsize(&fs->super_copy));
-}
-
-/* this is for root attrs (subvols/snapshots) */
-struct btrfs_root_attr {
- struct attribute attr;
- ssize_t (*show)(struct btrfs_root *, char *);
- ssize_t (*store)(struct btrfs_root *, const char *, size_t);
-};
-
-#define ROOT_ATTR(name, mode, show, store) \
-static struct btrfs_root_attr btrfs_root_attr_##name = __ATTR(name, mode, \
- show, store)
-
-ROOT_ATTR(blocks_used, 0444, root_blocks_used_show, NULL);
-ROOT_ATTR(block_limit, 0644, root_block_limit_show, NULL);
-
-static struct attribute *btrfs_root_attrs[] = {
- &btrfs_root_attr_blocks_used.attr,
- &btrfs_root_attr_block_limit.attr,
- NULL,
-};
-
-/* this is for super attrs (actual full fs) */
-struct btrfs_super_attr {
- struct attribute attr;
- ssize_t (*show)(struct btrfs_fs_info *, char *);
- ssize_t (*store)(struct btrfs_fs_info *, const char *, size_t);
-};
-
-#define SUPER_ATTR(name, mode, show, store) \
-static struct btrfs_super_attr btrfs_super_attr_##name = __ATTR(name, mode, \
- show, store)
-
-SUPER_ATTR(blocks_used, 0444, super_blocks_used_show, NULL);
-SUPER_ATTR(total_blocks, 0444, super_total_blocks_show, NULL);
-SUPER_ATTR(blocksize, 0444, super_blocksize_show, NULL);
-
-static struct attribute *btrfs_super_attrs[] = {
- &btrfs_super_attr_blocks_used.attr,
- &btrfs_super_attr_total_blocks.attr,
- &btrfs_super_attr_blocksize.attr,
- NULL,
-};
-
-static ssize_t btrfs_super_attr_show(struct kobject *kobj,
- struct attribute *attr, char *buf)
-{
- struct btrfs_fs_info *fs = container_of(kobj, struct btrfs_fs_info,
- super_kobj);
- struct btrfs_super_attr *a = container_of(attr,
- struct btrfs_super_attr,
- attr);
-
- return a->show ? a->show(fs, buf) : 0;
-}
-
-static ssize_t btrfs_super_attr_store(struct kobject *kobj,
- struct attribute *attr,
- const char *buf, size_t len)
-{
- struct btrfs_fs_info *fs = container_of(kobj, struct btrfs_fs_info,
- super_kobj);
- struct btrfs_super_attr *a = container_of(attr,
- struct btrfs_super_attr,
- attr);
-
- return a->store ? a->store(fs, buf, len) : 0;
-}
-
-static ssize_t btrfs_root_attr_show(struct kobject *kobj,
- struct attribute *attr, char *buf)
-{
- struct btrfs_root *root = container_of(kobj, struct btrfs_root,
- root_kobj);
- struct btrfs_root_attr *a = container_of(attr,
- struct btrfs_root_attr,
- attr);
-
- return a->show ? a->show(root, buf) : 0;
-}
-
-static ssize_t btrfs_root_attr_store(struct kobject *kobj,
- struct attribute *attr,
- const char *buf, size_t len)
-{
- struct btrfs_root *root = container_of(kobj, struct btrfs_root,
- root_kobj);
- struct btrfs_root_attr *a = container_of(attr,
- struct btrfs_root_attr,
- attr);
- return a->store ? a->store(root, buf, len) : 0;
-}
-
-static void btrfs_super_release(struct kobject *kobj)
-{
- struct btrfs_fs_info *fs = container_of(kobj, struct btrfs_fs_info,
- super_kobj);
- complete(&fs->kobj_unregister);
-}
-
-static void btrfs_root_release(struct kobject *kobj)
-{
- struct btrfs_root *root = container_of(kobj, struct btrfs_root,
- root_kobj);
- complete(&root->kobj_unregister);
-}
-
-static const struct sysfs_ops btrfs_super_attr_ops = {
- .show = btrfs_super_attr_show,
- .store = btrfs_super_attr_store,
-};
-
-static const struct sysfs_ops btrfs_root_attr_ops = {
- .show = btrfs_root_attr_show,
- .store = btrfs_root_attr_store,
-};
-
/* /sys/fs/btrfs/ entry */
static struct kset *btrfs_kset;
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index dd719662340e..51dcec86757f 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -126,28 +126,85 @@ static noinline int join_transaction(struct btrfs_root *root, int nofail)
* to make sure the old root from before we joined the transaction is deleted
* when the transaction commits
*/
-int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
+static int record_root_in_trans(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
{
if (root->ref_cows && root->last_trans < trans->transid) {
WARN_ON(root == root->fs_info->extent_root);
WARN_ON(root->commit_root != root->node);
+ /*
+ * see below for in_trans_setup usage rules
+ * we have the reloc mutex held now, so there
+ * is only one writer in this function
+ */
+ root->in_trans_setup = 1;
+
+ /* make sure readers find in_trans_setup before
+ * they find our root->last_trans update
+ */
+ smp_wmb();
+
spin_lock(&root->fs_info->fs_roots_radix_lock);
if (root->last_trans == trans->transid) {
spin_unlock(&root->fs_info->fs_roots_radix_lock);
return 0;
}
- root->last_trans = trans->transid;
radix_tree_tag_set(&root->fs_info->fs_roots_radix,
(unsigned long)root->root_key.objectid,
BTRFS_ROOT_TRANS_TAG);
spin_unlock(&root->fs_info->fs_roots_radix_lock);
+ root->last_trans = trans->transid;
+
+ /* this is pretty tricky. We don't want to
+ * take the relocation lock in btrfs_record_root_in_trans
+ * unless we're really doing the first setup for this root in
+ * this transaction.
+ *
+ * Normally we'd use root->last_trans as a flag to decide
+ * if we want to take the expensive mutex.
+ *
+ * But, we have to set root->last_trans before we
+ * init the relocation root, otherwise, we trip over warnings
+ * in ctree.c. The solution used here is to flag ourselves
+ * with root->in_trans_setup. When this is 1, we're still
+ * fixing up the reloc trees and everyone must wait.
+ *
+ * When this is zero, they can trust root->last_trans and fly
+ * through btrfs_record_root_in_trans without having to take the
+ * lock. smp_wmb() makes sure that all the writes above are
+ * done before we pop in the zero below
+ */
btrfs_init_reloc_root(trans, root);
+ smp_wmb();
+ root->in_trans_setup = 0;
}
return 0;
}
+
+int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root)
+{
+ if (!root->ref_cows)
+ return 0;
+
+ /*
+ * see record_root_in_trans for comments about in_trans_setup usage
+ * and barriers
+ */
+ smp_rmb();
+ if (root->last_trans == trans->transid &&
+ !root->in_trans_setup)
+ return 0;
+
+ mutex_lock(&root->fs_info->reloc_mutex);
+ record_root_in_trans(trans, root);
+ mutex_unlock(&root->fs_info->reloc_mutex);
+
+ return 0;
+}
+
/* wait for commit against the current transaction to become unblocked
* when this is done, it is safe to start a new transaction, but the current
* transaction might not be fully on disk.
@@ -349,7 +406,7 @@ int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid)
list) {
if (t->in_commit) {
if (t->commit_done)
- goto out;
+ break;
cur_trans = t;
atomic_inc(&cur_trans->use_count);
break;
@@ -882,7 +939,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
parent = dget_parent(dentry);
parent_inode = parent->d_inode;
parent_root = BTRFS_I(parent_inode)->root;
- btrfs_record_root_in_trans(trans, parent_root);
+ record_root_in_trans(trans, parent_root);
/*
* insert the directory item
@@ -900,7 +957,16 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
ret = btrfs_update_inode(trans, parent_root, parent_inode);
BUG_ON(ret);
- btrfs_record_root_in_trans(trans, root);
+ /*
+ * pull in the delayed directory update
+ * and the delayed inode item
+ * otherwise we corrupt the FS during
+ * snapshot
+ */
+ ret = btrfs_run_delayed_items(trans, root);
+ BUG_ON(ret);
+
+ record_root_in_trans(trans, root);
btrfs_set_root_last_snapshot(&root->root_item, trans->transid);
memcpy(new_root_item, &root->root_item, sizeof(*new_root_item));
btrfs_check_and_init_root_item(new_root_item);
@@ -961,14 +1027,6 @@ static noinline int create_pending_snapshots(struct btrfs_trans_handle *trans,
int ret;
list_for_each_entry(pending, head, list) {
- /*
- * We must deal with the delayed items before creating
- * snapshots, or we will create a snapthot with inconsistent
- * information.
- */
- ret = btrfs_run_delayed_items(trans, fs_info->fs_root);
- BUG_ON(ret);
-
ret = create_pending_snapshot(trans, fs_info, pending);
BUG_ON(ret);
}
@@ -1118,8 +1176,11 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
wait_current_trans_commit_start_and_unblock(root, cur_trans);
else
wait_current_trans_commit_start(root, cur_trans);
- put_transaction(cur_trans);
+ if (current->journal_info == trans)
+ current->journal_info = NULL;
+
+ put_transaction(cur_trans);
return 0;
}
@@ -1238,21 +1299,42 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
schedule_timeout(1);
finish_wait(&cur_trans->writer_wait, &wait);
- spin_lock(&root->fs_info->trans_lock);
- root->fs_info->trans_no_join = 1;
- spin_unlock(&root->fs_info->trans_lock);
} while (atomic_read(&cur_trans->num_writers) > 1 ||
(should_grow && cur_trans->num_joined != joined));
- ret = create_pending_snapshots(trans, root->fs_info);
- BUG_ON(ret);
+ /*
+ * Ok now we need to make sure to block out any other joins while we
+ * commit the transaction. We could have started a join before setting
+ * no_join so make sure to wait for num_writers to == 1 again.
+ */
+ spin_lock(&root->fs_info->trans_lock);
+ root->fs_info->trans_no_join = 1;
+ spin_unlock(&root->fs_info->trans_lock);
+ wait_event(cur_trans->writer_wait,
+ atomic_read(&cur_trans->num_writers) == 1);
+
+ /*
+ * the reloc mutex makes sure that we stop
+ * the balancing code from coming in and moving
+ * extents around in the middle of the commit
+ */
+ mutex_lock(&root->fs_info->reloc_mutex);
ret = btrfs_run_delayed_items(trans, root);
BUG_ON(ret);
+ ret = create_pending_snapshots(trans, root->fs_info);
+ BUG_ON(ret);
+
ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
BUG_ON(ret);
+ /*
+ * make sure none of the code above managed to slip in a
+ * delayed item
+ */
+ btrfs_assert_delayed_root_empty(root);
+
WARN_ON(cur_trans != trans->transaction);
btrfs_scrub_pause(root);
@@ -1309,6 +1391,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
root->fs_info->running_transaction = NULL;
root->fs_info->trans_no_join = 0;
spin_unlock(&root->fs_info->trans_lock);
+ mutex_unlock(&root->fs_info->reloc_mutex);
wake_up(&root->fs_info->transaction_wait);
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 592396c6dc47..4ce8a9f41d1e 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -3177,7 +3177,7 @@ again:
tmp_key.offset = (u64)-1;
wc.replay_dest = btrfs_read_fs_root_no_name(fs_info, &tmp_key);
- BUG_ON(!wc.replay_dest);
+ BUG_ON(IS_ERR_OR_NULL(wc.replay_dest));
wc.replay_dest->log_root = log;
btrfs_record_root_in_trans(trans, wc.replay_dest);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index da541dfca2e3..19450bc53632 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -689,12 +689,8 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
transid = btrfs_super_generation(disk_super);
if (disk_super->label[0])
printk(KERN_INFO "device label %s ", disk_super->label);
- else {
- /* FIXME, make a readl uuid parser */
- printk(KERN_INFO "device fsid %llx-%llx ",
- *(unsigned long long *)disk_super->fsid,
- *(unsigned long long *)(disk_super->fsid + 8));
- }
+ else
+ printk(KERN_INFO "device fsid %pU ", disk_super->fsid);
printk(KERN_CONT "devid %llu transid %llu %s\n",
(unsigned long long)devid, (unsigned long long)transid, path);
ret = device_list_add(path, disk_super, devid, fs_devices_ret);
@@ -2102,7 +2098,8 @@ int btrfs_balance(struct btrfs_root *dev_root)
chunk_root->root_key.objectid,
found_key.objectid,
found_key.offset);
- BUG_ON(ret && ret != -ENOSPC);
+ if (ret && ret != -ENOSPC)
+ goto error;
key.offset = found_key.offset - 1;
}
ret = 0;
diff --git a/fs/buffer.c b/fs/buffer.c
index 49c9aada0374..1a80b048ade8 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1902,10 +1902,8 @@ int __block_write_begin(struct page *page, loff_t pos, unsigned len,
if (!buffer_uptodate(*wait_bh))
err = -EIO;
}
- if (unlikely(err)) {
+ if (unlikely(err))
page_zero_new_buffers(page, from, to);
- ClearPageUptodate(page);
- }
return err;
}
EXPORT_SYMBOL(__block_write_begin);
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 33da49dc3cc6..5a3953db8118 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -453,7 +453,7 @@ static int ceph_writepage(struct page *page, struct writeback_control *wbc)
int err;
struct inode *inode = page->mapping->host;
BUG_ON(!inode);
- igrab(inode);
+ ihold(inode);
err = writepage_nounlock(page, wbc);
unlock_page(page);
iput(inode);
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 1f72b00447c4..f605753c8fe9 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -2940,14 +2940,12 @@ void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc)
while (!list_empty(&mdsc->cap_dirty)) {
ci = list_first_entry(&mdsc->cap_dirty, struct ceph_inode_info,
i_dirty_item);
- inode = igrab(&ci->vfs_inode);
+ inode = &ci->vfs_inode;
+ ihold(inode);
dout("flush_dirty_caps %p\n", inode);
spin_unlock(&mdsc->cap_dirty_lock);
- if (inode) {
- ceph_check_caps(ci, CHECK_CAPS_NODELAY|CHECK_CAPS_FLUSH,
- NULL);
- iput(inode);
- }
+ ceph_check_caps(ci, CHECK_CAPS_NODELAY|CHECK_CAPS_FLUSH, NULL);
+ iput(inode);
spin_lock(&mdsc->cap_dirty_lock);
}
spin_unlock(&mdsc->cap_dirty_lock);
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 33729e822bb9..ef8f08c343e8 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -308,7 +308,8 @@ more:
req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
if (IS_ERR(req))
return PTR_ERR(req);
- req->r_inode = igrab(inode);
+ req->r_inode = inode;
+ ihold(inode);
req->r_dentry = dget(filp->f_dentry);
/* hints to request -> mds selection code */
req->r_direct_mode = USE_AUTH_MDS;
@@ -787,10 +788,12 @@ static int ceph_link(struct dentry *old_dentry, struct inode *dir,
req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
err = ceph_mdsc_do_request(mdsc, dir, req);
- if (err)
+ if (err) {
d_drop(dentry);
- else if (!req->r_reply_info.head->is_dentry)
- d_instantiate(dentry, igrab(old_dentry->d_inode));
+ } else if (!req->r_reply_info.head->is_dentry) {
+ ihold(old_dentry->d_inode);
+ d_instantiate(dentry, old_dentry->d_inode);
+ }
ceph_mdsc_put_request(req);
return err;
}
diff --git a/fs/ceph/export.c b/fs/ceph/export.c
index a610d3d67488..f67b687550de 100644
--- a/fs/ceph/export.c
+++ b/fs/ceph/export.c
@@ -109,7 +109,7 @@ static struct dentry *__fh_to_dentry(struct super_block *sb,
err = ceph_mdsc_do_request(mdsc, NULL, req);
inode = req->r_target_inode;
if (inode)
- igrab(inode);
+ ihold(inode);
ceph_mdsc_put_request(req);
if (!inode)
return ERR_PTR(-ESTALE);
@@ -167,7 +167,7 @@ static struct dentry *__cfh_to_dentry(struct super_block *sb,
err = ceph_mdsc_do_request(mdsc, NULL, req);
inode = req->r_target_inode;
if (inode)
- igrab(inode);
+ ihold(inode);
ceph_mdsc_put_request(req);
if (!inode)
return ERR_PTR(err ? err : -ESTALE);
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 203252d88d9f..4698a5c553dc 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -191,7 +191,8 @@ int ceph_open(struct inode *inode, struct file *file)
err = PTR_ERR(req);
goto out;
}
- req->r_inode = igrab(inode);
+ req->r_inode = inode;
+ ihold(inode);
req->r_num_caps = 1;
err = ceph_mdsc_do_request(mdsc, parent_inode, req);
if (!err)
@@ -282,14 +283,13 @@ int ceph_release(struct inode *inode, struct file *file)
static int striped_read(struct inode *inode,
u64 off, u64 len,
struct page **pages, int num_pages,
- int *checkeof, bool align_to_pages,
+ int *checkeof, bool o_direct,
unsigned long buf_align)
{
struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
struct ceph_inode_info *ci = ceph_inode(inode);
u64 pos, this_len;
int io_align, page_align;
- int page_off = off & ~PAGE_CACHE_MASK; /* first byte's offset in page */
int left, pages_left;
int read;
struct page **page_pos;
@@ -307,7 +307,7 @@ static int striped_read(struct inode *inode,
io_align = off & ~PAGE_MASK;
more:
- if (align_to_pages)
+ if (o_direct)
page_align = (pos - io_align + buf_align) & ~PAGE_MASK;
else
page_align = pos & ~PAGE_MASK;
@@ -317,20 +317,19 @@ more:
ci->i_truncate_seq,
ci->i_truncate_size,
page_pos, pages_left, page_align);
- hit_stripe = this_len < left;
- was_short = ret >= 0 && ret < this_len;
if (ret == -ENOENT)
ret = 0;
+ hit_stripe = this_len < left;
+ was_short = ret >= 0 && ret < this_len;
dout("striped_read %llu~%u (read %u) got %d%s%s\n", pos, left, read,
ret, hit_stripe ? " HITSTRIPE" : "", was_short ? " SHORT" : "");
if (ret > 0) {
- int didpages =
- ((pos & ~PAGE_CACHE_MASK) + ret) >> PAGE_CACHE_SHIFT;
+ int didpages = (page_align + ret) >> PAGE_CACHE_SHIFT;
if (read < pos - off) {
dout(" zero gap %llu to %llu\n", off + read, pos);
- ceph_zero_page_vector_range(page_off + read,
+ ceph_zero_page_vector_range(page_align + read,
pos - off - read, pages);
}
pos += ret;
@@ -345,20 +344,22 @@ more:
}
if (was_short) {
- /* was original extent fully inside i_size? */
- if (pos + left <= inode->i_size) {
- dout("zero tail\n");
- ceph_zero_page_vector_range(page_off + read, len - read,
+ /* did we bounce off eof? */
+ if (pos + left > inode->i_size)
+ *checkeof = 1;
+
+ /* zero trailing bytes (inside i_size) */
+ if (left > 0 && pos < inode->i_size) {
+ if (pos + left > inode->i_size)
+ left = inode->i_size - pos;
+
+ dout("zero tail %d\n", left);
+ ceph_zero_page_vector_range(page_align + read, left,
pages);
- read = len;
- goto out;
+ read += left;
}
-
- /* check i_size */
- *checkeof = 1;
}
-out:
if (ret >= 0)
ret = read;
dout("striped_read returns %d\n", ret);
@@ -475,9 +476,6 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data,
else
pos = *offset;
- io_align = pos & ~PAGE_MASK;
- buf_align = (unsigned long)data & ~PAGE_MASK;
-
ret = filemap_write_and_wait_range(inode->i_mapping, pos, pos + left);
if (ret < 0)
return ret;
@@ -501,6 +499,8 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data,
* boundary. this isn't atomic, unfortunately. :(
*/
more:
+ io_align = pos & ~PAGE_MASK;
+ buf_align = (unsigned long)data & ~PAGE_MASK;
len = left;
if (file->f_flags & O_DIRECT) {
/* write from beginning of first page, regardless of
@@ -590,6 +590,7 @@ out:
pos += len;
written += len;
left -= len;
+ data += written;
if (left)
goto more;
@@ -658,7 +659,7 @@ out:
/* hit EOF or hole? */
if (statret == 0 && *ppos < inode->i_size) {
- dout("aio_read sync_read hit hole, reading more\n");
+ dout("aio_read sync_read hit hole, ppos %lld < size %lld, reading more\n", *ppos, inode->i_size);
read += ret;
base += ret;
len -= ret;
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 70b6a4839c38..d8858e96ab18 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -1101,10 +1101,10 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
goto done;
}
req->r_dentry = dn; /* may have spliced */
- igrab(in);
+ ihold(in);
} else if (ceph_ino(in) == vino.ino &&
ceph_snap(in) == vino.snap) {
- igrab(in);
+ ihold(in);
} else {
dout(" %p links to %p %llx.%llx, not %llx.%llx\n",
dn, in, ceph_ino(in), ceph_snap(in),
@@ -1144,7 +1144,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
goto done;
}
req->r_dentry = dn; /* may have spliced */
- igrab(in);
+ ihold(in);
rinfo->head->is_dentry = 1; /* fool notrace handlers */
}
@@ -1328,7 +1328,7 @@ void ceph_queue_writeback(struct inode *inode)
if (queue_work(ceph_inode_to_client(inode)->wb_wq,
&ceph_inode(inode)->i_wb_work)) {
dout("ceph_queue_writeback %p\n", inode);
- igrab(inode);
+ ihold(inode);
} else {
dout("ceph_queue_writeback %p failed\n", inode);
}
@@ -1353,7 +1353,7 @@ void ceph_queue_invalidate(struct inode *inode)
if (queue_work(ceph_inode_to_client(inode)->pg_inv_wq,
&ceph_inode(inode)->i_pg_inv_work)) {
dout("ceph_queue_invalidate %p\n", inode);
- igrab(inode);
+ ihold(inode);
} else {
dout("ceph_queue_invalidate %p failed\n", inode);
}
@@ -1477,7 +1477,7 @@ void ceph_queue_vmtruncate(struct inode *inode)
if (queue_work(ceph_sb_to_client(inode->i_sb)->trunc_wq,
&ci->i_vmtruncate_work)) {
dout("ceph_queue_vmtruncate %p\n", inode);
- igrab(inode);
+ ihold(inode);
} else {
dout("ceph_queue_vmtruncate %p failed, pending=%d\n",
inode, ci->i_truncate_pending);
@@ -1738,7 +1738,8 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
__mark_inode_dirty(inode, inode_dirty_flags);
if (mask) {
- req->r_inode = igrab(inode);
+ req->r_inode = inode;
+ ihold(inode);
req->r_inode_drop = release;
req->r_args.setattr.mask = cpu_to_le32(mask);
req->r_num_caps = 1;
@@ -1779,7 +1780,8 @@ int ceph_do_getattr(struct inode *inode, int mask)
req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, USE_ANY_MDS);
if (IS_ERR(req))
return PTR_ERR(req);
- req->r_inode = igrab(inode);
+ req->r_inode = inode;
+ ihold(inode);
req->r_num_caps = 1;
req->r_args.getattr.mask = cpu_to_le32(mask);
err = ceph_mdsc_do_request(mdsc, NULL, req);
diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c
index 8888c9ba68db..ef0b5f48e13a 100644
--- a/fs/ceph/ioctl.c
+++ b/fs/ceph/ioctl.c
@@ -73,7 +73,8 @@ static long ceph_ioctl_set_layout(struct file *file, void __user *arg)
USE_AUTH_MDS);
if (IS_ERR(req))
return PTR_ERR(req);
- req->r_inode = igrab(inode);
+ req->r_inode = inode;
+ ihold(inode);
req->r_inode_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_FILE_EXCL;
req->r_args.setlayout.layout.fl_stripe_unit =
@@ -135,7 +136,8 @@ static long ceph_ioctl_set_layout_policy (struct file *file, void __user *arg)
if (IS_ERR(req))
return PTR_ERR(req);
- req->r_inode = igrab(inode);
+ req->r_inode = inode;
+ ihold(inode);
req->r_args.setlayout.layout.fl_stripe_unit =
cpu_to_le32(l.stripe_unit);
diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c
index 476b329867d4..80576d05d687 100644
--- a/fs/ceph/locks.c
+++ b/fs/ceph/locks.c
@@ -23,7 +23,8 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file,
req = ceph_mdsc_create_request(mdsc, operation, USE_AUTH_MDS);
if (IS_ERR(req))
return PTR_ERR(req);
- req->r_inode = igrab(inode);
+ req->r_inode = inode;
+ ihold(inode);
/* mds requires start and length rather than start and end */
if (LLONG_MAX == fl->fl_end)
@@ -32,11 +33,10 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file,
length = fl->fl_end - fl->fl_start + 1;
dout("ceph_lock_message: rule: %d, op: %d, pid: %llu, start: %llu, "
- "length: %llu, wait: %d, type`: %d", (int)lock_type,
+ "length: %llu, wait: %d, type: %d", (int)lock_type,
(int)operation, (u64)fl->fl_pid, fl->fl_start,
length, wait, fl->fl_type);
-
req->r_args.filelock_change.rule = lock_type;
req->r_args.filelock_change.type = cmd;
req->r_args.filelock_change.pid = cpu_to_le64((u64)fl->fl_pid);
@@ -70,7 +70,7 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file,
}
ceph_mdsc_put_request(req);
dout("ceph_lock_message: rule: %d, op: %d, pid: %llu, start: %llu, "
- "length: %llu, wait: %d, type`: %d, err code %d", (int)lock_type,
+ "length: %llu, wait: %d, type: %d, err code %d", (int)lock_type,
(int)operation, (u64)fl->fl_pid, fl->fl_start,
length, wait, fl->fl_type, err);
return err;
@@ -109,16 +109,20 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
dout("mds locked, locking locally");
err = posix_lock_file(file, fl, NULL);
if (err && (CEPH_MDS_OP_SETFILELOCK == op)) {
- /* undo! This should only happen if the kernel detects
- * local deadlock. */
+ /* undo! This should only happen if
+ * the kernel detects local
+ * deadlock. */
ceph_lock_message(CEPH_LOCK_FCNTL, op, file,
CEPH_LOCK_UNLOCK, 0, fl);
- dout("got %d on posix_lock_file, undid lock", err);
+ dout("got %d on posix_lock_file, undid lock",
+ err);
}
}
- } else {
- dout("mds returned error code %d", err);
+ } else if (err == -ERESTARTSYS) {
+ dout("undoing lock\n");
+ ceph_lock_message(CEPH_LOCK_FCNTL, op, file,
+ CEPH_LOCK_UNLOCK, 0, fl);
}
return err;
}
@@ -155,8 +159,11 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl)
file, CEPH_LOCK_UNLOCK, 0, fl);
dout("got %d on flock_lock_file_wait, undid lock", err);
}
- } else {
- dout("mds error code %d", err);
+ } else if (err == -ERESTARTSYS) {
+ dout("undoing lock\n");
+ ceph_lock_message(CEPH_LOCK_FLOCK,
+ CEPH_MDS_OP_SETFILELOCK,
+ file, CEPH_LOCK_UNLOCK, 0, fl);
}
return err;
}
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index 24067d68a554..54b14de2e729 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -722,7 +722,7 @@ static void flush_snaps(struct ceph_mds_client *mdsc)
ci = list_first_entry(&mdsc->snap_flush_list,
struct ceph_inode_info, i_snap_flush_item);
inode = &ci->vfs_inode;
- igrab(inode);
+ ihold(inode);
spin_unlock(&mdsc->snap_flush_lock);
spin_lock(&inode->i_lock);
__ceph_flush_snaps(ci, &session, 0);
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index f2b628696180..f42d730f1b66 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -665,7 +665,8 @@ static int ceph_sync_setxattr(struct dentry *dentry, const char *name,
err = PTR_ERR(req);
goto out;
}
- req->r_inode = igrab(inode);
+ req->r_inode = inode;
+ ihold(inode);
req->r_inode_drop = CEPH_CAP_XATTR_SHARED;
req->r_num_caps = 1;
req->r_args.setxattr.flags = cpu_to_le32(flags);
@@ -795,7 +796,8 @@ static int ceph_send_removexattr(struct dentry *dentry, const char *name)
USE_AUTH_MDS);
if (IS_ERR(req))
return PTR_ERR(req);
- req->r_inode = igrab(inode);
+ req->r_inode = inode;
+ ihold(inode);
req->r_inode_drop = CEPH_CAP_XATTR_SHARED;
req->r_num_caps = 1;
req->r_path2 = kstrdup(name, GFP_NOFS);
diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig
index 53ed1ad2c112..f66cc1625150 100644
--- a/fs/cifs/Kconfig
+++ b/fs/cifs/Kconfig
@@ -156,6 +156,6 @@ config CIFS_ACL
config CIFS_NFSD_EXPORT
bool "Allow nfsd to export CIFS file system (EXPERIMENTAL)"
- depends on CIFS && EXPERIMENTAL
+ depends on CIFS && EXPERIMENTAL && BROKEN
help
Allows NFS server to export a CIFS mounted share (nfsd over cifs)
diff --git a/fs/cifs/cache.c b/fs/cifs/cache.c
index dd8584d35a14..545509c3313b 100644
--- a/fs/cifs/cache.c
+++ b/fs/cifs/cache.c
@@ -92,7 +92,7 @@ static uint16_t cifs_server_get_key(const void *cookie_netfs_data,
break;
default:
- cERROR(1, "CIFS: Unknown network family '%d'", sa->sa_family);
+ cERROR(1, "Unknown network family '%d'", sa->sa_family);
key_len = 0;
break;
}
@@ -152,7 +152,7 @@ static uint16_t cifs_super_get_key(const void *cookie_netfs_data, void *buffer,
sharename = extract_sharename(tcon->treeName);
if (IS_ERR(sharename)) {
- cFYI(1, "CIFS: couldn't extract sharename\n");
+ cFYI(1, "%s: couldn't extract sharename\n", __func__);
sharename = NULL;
return 0;
}
@@ -302,7 +302,7 @@ static void cifs_fscache_inode_now_uncached(void *cookie_netfs_data)
pagevec_init(&pvec, 0);
first = 0;
- cFYI(1, "cifs inode 0x%p now uncached", cifsi);
+ cFYI(1, "%s: cifs inode 0x%p now uncached", __func__, cifsi);
for (;;) {
nr_pages = pagevec_lookup(&pvec,
diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h
index ffb1459dc6ec..7260e11e21f8 100644
--- a/fs/cifs/cifs_fs_sb.h
+++ b/fs/cifs/cifs_fs_sb.h
@@ -42,6 +42,7 @@
#define CIFS_MOUNT_MULTIUSER 0x20000 /* multiuser mount */
#define CIFS_MOUNT_STRICT_IO 0x40000 /* strict cache mode */
#define CIFS_MOUNT_RWPIDFORWARD 0x80000 /* use pid forwarding for rw */
+#define CIFS_MOUNT_POSIXACL 0x100000 /* mirror of MS_POSIXACL in mnt_cifs_flags */
struct cifs_sb_info {
struct rb_root tlink_tree;
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 989442dcfb45..35f9154615fa 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -104,8 +104,7 @@ cifs_sb_deactive(struct super_block *sb)
}
static int
-cifs_read_super(struct super_block *sb, struct smb_vol *volume_info,
- const char *devname, int silent)
+cifs_read_super(struct super_block *sb)
{
struct inode *inode;
struct cifs_sb_info *cifs_sb;
@@ -113,22 +112,16 @@ cifs_read_super(struct super_block *sb, struct smb_vol *volume_info,
cifs_sb = CIFS_SB(sb);
- spin_lock_init(&cifs_sb->tlink_tree_lock);
- cifs_sb->tlink_tree = RB_ROOT;
+ if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIXACL)
+ sb->s_flags |= MS_POSIXACL;
- rc = bdi_setup_and_register(&cifs_sb->bdi, "cifs", BDI_CAP_MAP_COPY);
- if (rc)
- return rc;
-
- cifs_sb->bdi.ra_pages = default_backing_dev_info.ra_pages;
+ if (cifs_sb_master_tcon(cifs_sb)->ses->capabilities & CAP_LARGE_FILES)
+ sb->s_maxbytes = MAX_LFS_FILESIZE;
+ else
+ sb->s_maxbytes = MAX_NON_LFS;
- rc = cifs_mount(sb, cifs_sb, volume_info, devname);
-
- if (rc) {
- if (!silent)
- cERROR(1, "cifs_mount failed w/return code = %d", rc);
- goto out_mount_failed;
- }
+ /* BB FIXME fix time_gran to be larger for LANMAN sessions */
+ sb->s_time_gran = 100;
sb->s_magic = CIFS_MAGIC_NUMBER;
sb->s_op = &cifs_super_ops;
@@ -170,37 +163,14 @@ out_no_root:
if (inode)
iput(inode);
- cifs_umount(sb, cifs_sb);
-
-out_mount_failed:
- bdi_destroy(&cifs_sb->bdi);
return rc;
}
-static void
-cifs_put_super(struct super_block *sb)
+static void cifs_kill_sb(struct super_block *sb)
{
- int rc = 0;
- struct cifs_sb_info *cifs_sb;
-
- cFYI(1, "In cifs_put_super");
- cifs_sb = CIFS_SB(sb);
- if (cifs_sb == NULL) {
- cFYI(1, "Empty cifs superblock info passed to unmount");
- return;
- }
-
- rc = cifs_umount(sb, cifs_sb);
- if (rc)
- cERROR(1, "cifs_umount failed with return code %d", rc);
- if (cifs_sb->mountdata) {
- kfree(cifs_sb->mountdata);
- cifs_sb->mountdata = NULL;
- }
-
- unload_nls(cifs_sb->local_nls);
- bdi_destroy(&cifs_sb->bdi);
- kfree(cifs_sb);
+ struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
+ kill_anon_super(sb);
+ cifs_umount(cifs_sb);
}
static int
@@ -257,9 +227,6 @@ static int cifs_permission(struct inode *inode, int mask, unsigned int flags)
{
struct cifs_sb_info *cifs_sb;
- if (flags & IPERM_FLAG_RCU)
- return -ECHILD;
-
cifs_sb = CIFS_SB(inode->i_sb);
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) {
@@ -352,6 +319,37 @@ cifs_show_address(struct seq_file *s, struct TCP_Server_Info *server)
}
}
+static void
+cifs_show_security(struct seq_file *s, struct TCP_Server_Info *server)
+{
+ seq_printf(s, ",sec=");
+
+ switch (server->secType) {
+ case LANMAN:
+ seq_printf(s, "lanman");
+ break;
+ case NTLMv2:
+ seq_printf(s, "ntlmv2");
+ break;
+ case NTLM:
+ seq_printf(s, "ntlm");
+ break;
+ case Kerberos:
+ seq_printf(s, "krb5");
+ break;
+ case RawNTLMSSP:
+ seq_printf(s, "ntlmssp");
+ break;
+ default:
+ /* shouldn't ever happen */
+ seq_printf(s, "unknown");
+ break;
+ }
+
+ if (server->sec_mode & (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
+ seq_printf(s, "i");
+}
+
/*
* cifs_show_options() is for displaying mount options in /proc/mounts.
* Not all settable options are displayed but most of the important
@@ -365,6 +363,8 @@ cifs_show_options(struct seq_file *s, struct vfsmount *m)
struct sockaddr *srcaddr;
srcaddr = (struct sockaddr *)&tcon->ses->server->srcaddr;
+ cifs_show_security(s, tcon->ses->server);
+
seq_printf(s, ",unc=%s", tcon->treeName);
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER)
@@ -518,7 +518,6 @@ static int cifs_drop_inode(struct inode *inode)
}
static const struct super_operations cifs_super_ops = {
- .put_super = cifs_put_super,
.statfs = cifs_statfs,
.alloc_inode = cifs_alloc_inode,
.destroy_inode = cifs_destroy_inode,
@@ -555,7 +554,7 @@ cifs_get_root(struct smb_vol *vol, struct super_block *sb)
full_path = cifs_build_path_to_root(vol, cifs_sb,
cifs_sb_master_tcon(cifs_sb));
if (full_path == NULL)
- return NULL;
+ return ERR_PTR(-ENOMEM);
cFYI(1, "Get root dentry for %s", full_path);
@@ -584,7 +583,7 @@ cifs_get_root(struct smb_vol *vol, struct super_block *sb)
dchild = d_alloc(dparent, &name);
if (dchild == NULL) {
dput(dparent);
- dparent = NULL;
+ dparent = ERR_PTR(-ENOMEM);
goto out;
}
}
@@ -602,7 +601,7 @@ cifs_get_root(struct smb_vol *vol, struct super_block *sb)
if (rc) {
dput(dchild);
dput(dparent);
- dparent = NULL;
+ dparent = ERR_PTR(rc);
goto out;
}
alias = d_materialise_unique(dchild, inode);
@@ -610,7 +609,7 @@ cifs_get_root(struct smb_vol *vol, struct super_block *sb)
dput(dchild);
if (IS_ERR(alias)) {
dput(dparent);
- dparent = NULL;
+ dparent = ERR_PTR(-EINVAL); /* XXX */
goto out;
}
dchild = alias;
@@ -630,6 +629,13 @@ out:
return dparent;
}
+static int cifs_set_super(struct super_block *sb, void *data)
+{
+ struct cifs_mnt_data *mnt_data = data;
+ sb->s_fs_info = mnt_data->cifs_sb;
+ return set_anon_super(sb, NULL);
+}
+
static struct dentry *
cifs_do_mount(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data)
@@ -650,75 +656,73 @@ cifs_do_mount(struct file_system_type *fs_type,
cifs_sb = kzalloc(sizeof(struct cifs_sb_info), GFP_KERNEL);
if (cifs_sb == NULL) {
root = ERR_PTR(-ENOMEM);
- goto out;
+ goto out_nls;
+ }
+
+ cifs_sb->mountdata = kstrndup(data, PAGE_SIZE, GFP_KERNEL);
+ if (cifs_sb->mountdata == NULL) {
+ root = ERR_PTR(-ENOMEM);
+ goto out_cifs_sb;
}
cifs_setup_cifs_sb(volume_info, cifs_sb);
+ rc = cifs_mount(cifs_sb, volume_info);
+ if (rc) {
+ if (!(flags & MS_SILENT))
+ cERROR(1, "cifs_mount failed w/return code = %d", rc);
+ root = ERR_PTR(rc);
+ goto out_mountdata;
+ }
+
mnt_data.vol = volume_info;
mnt_data.cifs_sb = cifs_sb;
mnt_data.flags = flags;
- sb = sget(fs_type, cifs_match_super, set_anon_super, &mnt_data);
+ sb = sget(fs_type, cifs_match_super, cifs_set_super, &mnt_data);
if (IS_ERR(sb)) {
root = ERR_CAST(sb);
- goto out_cifs_sb;
+ cifs_umount(cifs_sb);
+ goto out;
}
- if (sb->s_fs_info) {
+ if (sb->s_root) {
cFYI(1, "Use existing superblock");
- goto out_shared;
- }
-
- /*
- * Copy mount params for use in submounts. Better to do
- * the copy here and deal with the error before cleanup gets
- * complicated post-mount.
- */
- cifs_sb->mountdata = kstrndup(data, PAGE_SIZE, GFP_KERNEL);
- if (cifs_sb->mountdata == NULL) {
- root = ERR_PTR(-ENOMEM);
- goto out_super;
- }
-
- sb->s_flags = flags;
- /* BB should we make this contingent on mount parm? */
- sb->s_flags |= MS_NODIRATIME | MS_NOATIME;
- sb->s_fs_info = cifs_sb;
+ cifs_umount(cifs_sb);
+ } else {
+ sb->s_flags = flags;
+ /* BB should we make this contingent on mount parm? */
+ sb->s_flags |= MS_NODIRATIME | MS_NOATIME;
+
+ rc = cifs_read_super(sb);
+ if (rc) {
+ root = ERR_PTR(rc);
+ goto out_super;
+ }
- rc = cifs_read_super(sb, volume_info, dev_name,
- flags & MS_SILENT ? 1 : 0);
- if (rc) {
- root = ERR_PTR(rc);
- goto out_super;
+ sb->s_flags |= MS_ACTIVE;
}
- sb->s_flags |= MS_ACTIVE;
-
root = cifs_get_root(volume_info, sb);
- if (root == NULL)
+ if (IS_ERR(root))
goto out_super;
cFYI(1, "dentry root is: %p", root);
goto out;
-out_shared:
- root = cifs_get_root(volume_info, sb);
- if (root)
- cFYI(1, "dentry root is: %p", root);
- goto out;
-
out_super:
- kfree(cifs_sb->mountdata);
deactivate_locked_super(sb);
-
-out_cifs_sb:
- unload_nls(cifs_sb->local_nls);
- kfree(cifs_sb);
-
out:
cifs_cleanup_volume_info(&volume_info);
return root;
+
+out_mountdata:
+ kfree(cifs_sb->mountdata);
+out_cifs_sb:
+ kfree(cifs_sb);
+out_nls:
+ unload_nls(volume_info->local_nls);
+ goto out;
}
static ssize_t cifs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
@@ -807,7 +811,7 @@ struct file_system_type cifs_fs_type = {
.owner = THIS_MODULE,
.name = "cifs",
.mount = cifs_do_mount,
- .kill_sb = kill_anon_super,
+ .kill_sb = cifs_kill_sb,
/* .fs_flags */
};
const struct inode_operations cifs_dir_inode_ops = {
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 64313f778ebf..0900e1658c96 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -129,5 +129,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
extern const struct export_operations cifs_export_ops;
#endif /* CIFS_NFSD_EXPORT */
-#define CIFS_VERSION "1.72"
+#define CIFS_VERSION "1.73"
#endif /* _CIFSFS_H */
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 953f84413c77..257f312ede42 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -157,9 +157,8 @@ extern int cifs_match_super(struct super_block *, void *);
extern void cifs_cleanup_volume_info(struct smb_vol **pvolume_info);
extern int cifs_setup_volume_info(struct smb_vol **pvolume_info,
char *mount_data, const char *devname);
-extern int cifs_mount(struct super_block *, struct cifs_sb_info *,
- struct smb_vol *, const char *);
-extern int cifs_umount(struct super_block *, struct cifs_sb_info *);
+extern int cifs_mount(struct cifs_sb_info *, struct smb_vol *);
+extern void cifs_umount(struct cifs_sb_info *);
extern void cifs_dfs_release_automount_timer(void);
void cifs_proc_init(void);
void cifs_proc_clean(void);
@@ -218,7 +217,8 @@ extern int get_dfs_path(int xid, struct cifs_ses *pSesInfo,
struct dfs_info3_param **preferrals,
int remap);
extern void reset_cifs_unix_caps(int xid, struct cifs_tcon *tcon,
- struct super_block *sb, struct smb_vol *vol);
+ struct cifs_sb_info *cifs_sb,
+ struct smb_vol *vol);
extern int CIFSSMBQFSInfo(const int xid, struct cifs_tcon *tcon,
struct kstatfs *FSData);
extern int SMBOldQFSInfo(const int xid, struct cifs_tcon *tcon,
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index bb659eb73810..c8cb83ef6f6f 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -152,7 +152,7 @@ cifs_reconnect(struct TCP_Server_Info *server)
mid_entry->callback(mid_entry);
}
- while (server->tcpStatus == CifsNeedReconnect) {
+ do {
try_to_freeze();
/* we should try only the port we connected to before */
@@ -167,7 +167,7 @@ cifs_reconnect(struct TCP_Server_Info *server)
server->tcpStatus = CifsNeedNegotiate;
spin_unlock(&GlobalMid_Lock);
}
- }
+ } while (server->tcpStatus == CifsNeedReconnect);
return rc;
}
@@ -2149,7 +2149,10 @@ cifs_put_tlink(struct tcon_link *tlink)
}
static inline struct tcon_link *
-cifs_sb_master_tlink(struct cifs_sb_info *cifs_sb);
+cifs_sb_master_tlink(struct cifs_sb_info *cifs_sb)
+{
+ return cifs_sb->master_tlink;
+}
static int
compare_mount_options(struct super_block *sb, struct cifs_mnt_data *mnt_data)
@@ -2471,14 +2474,6 @@ generic_ip_connect(struct TCP_Server_Info *server)
if (rc < 0)
return rc;
- rc = socket->ops->connect(socket, saddr, slen, 0);
- if (rc < 0) {
- cFYI(1, "Error %d connecting to server", rc);
- sock_release(socket);
- server->ssocket = NULL;
- return rc;
- }
-
/*
* Eventually check for other socket options to change from
* the default. sock_setsockopt not used because it expects
@@ -2507,6 +2502,14 @@ generic_ip_connect(struct TCP_Server_Info *server)
socket->sk->sk_sndbuf,
socket->sk->sk_rcvbuf, socket->sk->sk_rcvtimeo);
+ rc = socket->ops->connect(socket, saddr, slen, 0);
+ if (rc < 0) {
+ cFYI(1, "Error %d connecting to server", rc);
+ sock_release(socket);
+ server->ssocket = NULL;
+ return rc;
+ }
+
if (sport == htons(RFC1001_PORT))
rc = ip_rfc1001_connect(server);
@@ -2543,7 +2546,7 @@ ip_connect(struct TCP_Server_Info *server)
}
void reset_cifs_unix_caps(int xid, struct cifs_tcon *tcon,
- struct super_block *sb, struct smb_vol *vol_info)
+ struct cifs_sb_info *cifs_sb, struct smb_vol *vol_info)
{
/* if we are reconnecting then should we check to see if
* any requested capabilities changed locally e.g. via
@@ -2597,22 +2600,23 @@ void reset_cifs_unix_caps(int xid, struct cifs_tcon *tcon,
cap &= ~CIFS_UNIX_POSIX_ACL_CAP;
else if (CIFS_UNIX_POSIX_ACL_CAP & cap) {
cFYI(1, "negotiated posix acl support");
- if (sb)
- sb->s_flags |= MS_POSIXACL;
+ if (cifs_sb)
+ cifs_sb->mnt_cifs_flags |=
+ CIFS_MOUNT_POSIXACL;
}
if (vol_info && vol_info->posix_paths == 0)
cap &= ~CIFS_UNIX_POSIX_PATHNAMES_CAP;
else if (cap & CIFS_UNIX_POSIX_PATHNAMES_CAP) {
cFYI(1, "negotiate posix pathnames");
- if (sb)
- CIFS_SB(sb)->mnt_cifs_flags |=
+ if (cifs_sb)
+ cifs_sb->mnt_cifs_flags |=
CIFS_MOUNT_POSIX_PATHS;
}
- if (sb && (CIFS_SB(sb)->rsize > 127 * 1024)) {
+ if (cifs_sb && (cifs_sb->rsize > 127 * 1024)) {
if ((cap & CIFS_UNIX_LARGE_READ_CAP) == 0) {
- CIFS_SB(sb)->rsize = 127 * 1024;
+ cifs_sb->rsize = 127 * 1024;
cFYI(DBG2, "larger reads not supported by srv");
}
}
@@ -2659,6 +2663,9 @@ void cifs_setup_cifs_sb(struct smb_vol *pvolume_info,
{
INIT_DELAYED_WORK(&cifs_sb->prune_tlinks, cifs_prune_tlinks);
+ spin_lock_init(&cifs_sb->tlink_tree_lock);
+ cifs_sb->tlink_tree = RB_ROOT;
+
if (pvolume_info->rsize > CIFSMaxBufSize) {
cERROR(1, "rsize %d too large, using MaxBufSize",
pvolume_info->rsize);
@@ -2747,21 +2754,21 @@ void cifs_setup_cifs_sb(struct smb_vol *pvolume_info,
/*
* When the server supports very large writes via POSIX extensions, we can
- * allow up to 2^24 - PAGE_CACHE_SIZE.
+ * allow up to 2^24-1, minus the size of a WRITE_AND_X header, not including
+ * the RFC1001 length.
*
* Note that this might make for "interesting" allocation problems during
- * writeback however (as we have to allocate an array of pointers for the
- * pages). A 16M write means ~32kb page array with PAGE_CACHE_SIZE == 4096.
+ * writeback however as we have to allocate an array of pointers for the
+ * pages. A 16M write means ~32kb page array with PAGE_CACHE_SIZE == 4096.
*/
-#define CIFS_MAX_WSIZE ((1<<24) - PAGE_CACHE_SIZE)
+#define CIFS_MAX_WSIZE ((1<<24) - 1 - sizeof(WRITE_REQ) + 4)
/*
- * When the server doesn't allow large posix writes, default to a wsize of
- * 128k - PAGE_CACHE_SIZE -- one page less than the largest frame size
- * described in RFC1001. This allows space for the header without going over
- * that by default.
+ * When the server doesn't allow large posix writes, only allow a wsize of
+ * 128k minus the size of the WRITE_AND_X header. That allows for a write up
+ * to the maximum size described by RFC1002.
*/
-#define CIFS_MAX_RFC1001_WSIZE (128 * 1024 - PAGE_CACHE_SIZE)
+#define CIFS_MAX_RFC1002_WSIZE (128 * 1024 - sizeof(WRITE_REQ) + 4)
/*
* The default wsize is 1M. find_get_pages seems to return a maximum of 256
@@ -2780,11 +2787,18 @@ cifs_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *pvolume_info)
/* can server support 24-bit write sizes? (via UNIX extensions) */
if (!tcon->unix_ext || !(unix_cap & CIFS_UNIX_LARGE_WRITE_CAP))
- wsize = min_t(unsigned int, wsize, CIFS_MAX_RFC1001_WSIZE);
+ wsize = min_t(unsigned int, wsize, CIFS_MAX_RFC1002_WSIZE);
- /* no CAP_LARGE_WRITE_X? Limit it to 16 bits */
- if (!(server->capabilities & CAP_LARGE_WRITE_X))
- wsize = min_t(unsigned int, wsize, USHRT_MAX);
+ /*
+ * no CAP_LARGE_WRITE_X or is signing enabled without CAP_UNIX set?
+ * Limit it to max buffer offered by the server, minus the size of the
+ * WRITEX header, not including the 4 byte RFC1001 length.
+ */
+ if (!(server->capabilities & CAP_LARGE_WRITE_X) ||
+ (!(server->capabilities & CAP_UNIX) &&
+ (server->sec_mode & (SECMODE_SIGN_ENABLED|SECMODE_SIGN_REQUIRED))))
+ wsize = min_t(unsigned int, wsize,
+ server->maxBuf - sizeof(WRITE_REQ) + 4);
/* hard limit of CIFS_MAX_WSIZE */
wsize = min_t(unsigned int, wsize, CIFS_MAX_WSIZE);
@@ -2934,7 +2948,11 @@ int cifs_setup_volume_info(struct smb_vol **pvolume_info, char *mount_data,
if (volume_info->nullauth) {
cFYI(1, "null user");
- volume_info->username = "";
+ volume_info->username = kzalloc(1, GFP_KERNEL);
+ if (volume_info->username == NULL) {
+ rc = -ENOMEM;
+ goto out;
+ }
} else if (volume_info->username) {
/* BB fixme parse for domain name here */
cFYI(1, "Username: %s", volume_info->username);
@@ -2968,8 +2986,7 @@ out:
}
int
-cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
- struct smb_vol *volume_info, const char *devname)
+cifs_mount(struct cifs_sb_info *cifs_sb, struct smb_vol *volume_info)
{
int rc = 0;
int xid;
@@ -2980,6 +2997,13 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
struct tcon_link *tlink;
#ifdef CONFIG_CIFS_DFS_UPCALL
int referral_walks_count = 0;
+
+ rc = bdi_setup_and_register(&cifs_sb->bdi, "cifs", BDI_CAP_MAP_COPY);
+ if (rc)
+ return rc;
+
+ cifs_sb->bdi.ra_pages = default_backing_dev_info.ra_pages;
+
try_mount_again:
/* cleanup activities if we're chasing a referral */
if (referral_walks_count) {
@@ -3004,6 +3028,7 @@ try_mount_again:
srvTcp = cifs_get_tcp_session(volume_info);
if (IS_ERR(srvTcp)) {
rc = PTR_ERR(srvTcp);
+ bdi_destroy(&cifs_sb->bdi);
goto out;
}
@@ -3015,14 +3040,6 @@ try_mount_again:
goto mount_fail_check;
}
- if (pSesInfo->capabilities & CAP_LARGE_FILES)
- sb->s_maxbytes = MAX_LFS_FILESIZE;
- else
- sb->s_maxbytes = MAX_NON_LFS;
-
- /* BB FIXME fix time_gran to be larger for LANMAN sessions */
- sb->s_time_gran = 100;
-
/* search for existing tcon to this server share */
tcon = cifs_get_tcon(pSesInfo, volume_info);
if (IS_ERR(tcon)) {
@@ -3035,7 +3052,7 @@ try_mount_again:
if (tcon->ses->capabilities & CAP_UNIX) {
/* reset of caps checks mount to see if unix extensions
disabled for just this mount */
- reset_cifs_unix_caps(xid, tcon, sb, volume_info);
+ reset_cifs_unix_caps(xid, tcon, cifs_sb, volume_info);
if ((tcon->ses->server->tcpStatus == CifsNeedReconnect) &&
(le64_to_cpu(tcon->fsUnixInfo.Capability) &
CIFS_UNIX_TRANSPORT_ENCRYPTION_MANDATORY_CAP)) {
@@ -3158,6 +3175,7 @@ mount_fail_check:
cifs_put_smb_ses(pSesInfo);
else
cifs_put_tcp_session(srvTcp);
+ bdi_destroy(&cifs_sb->bdi);
goto out;
}
@@ -3171,6 +3189,10 @@ out:
return rc;
}
+/*
+ * Issue a TREE_CONNECT request. Note that for IPC$ shares, that the tcon
+ * pointer may be NULL.
+ */
int
CIFSTCon(unsigned int xid, struct cifs_ses *ses,
const char *tree, struct cifs_tcon *tcon,
@@ -3205,7 +3227,7 @@ CIFSTCon(unsigned int xid, struct cifs_ses *ses,
pSMB->AndXCommand = 0xFF;
pSMB->Flags = cpu_to_le16(TCON_EXTENDED_SECINFO);
bcc_ptr = &pSMB->Password[0];
- if ((ses->server->sec_mode) & SECMODE_USER) {
+ if (!tcon || (ses->server->sec_mode & SECMODE_USER)) {
pSMB->PasswordLength = cpu_to_le16(1); /* minimum */
*bcc_ptr = 0; /* password is null byte */
bcc_ptr++; /* skip password */
@@ -3328,8 +3350,8 @@ CIFSTCon(unsigned int xid, struct cifs_ses *ses,
return rc;
}
-int
-cifs_umount(struct super_block *sb, struct cifs_sb_info *cifs_sb)
+void
+cifs_umount(struct cifs_sb_info *cifs_sb)
{
struct rb_root *root = &cifs_sb->tlink_tree;
struct rb_node *node;
@@ -3350,7 +3372,10 @@ cifs_umount(struct super_block *sb, struct cifs_sb_info *cifs_sb)
}
spin_unlock(&cifs_sb->tlink_tree_lock);
- return 0;
+ bdi_destroy(&cifs_sb->bdi);
+ kfree(cifs_sb->mountdata);
+ unload_nls(cifs_sb->local_nls);
+ kfree(cifs_sb);
}
int cifs_negotiate_protocol(unsigned int xid, struct cifs_ses *ses)
@@ -3371,7 +3396,7 @@ int cifs_negotiate_protocol(unsigned int xid, struct cifs_ses *ses)
}
if (rc == 0) {
spin_lock(&GlobalMid_Lock);
- if (server->tcpStatus != CifsExiting)
+ if (server->tcpStatus == CifsNeedNegotiate)
server->tcpStatus = CifsGood;
else
rc = -EHOSTDOWN;
@@ -3484,12 +3509,6 @@ out:
return tcon;
}
-static inline struct tcon_link *
-cifs_sb_master_tlink(struct cifs_sb_info *cifs_sb)
-{
- return cifs_sb->master_tlink;
-}
-
struct cifs_tcon *
cifs_sb_master_tcon(struct cifs_sb_info *cifs_sb)
{
diff --git a/fs/cifs/fscache.c b/fs/cifs/fscache.c
index d368a47ba5eb..42e5363b4102 100644
--- a/fs/cifs/fscache.c
+++ b/fs/cifs/fscache.c
@@ -28,14 +28,14 @@ void cifs_fscache_get_client_cookie(struct TCP_Server_Info *server)
server->fscache =
fscache_acquire_cookie(cifs_fscache_netfs.primary_index,
&cifs_fscache_server_index_def, server);
- cFYI(1, "CIFS: get client cookie (0x%p/0x%p)", server,
- server->fscache);
+ cFYI(1, "%s: (0x%p/0x%p)", __func__, server,
+ server->fscache);
}
void cifs_fscache_release_client_cookie(struct TCP_Server_Info *server)
{
- cFYI(1, "CIFS: release client cookie (0x%p/0x%p)", server,
- server->fscache);
+ cFYI(1, "%s: (0x%p/0x%p)", __func__, server,
+ server->fscache);
fscache_relinquish_cookie(server->fscache, 0);
server->fscache = NULL;
}
@@ -47,13 +47,13 @@ void cifs_fscache_get_super_cookie(struct cifs_tcon *tcon)
tcon->fscache =
fscache_acquire_cookie(server->fscache,
&cifs_fscache_super_index_def, tcon);
- cFYI(1, "CIFS: get superblock cookie (0x%p/0x%p)",
- server->fscache, tcon->fscache);
+ cFYI(1, "%s: (0x%p/0x%p)", __func__, server->fscache,
+ tcon->fscache);
}
void cifs_fscache_release_super_cookie(struct cifs_tcon *tcon)
{
- cFYI(1, "CIFS: releasing superblock cookie (0x%p)", tcon->fscache);
+ cFYI(1, "%s: (0x%p)", __func__, tcon->fscache);
fscache_relinquish_cookie(tcon->fscache, 0);
tcon->fscache = NULL;
}
@@ -70,8 +70,8 @@ static void cifs_fscache_enable_inode_cookie(struct inode *inode)
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_FSCACHE) {
cifsi->fscache = fscache_acquire_cookie(tcon->fscache,
&cifs_fscache_inode_object_def, cifsi);
- cFYI(1, "CIFS: got FH cookie (0x%p/0x%p)", tcon->fscache,
- cifsi->fscache);
+ cFYI(1, "%s: got FH cookie (0x%p/0x%p)", __func__,
+ tcon->fscache, cifsi->fscache);
}
}
@@ -80,8 +80,7 @@ void cifs_fscache_release_inode_cookie(struct inode *inode)
struct cifsInodeInfo *cifsi = CIFS_I(inode);
if (cifsi->fscache) {
- cFYI(1, "CIFS releasing inode cookie (0x%p)",
- cifsi->fscache);
+ cFYI(1, "%s: (0x%p)", __func__, cifsi->fscache);
fscache_relinquish_cookie(cifsi->fscache, 0);
cifsi->fscache = NULL;
}
@@ -92,8 +91,8 @@ static void cifs_fscache_disable_inode_cookie(struct inode *inode)
struct cifsInodeInfo *cifsi = CIFS_I(inode);
if (cifsi->fscache) {
- cFYI(1, "CIFS disabling inode cookie (0x%p)",
- cifsi->fscache);
+ cFYI(1, "%s: (0x%p)", __func__, cifsi->fscache);
+ fscache_uncache_all_inode_pages(cifsi->fscache, inode);
fscache_relinquish_cookie(cifsi->fscache, 1);
cifsi->fscache = NULL;
}
@@ -121,8 +120,8 @@ void cifs_fscache_reset_inode_cookie(struct inode *inode)
cifs_sb_master_tcon(cifs_sb)->fscache,
&cifs_fscache_inode_object_def,
cifsi);
- cFYI(1, "CIFS: new cookie 0x%p oldcookie 0x%p",
- cifsi->fscache, old);
+ cFYI(1, "%s: new cookie 0x%p oldcookie 0x%p",
+ __func__, cifsi->fscache, old);
}
}
@@ -132,8 +131,8 @@ int cifs_fscache_release_page(struct page *page, gfp_t gfp)
struct inode *inode = page->mapping->host;
struct cifsInodeInfo *cifsi = CIFS_I(inode);
- cFYI(1, "CIFS: fscache release page (0x%p/0x%p)",
- page, cifsi->fscache);
+ cFYI(1, "%s: (0x%p/0x%p)", __func__, page,
+ cifsi->fscache);
if (!fscache_maybe_release_page(cifsi->fscache, page, gfp))
return 0;
}
@@ -144,8 +143,7 @@ int cifs_fscache_release_page(struct page *page, gfp_t gfp)
static void cifs_readpage_from_fscache_complete(struct page *page, void *ctx,
int error)
{
- cFYI(1, "CFS: readpage_from_fscache_complete (0x%p/%d)",
- page, error);
+ cFYI(1, "%s: (0x%p/%d)", __func__, page, error);
if (!error)
SetPageUptodate(page);
unlock_page(page);
@@ -158,7 +156,7 @@ int __cifs_readpage_from_fscache(struct inode *inode, struct page *page)
{
int ret;
- cFYI(1, "CIFS: readpage_from_fscache(fsc:%p, p:%p, i:0x%p",
+ cFYI(1, "%s: (fsc:%p, p:%p, i:0x%p", __func__,
CIFS_I(inode)->fscache, page, inode);
ret = fscache_read_or_alloc_page(CIFS_I(inode)->fscache, page,
cifs_readpage_from_fscache_complete,
@@ -167,11 +165,11 @@ int __cifs_readpage_from_fscache(struct inode *inode, struct page *page)
switch (ret) {
case 0: /* page found in fscache, read submitted */
- cFYI(1, "CIFS: readpage_from_fscache: submitted");
+ cFYI(1, "%s: submitted", __func__);
return ret;
case -ENOBUFS: /* page won't be cached */
case -ENODATA: /* page not in cache */
- cFYI(1, "CIFS: readpage_from_fscache %d", ret);
+ cFYI(1, "%s: %d", __func__, ret);
return 1;
default:
@@ -190,7 +188,7 @@ int __cifs_readpages_from_fscache(struct inode *inode,
{
int ret;
- cFYI(1, "CIFS: __cifs_readpages_from_fscache (0x%p/%u/0x%p)",
+ cFYI(1, "%s: (0x%p/%u/0x%p)", __func__,
CIFS_I(inode)->fscache, *nr_pages, inode);
ret = fscache_read_or_alloc_pages(CIFS_I(inode)->fscache, mapping,
pages, nr_pages,
@@ -199,12 +197,12 @@ int __cifs_readpages_from_fscache(struct inode *inode,
mapping_gfp_mask(mapping));
switch (ret) {
case 0: /* read submitted to the cache for all pages */
- cFYI(1, "CIFS: readpages_from_fscache: submitted");
+ cFYI(1, "%s: submitted", __func__);
return ret;
case -ENOBUFS: /* some pages are not cached and can't be */
case -ENODATA: /* some pages are not cached */
- cFYI(1, "CIFS: readpages_from_fscache: no page");
+ cFYI(1, "%s: no page", __func__);
return 1;
default:
@@ -218,7 +216,7 @@ void __cifs_readpage_to_fscache(struct inode *inode, struct page *page)
{
int ret;
- cFYI(1, "CIFS: readpage_to_fscache(fsc: %p, p: %p, i: %p",
+ cFYI(1, "%s: (fsc: %p, p: %p, i: %p)", __func__,
CIFS_I(inode)->fscache, page, inode);
ret = fscache_write_page(CIFS_I(inode)->fscache, page, GFP_KERNEL);
if (ret != 0)
@@ -230,7 +228,7 @@ void __cifs_fscache_invalidate_page(struct page *page, struct inode *inode)
struct cifsInodeInfo *cifsi = CIFS_I(inode);
struct fscache_cookie *cookie = cifsi->fscache;
- cFYI(1, "CIFS: fscache invalidatepage (0x%p/0x%p)", page, cookie);
+ cFYI(1, "%s: (0x%p/0x%p)", __func__, page, cookie);
fscache_wait_on_page_write(cookie, page);
fscache_uncache_page(cookie, page);
}
diff --git a/fs/cifs/smbencrypt.c b/fs/cifs/smbencrypt.c
index 1525d5e662b6..1c5b770c3141 100644
--- a/fs/cifs/smbencrypt.c
+++ b/fs/cifs/smbencrypt.c
@@ -90,12 +90,10 @@ smbhash(unsigned char *out, const unsigned char *in, unsigned char *key)
sg_init_one(&sgout, out, 8);
rc = crypto_blkcipher_encrypt(&desc, &sgout, &sgin, 8);
- if (rc) {
+ if (rc)
cERROR(1, "could not encrypt crypt key rc: %d\n", rc);
- crypto_free_blkcipher(tfm_des);
- goto smbhash_err;
- }
+ crypto_free_blkcipher(tfm_des);
smbhash_err:
return rc;
}
diff --git a/fs/coda/pioctl.c b/fs/coda/pioctl.c
index 6cbb3afb36dc..cb140ef293e4 100644
--- a/fs/coda/pioctl.c
+++ b/fs/coda/pioctl.c
@@ -43,8 +43,6 @@ const struct file_operations coda_ioctl_operations = {
/* the coda pioctl inode ops */
static int coda_ioctl_permission(struct inode *inode, int mask, unsigned int flags)
{
- if (flags & IPERM_FLAG_RCU)
- return -ECHILD;
return (mask & MAY_EXEC) ? -EACCES : 0;
}
diff --git a/fs/dcookies.c b/fs/dcookies.c
index a21cabdbd87b..dda0dc702d1b 100644
--- a/fs/dcookies.c
+++ b/fs/dcookies.c
@@ -178,6 +178,8 @@ SYSCALL_DEFINE(lookup_dcookie)(u64 cookie64, char __user * buf, size_t len)
/* FIXME: (deleted) ? */
path = d_path(&dcs->path, kbuf, PAGE_SIZE);
+ mutex_unlock(&dcookie_mutex);
+
if (IS_ERR(path)) {
err = PTR_ERR(path);
goto out_free;
@@ -194,6 +196,7 @@ SYSCALL_DEFINE(lookup_dcookie)(u64 cookie64, char __user * buf, size_t len)
out_free:
kfree(kbuf);
+ return err;
out:
mutex_unlock(&dcookie_mutex);
return err;
diff --git a/fs/exec.c b/fs/exec.c
index 97e0d52d72fd..6075a1e727ae 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1996,7 +1996,7 @@ static void wait_for_dump_helpers(struct file *file)
* is a special value that we use to trap recursive
* core dumps
*/
-static int umh_pipe_setup(struct subprocess_info *info)
+static int umh_pipe_setup(struct subprocess_info *info, struct cred *new)
{
struct file *rp, *wp;
struct fdtable *fdt;
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h
index 2e29abb30f76..095c36f3b612 100644
--- a/fs/ext4/ext4_extents.h
+++ b/fs/ext4/ext4_extents.h
@@ -125,7 +125,7 @@ struct ext4_ext_path {
* positive retcode - signal for ext4_ext_walk_space(), see below
* callback must return valid extent (passed or newly created)
*/
-typedef int (*ext_prepare_callback)(struct inode *, struct ext4_ext_path *,
+typedef int (*ext_prepare_callback)(struct inode *, ext4_lblk_t,
struct ext4_ext_cache *,
struct ext4_extent *, void *);
@@ -133,8 +133,11 @@ typedef int (*ext_prepare_callback)(struct inode *, struct ext4_ext_path *,
#define EXT_BREAK 1
#define EXT_REPEAT 2
-/* Maximum logical block in a file; ext4_extent's ee_block is __le32 */
-#define EXT_MAX_BLOCK 0xffffffff
+/*
+ * Maximum number of logical blocks in a file; ext4_extent's ee_block is
+ * __le32.
+ */
+#define EXT_MAX_BLOCKS 0xffffffff
/*
* EXT_INIT_MAX_LEN is the maximum number of blocks we can have in an
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 5199bac7fc62..f815cc81e7a2 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -1408,7 +1408,7 @@ got_index:
/*
* ext4_ext_next_allocated_block:
- * returns allocated block in subsequent extent or EXT_MAX_BLOCK.
+ * returns allocated block in subsequent extent or EXT_MAX_BLOCKS.
* NOTE: it considers block number from index entry as
* allocated block. Thus, index entries have to be consistent
* with leaves.
@@ -1422,7 +1422,7 @@ ext4_ext_next_allocated_block(struct ext4_ext_path *path)
depth = path->p_depth;
if (depth == 0 && path->p_ext == NULL)
- return EXT_MAX_BLOCK;
+ return EXT_MAX_BLOCKS;
while (depth >= 0) {
if (depth == path->p_depth) {
@@ -1439,12 +1439,12 @@ ext4_ext_next_allocated_block(struct ext4_ext_path *path)
depth--;
}
- return EXT_MAX_BLOCK;
+ return EXT_MAX_BLOCKS;
}
/*
* ext4_ext_next_leaf_block:
- * returns first allocated block from next leaf or EXT_MAX_BLOCK
+ * returns first allocated block from next leaf or EXT_MAX_BLOCKS
*/
static ext4_lblk_t ext4_ext_next_leaf_block(struct inode *inode,
struct ext4_ext_path *path)
@@ -1456,7 +1456,7 @@ static ext4_lblk_t ext4_ext_next_leaf_block(struct inode *inode,
/* zero-tree has no leaf blocks at all */
if (depth == 0)
- return EXT_MAX_BLOCK;
+ return EXT_MAX_BLOCKS;
/* go to index block */
depth--;
@@ -1469,7 +1469,7 @@ static ext4_lblk_t ext4_ext_next_leaf_block(struct inode *inode,
depth--;
}
- return EXT_MAX_BLOCK;
+ return EXT_MAX_BLOCKS;
}
/*
@@ -1677,13 +1677,13 @@ static unsigned int ext4_ext_check_overlap(struct inode *inode,
*/
if (b2 < b1) {
b2 = ext4_ext_next_allocated_block(path);
- if (b2 == EXT_MAX_BLOCK)
+ if (b2 == EXT_MAX_BLOCKS)
goto out;
}
/* check for wrap through zero on extent logical start block*/
if (b1 + len1 < b1) {
- len1 = EXT_MAX_BLOCK - b1;
+ len1 = EXT_MAX_BLOCKS - b1;
newext->ee_len = cpu_to_le16(len1);
ret = 1;
}
@@ -1767,7 +1767,7 @@ repeat:
fex = EXT_LAST_EXTENT(eh);
next = ext4_ext_next_leaf_block(inode, path);
if (le32_to_cpu(newext->ee_block) > le32_to_cpu(fex->ee_block)
- && next != EXT_MAX_BLOCK) {
+ && next != EXT_MAX_BLOCKS) {
ext_debug("next leaf block - %d\n", next);
BUG_ON(npath != NULL);
npath = ext4_ext_find_extent(inode, next, NULL);
@@ -1887,7 +1887,7 @@ static int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block,
BUG_ON(func == NULL);
BUG_ON(inode == NULL);
- while (block < last && block != EXT_MAX_BLOCK) {
+ while (block < last && block != EXT_MAX_BLOCKS) {
num = last - block;
/* find extent for this block */
down_read(&EXT4_I(inode)->i_data_sem);
@@ -1958,7 +1958,7 @@ static int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block,
err = -EIO;
break;
}
- err = func(inode, path, &cbex, ex, cbdata);
+ err = func(inode, next, &cbex, ex, cbdata);
ext4_ext_drop_refs(path);
if (err < 0)
@@ -2020,7 +2020,7 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path,
if (ex == NULL) {
/* there is no extent yet, so gap is [0;-] */
lblock = 0;
- len = EXT_MAX_BLOCK;
+ len = EXT_MAX_BLOCKS;
ext_debug("cache gap(whole file):");
} else if (block < le32_to_cpu(ex->ee_block)) {
lblock = block;
@@ -2350,7 +2350,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
* never happen because at least one of the end points
* needs to be on the edge of the extent.
*/
- if (end == EXT_MAX_BLOCK) {
+ if (end == EXT_MAX_BLOCKS - 1) {
ext_debug(" bad truncate %u:%u\n",
start, end);
block = 0;
@@ -2398,7 +2398,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
* If this is a truncate, this condition
* should never happen
*/
- if (end == EXT_MAX_BLOCK) {
+ if (end == EXT_MAX_BLOCKS - 1) {
ext_debug(" bad truncate %u:%u\n",
start, end);
err = -EIO;
@@ -2478,7 +2478,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
* we need to remove it from the leaf
*/
if (num == 0) {
- if (end != EXT_MAX_BLOCK) {
+ if (end != EXT_MAX_BLOCKS - 1) {
/*
* For hole punching, we need to scoot all the
* extents up when an extent is removed so that
@@ -3699,7 +3699,7 @@ void ext4_ext_truncate(struct inode *inode)
last_block = (inode->i_size + sb->s_blocksize - 1)
>> EXT4_BLOCK_SIZE_BITS(sb);
- err = ext4_ext_remove_space(inode, last_block, EXT_MAX_BLOCK);
+ err = ext4_ext_remove_space(inode, last_block, EXT_MAX_BLOCKS - 1);
/* In a multi-transaction truncate, we only make the final
* transaction synchronous.
@@ -3914,14 +3914,13 @@ int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset,
/*
* Callback function called for each extent to gather FIEMAP information.
*/
-static int ext4_ext_fiemap_cb(struct inode *inode, struct ext4_ext_path *path,
+static int ext4_ext_fiemap_cb(struct inode *inode, ext4_lblk_t next,
struct ext4_ext_cache *newex, struct ext4_extent *ex,
void *data)
{
__u64 logical;
__u64 physical;
__u64 length;
- loff_t size;
__u32 flags = 0;
int ret = 0;
struct fiemap_extent_info *fieinfo = data;
@@ -4103,8 +4102,7 @@ found_delayed_extent:
if (ex && ext4_ext_is_uninitialized(ex))
flags |= FIEMAP_EXTENT_UNWRITTEN;
- size = i_size_read(inode);
- if (logical + length >= size)
+ if (next == EXT_MAX_BLOCKS)
flags |= FIEMAP_EXTENT_LAST;
ret = fiemap_fill_next_extent(fieinfo, logical, physical,
@@ -4347,8 +4345,8 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
start_blk = start >> inode->i_sb->s_blocksize_bits;
last_blk = (start + len - 1) >> inode->i_sb->s_blocksize_bits;
- if (last_blk >= EXT_MAX_BLOCK)
- last_blk = EXT_MAX_BLOCK-1;
+ if (last_blk >= EXT_MAX_BLOCKS)
+ last_blk = EXT_MAX_BLOCKS-1;
len_blks = ((ext4_lblk_t) last_blk) - start_blk + 1;
/*
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index a5763e3505ba..e3126c051006 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2634,7 +2634,7 @@ static int ext4_writepage(struct page *page,
struct buffer_head *page_bufs = NULL;
struct inode *inode = page->mapping->host;
- trace_ext4_writepage(inode, page);
+ trace_ext4_writepage(page);
size = i_size_read(inode);
if (page->index == size >> PAGE_CACHE_SHIFT)
len = size & ~PAGE_CACHE_MASK;
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 859f2ae8864e..6ed859d56850 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -3578,8 +3578,8 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
free += next - bit;
trace_ext4_mballoc_discard(sb, NULL, group, bit, next - bit);
- trace_ext4_mb_release_inode_pa(sb, pa->pa_inode, pa,
- grp_blk_start + bit, next - bit);
+ trace_ext4_mb_release_inode_pa(pa, grp_blk_start + bit,
+ next - bit);
mb_free_blocks(pa->pa_inode, e4b, bit, next - bit);
bit = next + 1;
}
@@ -3608,7 +3608,7 @@ ext4_mb_release_group_pa(struct ext4_buddy *e4b,
ext4_group_t group;
ext4_grpblk_t bit;
- trace_ext4_mb_release_group_pa(sb, pa);
+ trace_ext4_mb_release_group_pa(pa);
BUG_ON(pa->pa_deleted == 0);
ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
@@ -4448,7 +4448,7 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
* @inode: inode
* @block: start physical block to free
* @count: number of blocks to count
- * @metadata: Are these metadata blocks
+ * @flags: flags used by ext4_free_blocks
*/
void ext4_free_blocks(handle_t *handle, struct inode *inode,
struct buffer_head *bh, ext4_fsblk_t block,
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index 2b8304bf3c50..f57455a1b1b2 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -1002,12 +1002,12 @@ mext_check_arguments(struct inode *orig_inode,
return -EINVAL;
}
- if ((orig_start > EXT_MAX_BLOCK) ||
- (donor_start > EXT_MAX_BLOCK) ||
- (*len > EXT_MAX_BLOCK) ||
- (orig_start + *len > EXT_MAX_BLOCK)) {
+ if ((orig_start >= EXT_MAX_BLOCKS) ||
+ (donor_start >= EXT_MAX_BLOCKS) ||
+ (*len > EXT_MAX_BLOCKS) ||
+ (orig_start + *len >= EXT_MAX_BLOCKS)) {
ext4_debug("ext4 move extent: Can't handle over [%u] blocks "
- "[ino:orig %lu, donor %lu]\n", EXT_MAX_BLOCK,
+ "[ino:orig %lu, donor %lu]\n", EXT_MAX_BLOCKS,
orig_inode->i_ino, donor_inode->i_ino);
return -EINVAL;
}
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index cc5c157aa11d..9ea71aa864b3 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -2243,6 +2243,12 @@ static void ext4_orphan_cleanup(struct super_block *sb,
* in the vfs. ext4 inode has 48 bits of i_block in fsblock units,
* so that won't be a limiting factor.
*
+ * However there is other limiting factor. We do store extents in the form
+ * of starting block and length, hence the resulting length of the extent
+ * covering maximum file size must fit into on-disk format containers as
+ * well. Given that length is always by 1 unit bigger than max unit (because
+ * we count 0 as well) we have to lower the s_maxbytes by one fs block.
+ *
* Note, this does *not* consider any metadata overhead for vfs i_blocks.
*/
static loff_t ext4_max_size(int blkbits, int has_huge_files)
@@ -2264,10 +2270,13 @@ static loff_t ext4_max_size(int blkbits, int has_huge_files)
upper_limit <<= blkbits;
}
- /* 32-bit extent-start container, ee_block */
- res = 1LL << 32;
+ /*
+ * 32-bit extent-start container, ee_block. We lower the maxbytes
+ * by one fs block, so ee_len can cover the extent of maximum file
+ * size
+ */
+ res = (1LL << 32) - 1;
res <<= blkbits;
- res -= 1;
/* Sanity check against vm- & vfs- imposed limits */
if (res > upper_limit)
diff --git a/fs/fscache/page.c b/fs/fscache/page.c
index a2a5d19ece6a..2f343b4d7a7d 100644
--- a/fs/fscache/page.c
+++ b/fs/fscache/page.c
@@ -954,3 +954,47 @@ void fscache_mark_pages_cached(struct fscache_retrieval *op,
pagevec_reinit(pagevec);
}
EXPORT_SYMBOL(fscache_mark_pages_cached);
+
+/*
+ * Uncache all the pages in an inode that are marked PG_fscache, assuming them
+ * to be associated with the given cookie.
+ */
+void __fscache_uncache_all_inode_pages(struct fscache_cookie *cookie,
+ struct inode *inode)
+{
+ struct address_space *mapping = inode->i_mapping;
+ struct pagevec pvec;
+ pgoff_t next;
+ int i;
+
+ _enter("%p,%p", cookie, inode);
+
+ if (!mapping || mapping->nrpages == 0) {
+ _leave(" [no pages]");
+ return;
+ }
+
+ pagevec_init(&pvec, 0);
+ next = 0;
+ while (next <= (loff_t)-1 &&
+ pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)
+ ) {
+ for (i = 0; i < pagevec_count(&pvec); i++) {
+ struct page *page = pvec.pages[i];
+ pgoff_t page_index = page->index;
+
+ ASSERTCMP(page_index, >=, next);
+ next = page_index + 1;
+
+ if (PageFsCache(page)) {
+ __fscache_wait_on_page_write(cookie, page);
+ __fscache_uncache_page(cookie, page);
+ }
+ }
+ pagevec_release(&pvec);
+ cond_resched();
+ }
+
+ _leave("");
+}
+EXPORT_SYMBOL(__fscache_uncache_all_inode_pages);
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index b49b55584c84..84a47b709f51 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -500,7 +500,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
out_put_hidden_dir:
iput(sbi->hidden_dir);
out_put_root:
- iput(sbi->alloc_file);
+ iput(root);
out_put_alloc_file:
iput(sbi->alloc_file);
out_close_cat_tree:
diff --git a/fs/hfsplus/wrapper.c b/fs/hfsplus/wrapper.c
index 3031d81f5f0f..4ac88ff79aa6 100644
--- a/fs/hfsplus/wrapper.c
+++ b/fs/hfsplus/wrapper.c
@@ -36,6 +36,7 @@ int hfsplus_submit_bio(struct block_device *bdev, sector_t sector,
{
DECLARE_COMPLETION_ONSTACK(wait);
struct bio *bio;
+ int ret = 0;
bio = bio_alloc(GFP_NOIO, 1);
bio->bi_sector = sector;
@@ -54,8 +55,10 @@ int hfsplus_submit_bio(struct block_device *bdev, sector_t sector,
wait_for_completion(&wait);
if (!bio_flagged(bio, BIO_UPTODATE))
- return -EIO;
- return 0;
+ ret = -EIO;
+
+ bio_put(bio);
+ return ret;
}
static int hfsplus_read_mdb(void *bufptr, struct hfsplus_wd *wd)
diff --git a/fs/inode.c b/fs/inode.c
index 0f7e88a7803f..43566d17d1b8 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -423,7 +423,14 @@ EXPORT_SYMBOL(remove_inode_hash);
void end_writeback(struct inode *inode)
{
might_sleep();
+ /*
+ * We have to cycle tree_lock here because reclaim can be still in the
+ * process of removing the last page (in __delete_from_page_cache())
+ * and we must not free mapping under it.
+ */
+ spin_lock_irq(&inode->i_data.tree_lock);
BUG_ON(inode->i_data.nrpages);
+ spin_unlock_irq(&inode->i_data.tree_lock);
BUG_ON(!list_empty(&inode->i_data.private_list));
BUG_ON(!(inode->i_state & I_FREEING));
BUG_ON(inode->i_state & I_CLEAR);
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index 3db5ba4568fc..b3cc8586984e 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -974,7 +974,7 @@ out_no_inode:
out_no_read:
printk(KERN_WARNING "%s: bread failed, dev=%s, iso_blknum=%d, block=%d\n",
__func__, s->s_id, iso_blknum, block);
- goto out_freesbi;
+ goto out_freebh;
out_bad_zone_size:
printk(KERN_WARNING "ISOFS: Bad logical zone size %ld\n",
sbi->s_log_zone_size);
@@ -989,6 +989,7 @@ out_unknown_format:
out_freebh:
brelse(bh);
+ brelse(pri_bh);
out_freesbi:
kfree(opt.iocharset);
kfree(sbi);
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index 6a79fd0a1a32..2c62c5aae82f 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -97,10 +97,14 @@ static int __try_to_free_cp_buf(struct journal_head *jh)
if (jh->b_jlist == BJ_None && !buffer_locked(bh) &&
!buffer_dirty(bh) && !buffer_write_io_error(bh)) {
+ /*
+ * Get our reference so that bh cannot be freed before
+ * we unlock it
+ */
+ get_bh(bh);
JBUFFER_TRACE(jh, "remove from checkpoint list");
ret = __jbd2_journal_remove_checkpoint(jh) + 1;
jbd_unlock_bh_state(bh);
- jbd2_journal_remove_journal_head(bh);
BUFFER_TRACE(bh, "release");
__brelse(bh);
} else {
@@ -223,8 +227,8 @@ restart:
spin_lock(&journal->j_list_lock);
goto restart;
}
+ get_bh(bh);
if (buffer_locked(bh)) {
- atomic_inc(&bh->b_count);
spin_unlock(&journal->j_list_lock);
jbd_unlock_bh_state(bh);
wait_on_buffer(bh);
@@ -243,7 +247,6 @@ restart:
*/
released = __jbd2_journal_remove_checkpoint(jh);
jbd_unlock_bh_state(bh);
- jbd2_journal_remove_journal_head(bh);
__brelse(bh);
}
@@ -284,7 +287,7 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
int ret = 0;
if (buffer_locked(bh)) {
- atomic_inc(&bh->b_count);
+ get_bh(bh);
spin_unlock(&journal->j_list_lock);
jbd_unlock_bh_state(bh);
wait_on_buffer(bh);
@@ -316,12 +319,12 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
ret = 1;
if (unlikely(buffer_write_io_error(bh)))
ret = -EIO;
+ get_bh(bh);
J_ASSERT_JH(jh, !buffer_jbddirty(bh));
BUFFER_TRACE(bh, "remove from checkpoint");
__jbd2_journal_remove_checkpoint(jh);
spin_unlock(&journal->j_list_lock);
jbd_unlock_bh_state(bh);
- jbd2_journal_remove_journal_head(bh);
__brelse(bh);
} else {
/*
@@ -554,7 +557,8 @@ int jbd2_cleanup_journal_tail(journal_t *journal)
/*
* journal_clean_one_cp_list
*
- * Find all the written-back checkpoint buffers in the given list and release them.
+ * Find all the written-back checkpoint buffers in the given list and
+ * release them.
*
* Called with the journal locked.
* Called with j_list_lock held.
@@ -663,8 +667,8 @@ out:
* checkpoint lists.
*
* The function returns 1 if it frees the transaction, 0 otherwise.
+ * The function can free jh and bh.
*
- * This function is called with the journal locked.
* This function is called with j_list_lock held.
* This function is called with jbd_lock_bh_state(jh2bh(jh))
*/
@@ -684,13 +688,14 @@ int __jbd2_journal_remove_checkpoint(struct journal_head *jh)
}
journal = transaction->t_journal;
+ JBUFFER_TRACE(jh, "removing from transaction");
__buffer_unlink(jh);
jh->b_cp_transaction = NULL;
+ jbd2_journal_put_journal_head(jh);
if (transaction->t_checkpoint_list != NULL ||
transaction->t_checkpoint_io_list != NULL)
goto out;
- JBUFFER_TRACE(jh, "transaction has no more buffers");
/*
* There is one special case to worry about: if we have just pulled the
@@ -701,10 +706,8 @@ int __jbd2_journal_remove_checkpoint(struct journal_head *jh)
* The locking here around t_state is a bit sleazy.
* See the comment at the end of jbd2_journal_commit_transaction().
*/
- if (transaction->t_state != T_FINISHED) {
- JBUFFER_TRACE(jh, "belongs to running/committing transaction");
+ if (transaction->t_state != T_FINISHED)
goto out;
- }
/* OK, that was the last buffer for the transaction: we can now
safely remove this transaction from the log */
@@ -723,7 +726,6 @@ int __jbd2_journal_remove_checkpoint(struct journal_head *jh)
wake_up(&journal->j_wait_logspace);
ret = 1;
out:
- JBUFFER_TRACE(jh, "exit");
return ret;
}
@@ -742,6 +744,8 @@ void __jbd2_journal_insert_checkpoint(struct journal_head *jh,
J_ASSERT_JH(jh, buffer_dirty(jh2bh(jh)) || buffer_jbddirty(jh2bh(jh)));
J_ASSERT_JH(jh, jh->b_cp_transaction == NULL);
+ /* Get reference for checkpointing transaction */
+ jbd2_journal_grab_journal_head(jh2bh(jh));
jh->b_cp_transaction = transaction;
if (!transaction->t_checkpoint_list) {
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 7f21cf3aaf92..eef6979821a4 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -848,10 +848,16 @@ restart_loop:
while (commit_transaction->t_forget) {
transaction_t *cp_transaction;
struct buffer_head *bh;
+ int try_to_free = 0;
jh = commit_transaction->t_forget;
spin_unlock(&journal->j_list_lock);
bh = jh2bh(jh);
+ /*
+ * Get a reference so that bh cannot be freed before we are
+ * done with it.
+ */
+ get_bh(bh);
jbd_lock_bh_state(bh);
J_ASSERT_JH(jh, jh->b_transaction == commit_transaction);
@@ -914,28 +920,27 @@ restart_loop:
__jbd2_journal_insert_checkpoint(jh, commit_transaction);
if (is_journal_aborted(journal))
clear_buffer_jbddirty(bh);
- JBUFFER_TRACE(jh, "refile for checkpoint writeback");
- __jbd2_journal_refile_buffer(jh);
- jbd_unlock_bh_state(bh);
} else {
J_ASSERT_BH(bh, !buffer_dirty(bh));
- /* The buffer on BJ_Forget list and not jbddirty means
+ /*
+ * The buffer on BJ_Forget list and not jbddirty means
* it has been freed by this transaction and hence it
* could not have been reallocated until this
* transaction has committed. *BUT* it could be
* reallocated once we have written all the data to
* disk and before we process the buffer on BJ_Forget
- * list. */
- JBUFFER_TRACE(jh, "refile or unfile freed buffer");
- __jbd2_journal_refile_buffer(jh);
- if (!jh->b_transaction) {
- jbd_unlock_bh_state(bh);
- /* needs a brelse */
- jbd2_journal_remove_journal_head(bh);
- release_buffer_page(bh);
- } else
- jbd_unlock_bh_state(bh);
+ * list.
+ */
+ if (!jh->b_next_transaction)
+ try_to_free = 1;
}
+ JBUFFER_TRACE(jh, "refile or unfile buffer");
+ __jbd2_journal_refile_buffer(jh);
+ jbd_unlock_bh_state(bh);
+ if (try_to_free)
+ release_buffer_page(bh); /* Drops bh reference */
+ else
+ __brelse(bh);
cond_resched_lock(&journal->j_list_lock);
}
spin_unlock(&journal->j_list_lock);
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 9a7826990304..0dfa5b598e68 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -2078,10 +2078,9 @@ static void journal_free_journal_head(struct journal_head *jh)
* When a buffer has its BH_JBD bit set it is immune from being released by
* core kernel code, mainly via ->b_count.
*
- * A journal_head may be detached from its buffer_head when the journal_head's
- * b_transaction, b_cp_transaction and b_next_transaction pointers are NULL.
- * Various places in JBD call jbd2_journal_remove_journal_head() to indicate that the
- * journal_head can be dropped if needed.
+ * A journal_head is detached from its buffer_head when the journal_head's
+ * b_jcount reaches zero. Running transaction (b_transaction) and checkpoint
+ * transaction (b_cp_transaction) hold their references to b_jcount.
*
* Various places in the kernel want to attach a journal_head to a buffer_head
* _before_ attaching the journal_head to a transaction. To protect the
@@ -2094,17 +2093,16 @@ static void journal_free_journal_head(struct journal_head *jh)
* (Attach a journal_head if needed. Increments b_jcount)
* struct journal_head *jh = jbd2_journal_add_journal_head(bh);
* ...
+ * (Get another reference for transaction)
+ * jbd2_journal_grab_journal_head(bh);
* jh->b_transaction = xxx;
+ * (Put original reference)
* jbd2_journal_put_journal_head(jh);
- *
- * Now, the journal_head's b_jcount is zero, but it is safe from being released
- * because it has a non-zero b_transaction.
*/
/*
* Give a buffer_head a journal_head.
*
- * Doesn't need the journal lock.
* May sleep.
*/
struct journal_head *jbd2_journal_add_journal_head(struct buffer_head *bh)
@@ -2168,61 +2166,29 @@ static void __journal_remove_journal_head(struct buffer_head *bh)
struct journal_head *jh = bh2jh(bh);
J_ASSERT_JH(jh, jh->b_jcount >= 0);
-
- get_bh(bh);
- if (jh->b_jcount == 0) {
- if (jh->b_transaction == NULL &&
- jh->b_next_transaction == NULL &&
- jh->b_cp_transaction == NULL) {
- J_ASSERT_JH(jh, jh->b_jlist == BJ_None);
- J_ASSERT_BH(bh, buffer_jbd(bh));
- J_ASSERT_BH(bh, jh2bh(jh) == bh);
- BUFFER_TRACE(bh, "remove journal_head");
- if (jh->b_frozen_data) {
- printk(KERN_WARNING "%s: freeing "
- "b_frozen_data\n",
- __func__);
- jbd2_free(jh->b_frozen_data, bh->b_size);
- }
- if (jh->b_committed_data) {
- printk(KERN_WARNING "%s: freeing "
- "b_committed_data\n",
- __func__);
- jbd2_free(jh->b_committed_data, bh->b_size);
- }
- bh->b_private = NULL;
- jh->b_bh = NULL; /* debug, really */
- clear_buffer_jbd(bh);
- __brelse(bh);
- journal_free_journal_head(jh);
- } else {
- BUFFER_TRACE(bh, "journal_head was locked");
- }
+ J_ASSERT_JH(jh, jh->b_transaction == NULL);
+ J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
+ J_ASSERT_JH(jh, jh->b_cp_transaction == NULL);
+ J_ASSERT_JH(jh, jh->b_jlist == BJ_None);
+ J_ASSERT_BH(bh, buffer_jbd(bh));
+ J_ASSERT_BH(bh, jh2bh(jh) == bh);
+ BUFFER_TRACE(bh, "remove journal_head");
+ if (jh->b_frozen_data) {
+ printk(KERN_WARNING "%s: freeing b_frozen_data\n", __func__);
+ jbd2_free(jh->b_frozen_data, bh->b_size);
}
+ if (jh->b_committed_data) {
+ printk(KERN_WARNING "%s: freeing b_committed_data\n", __func__);
+ jbd2_free(jh->b_committed_data, bh->b_size);
+ }
+ bh->b_private = NULL;
+ jh->b_bh = NULL; /* debug, really */
+ clear_buffer_jbd(bh);
+ journal_free_journal_head(jh);
}
/*
- * jbd2_journal_remove_journal_head(): if the buffer isn't attached to a transaction
- * and has a zero b_jcount then remove and release its journal_head. If we did
- * see that the buffer is not used by any transaction we also "logically"
- * decrement ->b_count.
- *
- * We in fact take an additional increment on ->b_count as a convenience,
- * because the caller usually wants to do additional things with the bh
- * after calling here.
- * The caller of jbd2_journal_remove_journal_head() *must* run __brelse(bh) at some
- * time. Once the caller has run __brelse(), the buffer is eligible for
- * reaping by try_to_free_buffers().
- */
-void jbd2_journal_remove_journal_head(struct buffer_head *bh)
-{
- jbd_lock_bh_journal_head(bh);
- __journal_remove_journal_head(bh);
- jbd_unlock_bh_journal_head(bh);
-}
-
-/*
- * Drop a reference on the passed journal_head. If it fell to zero then try to
+ * Drop a reference on the passed journal_head. If it fell to zero then
* release the journal_head from the buffer_head.
*/
void jbd2_journal_put_journal_head(struct journal_head *jh)
@@ -2232,11 +2198,12 @@ void jbd2_journal_put_journal_head(struct journal_head *jh)
jbd_lock_bh_journal_head(bh);
J_ASSERT_JH(jh, jh->b_jcount > 0);
--jh->b_jcount;
- if (!jh->b_jcount && !jh->b_transaction) {
+ if (!jh->b_jcount) {
__journal_remove_journal_head(bh);
+ jbd_unlock_bh_journal_head(bh);
__brelse(bh);
- }
- jbd_unlock_bh_journal_head(bh);
+ } else
+ jbd_unlock_bh_journal_head(bh);
}
/*
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 3eec82d32fd4..2d7109414cdd 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -30,6 +30,7 @@
#include <linux/module.h>
static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh);
+static void __jbd2_journal_unfile_buffer(struct journal_head *jh);
/*
* jbd2_get_transaction: obtain a new transaction_t object.
@@ -764,7 +765,6 @@ repeat:
if (!jh->b_transaction) {
JBUFFER_TRACE(jh, "no transaction");
J_ASSERT_JH(jh, !jh->b_next_transaction);
- jh->b_transaction = transaction;
JBUFFER_TRACE(jh, "file as BJ_Reserved");
spin_lock(&journal->j_list_lock);
__jbd2_journal_file_buffer(jh, transaction, BJ_Reserved);
@@ -814,7 +814,6 @@ out:
* int jbd2_journal_get_write_access() - notify intent to modify a buffer for metadata (not data) update.
* @handle: transaction to add buffer modifications to
* @bh: bh to be used for metadata writes
- * @credits: variable that will receive credits for the buffer
*
* Returns an error code or 0 on success.
*
@@ -896,8 +895,6 @@ int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh)
* committed and so it's safe to clear the dirty bit.
*/
clear_buffer_dirty(jh2bh(jh));
- jh->b_transaction = transaction;
-
/* first access by this transaction */
jh->b_modified = 0;
@@ -932,7 +929,6 @@ out:
* non-rewindable consequences
* @handle: transaction
* @bh: buffer to undo
- * @credits: store the number of taken credits here (if not NULL)
*
* Sometimes there is a need to distinguish between metadata which has
* been committed to disk and that which has not. The ext3fs code uses
@@ -1232,8 +1228,6 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh)
__jbd2_journal_file_buffer(jh, transaction, BJ_Forget);
} else {
__jbd2_journal_unfile_buffer(jh);
- jbd2_journal_remove_journal_head(bh);
- __brelse(bh);
if (!buffer_jbd(bh)) {
spin_unlock(&journal->j_list_lock);
jbd_unlock_bh_state(bh);
@@ -1556,19 +1550,32 @@ void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh)
mark_buffer_dirty(bh); /* Expose it to the VM */
}
-void __jbd2_journal_unfile_buffer(struct journal_head *jh)
+/*
+ * Remove buffer from all transactions.
+ *
+ * Called with bh_state lock and j_list_lock
+ *
+ * jh and bh may be already freed when this function returns.
+ */
+static void __jbd2_journal_unfile_buffer(struct journal_head *jh)
{
__jbd2_journal_temp_unlink_buffer(jh);
jh->b_transaction = NULL;
+ jbd2_journal_put_journal_head(jh);
}
void jbd2_journal_unfile_buffer(journal_t *journal, struct journal_head *jh)
{
- jbd_lock_bh_state(jh2bh(jh));
+ struct buffer_head *bh = jh2bh(jh);
+
+ /* Get reference so that buffer cannot be freed before we unlock it */
+ get_bh(bh);
+ jbd_lock_bh_state(bh);
spin_lock(&journal->j_list_lock);
__jbd2_journal_unfile_buffer(jh);
spin_unlock(&journal->j_list_lock);
- jbd_unlock_bh_state(jh2bh(jh));
+ jbd_unlock_bh_state(bh);
+ __brelse(bh);
}
/*
@@ -1595,8 +1602,6 @@ __journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh)
if (jh->b_jlist == BJ_None) {
JBUFFER_TRACE(jh, "remove from checkpoint list");
__jbd2_journal_remove_checkpoint(jh);
- jbd2_journal_remove_journal_head(bh);
- __brelse(bh);
}
}
spin_unlock(&journal->j_list_lock);
@@ -1659,7 +1664,6 @@ int jbd2_journal_try_to_free_buffers(journal_t *journal,
/*
* We take our own ref against the journal_head here to avoid
* having to add tons of locking around each instance of
- * jbd2_journal_remove_journal_head() and
* jbd2_journal_put_journal_head().
*/
jh = jbd2_journal_grab_journal_head(bh);
@@ -1697,10 +1701,9 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction)
int may_free = 1;
struct buffer_head *bh = jh2bh(jh);
- __jbd2_journal_unfile_buffer(jh);
-
if (jh->b_cp_transaction) {
JBUFFER_TRACE(jh, "on running+cp transaction");
+ __jbd2_journal_temp_unlink_buffer(jh);
/*
* We don't want to write the buffer anymore, clear the
* bit so that we don't confuse checks in
@@ -1711,8 +1714,7 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction)
may_free = 0;
} else {
JBUFFER_TRACE(jh, "on running transaction");
- jbd2_journal_remove_journal_head(bh);
- __brelse(bh);
+ __jbd2_journal_unfile_buffer(jh);
}
return may_free;
}
@@ -1990,6 +1992,8 @@ void __jbd2_journal_file_buffer(struct journal_head *jh,
if (jh->b_transaction)
__jbd2_journal_temp_unlink_buffer(jh);
+ else
+ jbd2_journal_grab_journal_head(bh);
jh->b_transaction = transaction;
switch (jlist) {
@@ -2041,9 +2045,10 @@ void jbd2_journal_file_buffer(struct journal_head *jh,
* already started to be used by a subsequent transaction, refile the
* buffer on that transaction's metadata list.
*
- * Called under journal->j_list_lock
- *
+ * Called under j_list_lock
* Called under jbd_lock_bh_state(jh2bh(jh))
+ *
+ * jh and bh may be already free when this function returns
*/
void __jbd2_journal_refile_buffer(struct journal_head *jh)
{
@@ -2067,6 +2072,11 @@ void __jbd2_journal_refile_buffer(struct journal_head *jh)
was_dirty = test_clear_buffer_jbddirty(bh);
__jbd2_journal_temp_unlink_buffer(jh);
+ /*
+ * We set b_transaction here because b_next_transaction will inherit
+ * our jh reference and thus __jbd2_journal_file_buffer() must not
+ * take a new one.
+ */
jh->b_transaction = jh->b_next_transaction;
jh->b_next_transaction = NULL;
if (buffer_freed(bh))
@@ -2083,30 +2093,21 @@ void __jbd2_journal_refile_buffer(struct journal_head *jh)
}
/*
- * For the unlocked version of this call, also make sure that any
- * hanging journal_head is cleaned up if necessary.
- *
- * __jbd2_journal_refile_buffer is usually called as part of a single locked
- * operation on a buffer_head, in which the caller is probably going to
- * be hooking the journal_head onto other lists. In that case it is up
- * to the caller to remove the journal_head if necessary. For the
- * unlocked jbd2_journal_refile_buffer call, the caller isn't going to be
- * doing anything else to the buffer so we need to do the cleanup
- * ourselves to avoid a jh leak.
- *
- * *** The journal_head may be freed by this call! ***
+ * __jbd2_journal_refile_buffer() with necessary locking added. We take our
+ * bh reference so that we can safely unlock bh.
+ *
+ * The jh and bh may be freed by this call.
*/
void jbd2_journal_refile_buffer(journal_t *journal, struct journal_head *jh)
{
struct buffer_head *bh = jh2bh(jh);
+ /* Get reference so that buffer cannot be freed before we unlock it */
+ get_bh(bh);
jbd_lock_bh_state(bh);
spin_lock(&journal->j_list_lock);
-
__jbd2_journal_refile_buffer(jh);
jbd_unlock_bh_state(bh);
- jbd2_journal_remove_journal_head(bh);
-
spin_unlock(&journal->j_list_lock);
__brelse(bh);
}
diff --git a/fs/jfs/file.c b/fs/jfs/file.c
index c5ce6c1d1ff4..2f3f531f3606 100644
--- a/fs/jfs/file.c
+++ b/fs/jfs/file.c
@@ -66,9 +66,9 @@ static int jfs_open(struct inode *inode, struct file *file)
struct jfs_inode_info *ji = JFS_IP(inode);
spin_lock_irq(&ji->ag_lock);
if (ji->active_ag == -1) {
- ji->active_ag = ji->agno;
- atomic_inc(
- &JFS_SBI(inode->i_sb)->bmap->db_active[ji->agno]);
+ struct jfs_sb_info *jfs_sb = JFS_SBI(inode->i_sb);
+ ji->active_ag = BLKTOAG(addressPXD(&ji->ixpxd), jfs_sb);
+ atomic_inc( &jfs_sb->bmap->db_active[ji->active_ag]);
}
spin_unlock_irq(&ji->ag_lock);
}
diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c
index ed53a4740168..b78b2f978f04 100644
--- a/fs/jfs/jfs_imap.c
+++ b/fs/jfs/jfs_imap.c
@@ -397,7 +397,7 @@ int diRead(struct inode *ip)
release_metapage(mp);
/* set the ag for the inode */
- JFS_IP(ip)->agno = BLKTOAG(agstart, sbi);
+ JFS_IP(ip)->agstart = agstart;
JFS_IP(ip)->active_ag = -1;
return (rc);
@@ -901,7 +901,7 @@ int diFree(struct inode *ip)
/* get the allocation group for this ino.
*/
- agno = JFS_IP(ip)->agno;
+ agno = BLKTOAG(JFS_IP(ip)->agstart, JFS_SBI(ip->i_sb));
/* Lock the AG specific inode map information
*/
@@ -1315,12 +1315,11 @@ int diFree(struct inode *ip)
static inline void
diInitInode(struct inode *ip, int iagno, int ino, int extno, struct iag * iagp)
{
- struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb);
struct jfs_inode_info *jfs_ip = JFS_IP(ip);
ip->i_ino = (iagno << L2INOSPERIAG) + ino;
jfs_ip->ixpxd = iagp->inoext[extno];
- jfs_ip->agno = BLKTOAG(le64_to_cpu(iagp->agstart), sbi);
+ jfs_ip->agstart = le64_to_cpu(iagp->agstart);
jfs_ip->active_ag = -1;
}
@@ -1379,7 +1378,7 @@ int diAlloc(struct inode *pip, bool dir, struct inode *ip)
*/
/* get the ag number of this iag */
- agno = JFS_IP(pip)->agno;
+ agno = BLKTOAG(JFS_IP(pip)->agstart, JFS_SBI(pip->i_sb));
if (atomic_read(&JFS_SBI(pip->i_sb)->bmap->db_active[agno])) {
/*
@@ -2921,10 +2920,9 @@ int diExtendFS(struct inode *ipimap, struct inode *ipbmap)
continue;
}
- /* agstart that computes to the same ag is treated as same; */
agstart = le64_to_cpu(iagp->agstart);
- /* iagp->agstart = agstart & ~(mp->db_agsize - 1); */
n = agstart >> mp->db_agl2size;
+ iagp->agstart = cpu_to_le64((s64)n << mp->db_agl2size);
/* compute backed inodes */
numinos = (EXTSPERIAG - le32_to_cpu(iagp->nfreeexts))
diff --git a/fs/jfs/jfs_incore.h b/fs/jfs/jfs_incore.h
index 1439f119ec83..584a4a1a6e81 100644
--- a/fs/jfs/jfs_incore.h
+++ b/fs/jfs/jfs_incore.h
@@ -50,8 +50,9 @@ struct jfs_inode_info {
short btindex; /* btpage entry index*/
struct inode *ipimap; /* inode map */
unsigned long cflag; /* commit flags */
+ u64 agstart; /* agstart of the containing IAG */
u16 bxflag; /* xflag of pseudo buffer? */
- unchar agno; /* ag number */
+ unchar pad;
signed char active_ag; /* ag currently allocating from */
lid_t blid; /* lid of pseudo buffer? */
lid_t atlhead; /* anonymous tlock list head */
diff --git a/fs/jfs/resize.c b/fs/jfs/resize.c
index 8ea5efb5a34e..8d0c1c7c0820 100644
--- a/fs/jfs/resize.c
+++ b/fs/jfs/resize.c
@@ -80,7 +80,7 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize)
int log_formatted = 0;
struct inode *iplist[1];
struct jfs_superblock *j_sb, *j_sb2;
- uint old_agsize;
+ s64 old_agsize;
int agsizechanged = 0;
struct buffer_head *bh, *bh2;
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index adb45ec9038c..e374050a911c 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -708,7 +708,13 @@ static void nlmclnt_unlock_callback(struct rpc_task *task, void *data)
if (task->tk_status < 0) {
dprintk("lockd: unlock failed (err = %d)\n", -task->tk_status);
- goto retry_rebind;
+ switch (task->tk_status) {
+ case -EACCES:
+ case -EIO:
+ goto die;
+ default:
+ goto retry_rebind;
+ }
}
if (status == NLM_LCK_DENIED_GRACE_PERIOD) {
rpc_delay(task, NLMCLNT_GRACE_WAIT);
diff --git a/fs/locks.c b/fs/locks.c
index 0a4f50dfadfb..b286539d547a 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -160,10 +160,28 @@ EXPORT_SYMBOL_GPL(unlock_flocks);
static struct kmem_cache *filelock_cache __read_mostly;
+static void locks_init_lock_always(struct file_lock *fl)
+{
+ fl->fl_next = NULL;
+ fl->fl_fasync = NULL;
+ fl->fl_owner = NULL;
+ fl->fl_pid = 0;
+ fl->fl_nspid = NULL;
+ fl->fl_file = NULL;
+ fl->fl_flags = 0;
+ fl->fl_type = 0;
+ fl->fl_start = fl->fl_end = 0;
+}
+
/* Allocate an empty lock structure. */
struct file_lock *locks_alloc_lock(void)
{
- return kmem_cache_alloc(filelock_cache, GFP_KERNEL);
+ struct file_lock *fl = kmem_cache_alloc(filelock_cache, GFP_KERNEL);
+
+ if (fl)
+ locks_init_lock_always(fl);
+
+ return fl;
}
EXPORT_SYMBOL_GPL(locks_alloc_lock);
@@ -200,17 +218,9 @@ void locks_init_lock(struct file_lock *fl)
INIT_LIST_HEAD(&fl->fl_link);
INIT_LIST_HEAD(&fl->fl_block);
init_waitqueue_head(&fl->fl_wait);
- fl->fl_next = NULL;
- fl->fl_fasync = NULL;
- fl->fl_owner = NULL;
- fl->fl_pid = 0;
- fl->fl_nspid = NULL;
- fl->fl_file = NULL;
- fl->fl_flags = 0;
- fl->fl_type = 0;
- fl->fl_start = fl->fl_end = 0;
fl->fl_ops = NULL;
fl->fl_lmops = NULL;
+ locks_init_lock_always(fl);
}
EXPORT_SYMBOL(locks_init_lock);
diff --git a/fs/logfs/dir.c b/fs/logfs/dir.c
index 9ed89d1663f8..1afae26cf236 100644
--- a/fs/logfs/dir.c
+++ b/fs/logfs/dir.c
@@ -555,13 +555,6 @@ static int logfs_symlink(struct inode *dir, struct dentry *dentry,
return __logfs_create(dir, dentry, inode, target, destlen);
}
-static int logfs_permission(struct inode *inode, int mask, unsigned int flags)
-{
- if (flags & IPERM_FLAG_RCU)
- return -ECHILD;
- return generic_permission(inode, mask, flags, NULL);
-}
-
static int logfs_link(struct dentry *old_dentry, struct inode *dir,
struct dentry *dentry)
{
@@ -820,7 +813,6 @@ const struct inode_operations logfs_dir_iops = {
.mknod = logfs_mknod,
.rename = logfs_rename,
.rmdir = logfs_rmdir,
- .permission = logfs_permission,
.symlink = logfs_symlink,
.unlink = logfs_unlink,
};
diff --git a/fs/namei.c b/fs/namei.c
index 9802345df5e7..0223c41fb114 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -238,7 +238,8 @@ int generic_permission(struct inode *inode, int mask, unsigned int flags,
/*
* Read/write DACs are always overridable.
- * Executable DACs are overridable if at least one exec bit is set.
+ * Executable DACs are overridable for all directories and
+ * for non-directories that have least one exec bit set.
*/
if (!(mask & MAY_EXEC) || execute_ok(inode))
if (ns_capable(inode_userns(inode), CAP_DAC_OVERRIDE))
@@ -812,6 +813,11 @@ static int follow_automount(struct path *path, unsigned flags,
if (!mnt) /* mount collision */
return 0;
+ if (!*need_mntput) {
+ /* lock_mount() may release path->mnt on error */
+ mntget(path->mnt);
+ *need_mntput = true;
+ }
err = finish_automount(mnt, path);
switch (err) {
@@ -819,12 +825,9 @@ static int follow_automount(struct path *path, unsigned flags,
/* Someone else made a mount here whilst we were busy */
return 0;
case 0:
- dput(path->dentry);
- if (*need_mntput)
- mntput(path->mnt);
+ path_put(path);
path->mnt = mnt;
path->dentry = dget(mnt->mnt_root);
- *need_mntput = true;
return 0;
default:
return err;
@@ -844,9 +847,10 @@ static int follow_automount(struct path *path, unsigned flags,
*/
static int follow_managed(struct path *path, unsigned flags)
{
+ struct vfsmount *mnt = path->mnt; /* held by caller, must be left alone */
unsigned managed;
bool need_mntput = false;
- int ret;
+ int ret = 0;
/* Given that we're not holding a lock here, we retain the value in a
* local variable for each dentry as we look at it so that we don't see
@@ -861,7 +865,7 @@ static int follow_managed(struct path *path, unsigned flags)
BUG_ON(!path->dentry->d_op->d_manage);
ret = path->dentry->d_op->d_manage(path->dentry, false);
if (ret < 0)
- return ret == -EISDIR ? 0 : ret;
+ break;
}
/* Transit to a mounted filesystem. */
@@ -887,14 +891,19 @@ static int follow_managed(struct path *path, unsigned flags)
if (managed & DCACHE_NEED_AUTOMOUNT) {
ret = follow_automount(path, flags, &need_mntput);
if (ret < 0)
- return ret == -EISDIR ? 0 : ret;
+ break;
continue;
}
/* We didn't change the current path point */
break;
}
- return 0;
+
+ if (need_mntput && path->mnt == mnt)
+ mntput(path->mnt);
+ if (ret == -EISDIR)
+ ret = 0;
+ return ret;
}
int follow_down_one(struct path *path)
@@ -1003,9 +1012,6 @@ failed:
* Follow down to the covering mount currently visible to userspace. At each
* point, the filesystem owning that dentry may be queried as to whether the
* caller is permitted to proceed or not.
- *
- * Care must be taken as namespace_sem may be held (indicated by mounting_here
- * being true).
*/
int follow_down(struct path *path)
{
@@ -2713,8 +2719,10 @@ static long do_unlinkat(int dfd, const char __user *pathname)
error = PTR_ERR(dentry);
if (!IS_ERR(dentry)) {
/* Why not before? Because we want correct error value */
+ if (nd.last.name[nd.last.len])
+ goto slashes;
inode = dentry->d_inode;
- if (nd.last.name[nd.last.len] || !inode)
+ if (!inode)
goto slashes;
ihold(inode);
error = mnt_want_write(nd.path.mnt);
diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c
index ce153a6b3aec..419119c371bf 100644
--- a/fs/nfs/fscache.c
+++ b/fs/nfs/fscache.c
@@ -259,12 +259,10 @@ static void nfs_fscache_disable_inode_cookie(struct inode *inode)
dfprintk(FSCACHE,
"NFS: nfsi 0x%p turning cache off\n", NFS_I(inode));
- /* Need to invalidate any mapped pages that were read in before
- * turning off the cache.
+ /* Need to uncache any pages attached to this inode that
+ * fscache knows about before turning off the cache.
*/
- if (inode->i_mapping && inode->i_mapping->nrpages)
- invalidate_inode_pages2(inode->i_mapping);
-
+ fscache_uncache_all_inode_pages(NFS_I(inode)->fscache, inode);
nfs_fscache_zap_inode_cookie(inode);
}
}
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 144f2a3c7185..6f4850deb272 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -256,7 +256,8 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
nfs_attr_check_mountpoint(sb, fattr);
- if ((fattr->valid & NFS_ATTR_FATTR_FILEID) == 0 && (fattr->valid & NFS_ATTR_FATTR_MOUNTPOINT) == 0)
+ if (((fattr->valid & NFS_ATTR_FATTR_FILEID) == 0) &&
+ !nfs_attr_use_mounted_on_fileid(fattr))
goto out_no_inode;
if ((fattr->valid & NFS_ATTR_FATTR_TYPE) == 0)
goto out_no_inode;
@@ -1294,7 +1295,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
if (new_isize != cur_isize) {
/* Do we perhaps have any outstanding writes, or has
* the file grown beyond our last write? */
- if (nfsi->npages == 0 || new_isize > cur_isize) {
+ if ((nfsi->npages == 0 && !test_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) ||
+ new_isize > cur_isize) {
i_size_write(inode, new_isize);
invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;
}
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index b9056cbe68d6..2a55347a2daa 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -45,6 +45,17 @@ static inline void nfs_attr_check_mountpoint(struct super_block *parent, struct
fattr->valid |= NFS_ATTR_FATTR_MOUNTPOINT;
}
+static inline int nfs_attr_use_mounted_on_fileid(struct nfs_fattr *fattr)
+{
+ if (((fattr->valid & NFS_ATTR_FATTR_MOUNTED_ON_FILEID) == 0) ||
+ (((fattr->valid & NFS_ATTR_FATTR_MOUNTPOINT) == 0) &&
+ ((fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) == 0)))
+ return 0;
+
+ fattr->fileid = fattr->mounted_on_fileid;
+ return 1;
+}
+
struct nfs_clone_mount {
const struct super_block *sb;
const struct dentry *dentry;
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index 426908809c97..0bafcc91c27f 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -30,6 +30,7 @@
*/
#include <linux/nfs_fs.h>
+#include <linux/nfs_page.h>
#include "internal.h"
#include "nfs4filelayout.h"
@@ -552,13 +553,18 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo,
__func__, nfl_util, fl->num_fh, fl->first_stripe_index,
fl->pattern_offset);
- if (!fl->num_fh)
+ /* Note that a zero value for num_fh is legal for STRIPE_SPARSE.
+ * Futher checking is done in filelayout_check_layout */
+ if (fl->num_fh < 0 || fl->num_fh >
+ max(NFS4_PNFS_MAX_STRIPE_CNT, NFS4_PNFS_MAX_MULTI_CNT))
goto out_err;
- fl->fh_array = kzalloc(fl->num_fh * sizeof(struct nfs_fh *),
- gfp_flags);
- if (!fl->fh_array)
- goto out_err;
+ if (fl->num_fh > 0) {
+ fl->fh_array = kzalloc(fl->num_fh * sizeof(struct nfs_fh *),
+ gfp_flags);
+ if (!fl->fh_array)
+ goto out_err;
+ }
for (i = 0; i < fl->num_fh; i++) {
/* Do we want to use a mempool here? */
@@ -661,8 +667,9 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
u64 p_stripe, r_stripe;
u32 stripe_unit;
- if (!pnfs_generic_pg_test(pgio, prev, req))
- return 0;
+ if (!pnfs_generic_pg_test(pgio, prev, req) ||
+ !nfs_generic_pg_test(pgio, prev, req))
+ return false;
if (!pgio->pg_lseg)
return 1;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index d2c4b59c896d..5879b23e0c99 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2265,12 +2265,14 @@ static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
return nfs4_map_errors(status);
}
+static void nfs_fixup_referral_attributes(struct nfs_fattr *fattr);
/*
* Get locations and (maybe) other attributes of a referral.
* Note that we'll actually follow the referral later when
* we detect fsid mismatch in inode revalidation
*/
-static int nfs4_get_referral(struct inode *dir, const struct qstr *name, struct nfs_fattr *fattr, struct nfs_fh *fhandle)
+static int nfs4_get_referral(struct inode *dir, const struct qstr *name,
+ struct nfs_fattr *fattr, struct nfs_fh *fhandle)
{
int status = -ENOMEM;
struct page *page = NULL;
@@ -2288,15 +2290,16 @@ static int nfs4_get_referral(struct inode *dir, const struct qstr *name, struct
goto out;
/* Make sure server returned a different fsid for the referral */
if (nfs_fsid_equal(&NFS_SERVER(dir)->fsid, &locations->fattr.fsid)) {
- dprintk("%s: server did not return a different fsid for a referral at %s\n", __func__, name->name);
+ dprintk("%s: server did not return a different fsid for"
+ " a referral at %s\n", __func__, name->name);
status = -EIO;
goto out;
}
+ /* Fixup attributes for the nfs_lookup() call to nfs_fhget() */
+ nfs_fixup_referral_attributes(&locations->fattr);
+ /* replace the lookup nfs_fattr with the locations nfs_fattr */
memcpy(fattr, &locations->fattr, sizeof(struct nfs_fattr));
- fattr->valid |= NFS_ATTR_FATTR_V4_REFERRAL;
- if (!fattr->mode)
- fattr->mode = S_IFDIR;
memset(fhandle, 0, sizeof(struct nfs_fh));
out:
if (page)
@@ -4667,11 +4670,15 @@ static size_t nfs4_xattr_list_nfs4_acl(struct dentry *dentry, char *list,
return len;
}
+/*
+ * nfs_fhget will use either the mounted_on_fileid or the fileid
+ */
static void nfs_fixup_referral_attributes(struct nfs_fattr *fattr)
{
- if (!((fattr->valid & NFS_ATTR_FATTR_FILEID) &&
- (fattr->valid & NFS_ATTR_FATTR_FSID) &&
- (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL)))
+ if (!(((fattr->valid & NFS_ATTR_FATTR_MOUNTED_ON_FILEID) ||
+ (fattr->valid & NFS_ATTR_FATTR_FILEID)) &&
+ (fattr->valid & NFS_ATTR_FATTR_FSID) &&
+ (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL)))
return;
fattr->valid |= NFS_ATTR_FATTR_TYPE | NFS_ATTR_FATTR_MODE |
@@ -4686,7 +4693,6 @@ int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name,
struct nfs_server *server = NFS_SERVER(dir);
u32 bitmask[2] = {
[0] = FATTR4_WORD0_FSID | FATTR4_WORD0_FS_LOCATIONS,
- [1] = FATTR4_WORD1_MOUNTED_ON_FILEID,
};
struct nfs4_fs_locations_arg args = {
.dir_fh = NFS_FH(dir),
@@ -4705,11 +4711,18 @@ int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name,
int status;
dprintk("%s: start\n", __func__);
+
+ /* Ask for the fileid of the absent filesystem if mounted_on_fileid
+ * is not supported */
+ if (NFS_SERVER(dir)->attr_bitmask[1] & FATTR4_WORD1_MOUNTED_ON_FILEID)
+ bitmask[1] |= FATTR4_WORD1_MOUNTED_ON_FILEID;
+ else
+ bitmask[0] |= FATTR4_WORD0_FILEID;
+
nfs_fattr_init(&fs_locations->fattr);
fs_locations->server = server;
fs_locations->nlocations = 0;
status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0);
- nfs_fixup_referral_attributes(&fs_locations->fattr);
dprintk("%s: returned status = %d\n", __func__, status);
return status;
}
@@ -5098,7 +5111,6 @@ static void nfs4_init_channel_attrs(struct nfs41_create_session_args *args)
if (mxresp_sz == 0)
mxresp_sz = NFS_MAX_FILE_IO_SIZE;
/* Fore channel attributes */
- args->fc_attrs.headerpadsz = 0;
args->fc_attrs.max_rqst_sz = mxrqst_sz;
args->fc_attrs.max_resp_sz = mxresp_sz;
args->fc_attrs.max_ops = NFS4_MAX_OPS;
@@ -5111,7 +5123,6 @@ static void nfs4_init_channel_attrs(struct nfs41_create_session_args *args)
args->fc_attrs.max_ops, args->fc_attrs.max_reqs);
/* Back channel attributes */
- args->bc_attrs.headerpadsz = 0;
args->bc_attrs.max_rqst_sz = PAGE_SIZE;
args->bc_attrs.max_resp_sz = PAGE_SIZE;
args->bc_attrs.max_resp_sz_cached = 0;
@@ -5131,8 +5142,6 @@ static int nfs4_verify_fore_channel_attrs(struct nfs41_create_session_args *args
struct nfs4_channel_attrs *sent = &args->fc_attrs;
struct nfs4_channel_attrs *rcvd = &session->fc_attrs;
- if (rcvd->headerpadsz > sent->headerpadsz)
- return -EINVAL;
if (rcvd->max_resp_sz > sent->max_resp_sz)
return -EINVAL;
/*
@@ -5697,6 +5706,7 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata)
{
struct nfs4_layoutreturn *lrp = calldata;
struct nfs_server *server;
+ struct pnfs_layout_hdr *lo = NFS_I(lrp->args.inode)->layout;
dprintk("--> %s\n", __func__);
@@ -5708,16 +5718,15 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata)
nfs_restart_rpc(task, lrp->clp);
return;
}
+ spin_lock(&lo->plh_inode->i_lock);
if (task->tk_status == 0) {
- struct pnfs_layout_hdr *lo = NFS_I(lrp->args.inode)->layout;
-
if (lrp->res.lrs_present) {
- spin_lock(&lo->plh_inode->i_lock);
pnfs_set_layout_stateid(lo, &lrp->res.stateid, true);
- spin_unlock(&lo->plh_inode->i_lock);
} else
BUG_ON(!list_empty(&lo->plh_segs));
}
+ lo->plh_block_lgets--;
+ spin_unlock(&lo->plh_inode->i_lock);
dprintk("<-- %s\n", __func__);
}
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index d869a5e5464b..6870bc61ceec 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -255,7 +255,7 @@ static int nfs4_stat_to_errno(int);
#define decode_fs_locations_maxsz \
(0)
#define encode_secinfo_maxsz (op_encode_hdr_maxsz + nfs4_name_maxsz)
-#define decode_secinfo_maxsz (op_decode_hdr_maxsz + 4 + (NFS_MAX_SECFLAVORS * (16 + GSS_OID_MAX_LEN)))
+#define decode_secinfo_maxsz (op_decode_hdr_maxsz + 1 + ((NFS_MAX_SECFLAVORS * (16 + GSS_OID_MAX_LEN)) / 4))
#if defined(CONFIG_NFS_V4_1)
#define NFS4_MAX_MACHINE_NAME_LEN (64)
@@ -1725,7 +1725,7 @@ static void encode_create_session(struct xdr_stream *xdr,
*p++ = cpu_to_be32(args->flags); /*flags */
/* Fore Channel */
- *p++ = cpu_to_be32(args->fc_attrs.headerpadsz); /* header padding size */
+ *p++ = cpu_to_be32(0); /* header padding size */
*p++ = cpu_to_be32(args->fc_attrs.max_rqst_sz); /* max req size */
*p++ = cpu_to_be32(args->fc_attrs.max_resp_sz); /* max resp size */
*p++ = cpu_to_be32(max_resp_sz_cached); /* Max resp sz cached */
@@ -1734,7 +1734,7 @@ static void encode_create_session(struct xdr_stream *xdr,
*p++ = cpu_to_be32(0); /* rdmachannel_attrs */
/* Back Channel */
- *p++ = cpu_to_be32(args->fc_attrs.headerpadsz); /* header padding size */
+ *p++ = cpu_to_be32(0); /* header padding size */
*p++ = cpu_to_be32(args->bc_attrs.max_rqst_sz); /* max req size */
*p++ = cpu_to_be32(args->bc_attrs.max_resp_sz); /* max resp size */
*p++ = cpu_to_be32(args->bc_attrs.max_resp_sz_cached); /* Max resp sz cached */
@@ -3098,7 +3098,7 @@ out_overflow:
return -EIO;
}
-static int decode_attr_error(struct xdr_stream *xdr, uint32_t *bitmap)
+static int decode_attr_error(struct xdr_stream *xdr, uint32_t *bitmap, int32_t *res)
{
__be32 *p;
@@ -3109,7 +3109,7 @@ static int decode_attr_error(struct xdr_stream *xdr, uint32_t *bitmap)
if (unlikely(!p))
goto out_overflow;
bitmap[0] &= ~FATTR4_WORD0_RDATTR_ERROR;
- return -be32_to_cpup(p);
+ *res = -be32_to_cpup(p);
}
return 0;
out_overflow:
@@ -4070,6 +4070,7 @@ static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap,
int status;
umode_t fmode = 0;
uint32_t type;
+ int32_t err;
status = decode_attr_type(xdr, bitmap, &type);
if (status < 0)
@@ -4095,13 +4096,12 @@ static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap,
goto xdr_error;
fattr->valid |= status;
- status = decode_attr_error(xdr, bitmap);
- if (status == -NFS4ERR_WRONGSEC) {
- nfs_fixup_secinfo_attributes(fattr, fh);
- status = 0;
- }
+ err = 0;
+ status = decode_attr_error(xdr, bitmap, &err);
if (status < 0)
goto xdr_error;
+ if (err == -NFS4ERR_WRONGSEC)
+ nfs_fixup_secinfo_attributes(fattr, fh);
status = decode_attr_filehandle(xdr, bitmap, fh);
if (status < 0)
@@ -4997,12 +4997,14 @@ static int decode_chan_attrs(struct xdr_stream *xdr,
struct nfs4_channel_attrs *attrs)
{
__be32 *p;
- u32 nr_attrs;
+ u32 nr_attrs, val;
p = xdr_inline_decode(xdr, 28);
if (unlikely(!p))
goto out_overflow;
- attrs->headerpadsz = be32_to_cpup(p++);
+ val = be32_to_cpup(p++); /* headerpadsz */
+ if (val)
+ return -EINVAL; /* no support for header padding yet */
attrs->max_rqst_sz = be32_to_cpup(p++);
attrs->max_resp_sz = be32_to_cpup(p++);
attrs->max_resp_sz_cached = be32_to_cpup(p++);
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
index 9cf208df1f25..8ff2ea3f10ef 100644
--- a/fs/nfs/objlayout/objio_osd.c
+++ b/fs/nfs/objlayout/objio_osd.c
@@ -108,7 +108,6 @@ _dev_list_add(const struct nfs_server *nfss,
de = n;
}
- atomic_inc(&de->id_node.ref);
return de;
}
@@ -1001,6 +1000,9 @@ static bool objio_pg_test(struct nfs_pageio_descriptor *pgio,
if (!pnfs_generic_pg_test(pgio, prev, req))
return false;
+ if (pgio->pg_lseg == NULL)
+ return true;
+
return pgio->pg_count + req->wb_bytes <=
OBJIO_LSEG(pgio->pg_lseg)->max_io_size;
}
diff --git a/fs/nfs/objlayout/objlayout.c b/fs/nfs/objlayout/objlayout.c
index dc3956c0de80..1d06f8e2adea 100644
--- a/fs/nfs/objlayout/objlayout.c
+++ b/fs/nfs/objlayout/objlayout.c
@@ -291,7 +291,7 @@ objlayout_read_done(struct objlayout_io_state *state, ssize_t status, bool sync)
struct nfs_read_data *rdata;
state->status = status;
- dprintk("%s: Begin status=%ld eof=%d\n", __func__, status, eof);
+ dprintk("%s: Begin status=%zd eof=%d\n", __func__, status, eof);
rdata = state->rpcdata;
rdata->task.tk_status = status;
if (status >= 0) {
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 7913961aff22..009855716286 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -204,7 +204,7 @@ nfs_wait_on_request(struct nfs_page *req)
TASK_UNINTERRUPTIBLE);
}
-static bool nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, struct nfs_page *prev, struct nfs_page *req)
+bool nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, struct nfs_page *prev, struct nfs_page *req)
{
/*
* FIXME: ideally we should be able to coalesce all requests
@@ -218,6 +218,7 @@ static bool nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, struct nfs_p
return desc->pg_count + req->wb_bytes <= desc->pg_bsize;
}
+EXPORT_SYMBOL_GPL(nfs_generic_pg_test);
/**
* nfs_pageio_init - initialise a page io descriptor
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 8c1309d852a6..29c0ca7fc347 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -634,14 +634,16 @@ _pnfs_return_layout(struct inode *ino)
spin_lock(&ino->i_lock);
lo = nfsi->layout;
- if (!lo || !mark_matching_lsegs_invalid(lo, &tmp_list, NULL)) {
+ if (!lo) {
spin_unlock(&ino->i_lock);
- dprintk("%s: no layout segments to return\n", __func__);
- goto out;
+ dprintk("%s: no layout to return\n", __func__);
+ return status;
}
stateid = nfsi->layout->plh_stateid;
/* Reference matched in nfs4_layoutreturn_release */
get_layout_hdr(lo);
+ mark_matching_lsegs_invalid(lo, &tmp_list, NULL);
+ lo->plh_block_lgets++;
spin_unlock(&ino->i_lock);
pnfs_free_lseg_list(&tmp_list);
@@ -650,6 +652,9 @@ _pnfs_return_layout(struct inode *ino)
lrp = kzalloc(sizeof(*lrp), GFP_KERNEL);
if (unlikely(lrp == NULL)) {
status = -ENOMEM;
+ set_bit(NFS_LAYOUT_RW_FAILED, &lo->plh_flags);
+ set_bit(NFS_LAYOUT_RO_FAILED, &lo->plh_flags);
+ put_layout_hdr(lo);
goto out;
}
@@ -887,7 +892,7 @@ pnfs_find_lseg(struct pnfs_layout_hdr *lo,
ret = get_lseg(lseg);
break;
}
- if (cmp_layout(range, &lseg->pls_range) > 0)
+ if (lseg->pls_range.offset > range->offset)
break;
}
@@ -1059,23 +1064,36 @@ pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
gfp_flags = GFP_NOFS;
}
- if (pgio->pg_count == prev->wb_bytes) {
+ if (pgio->pg_lseg == NULL) {
+ if (pgio->pg_count != prev->wb_bytes)
+ return true;
/* This is first coelesce call for a series of nfs_pages */
pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
prev->wb_context,
- req_offset(req),
+ req_offset(prev),
pgio->pg_count,
access_type,
gfp_flags);
- return true;
+ if (pgio->pg_lseg == NULL)
+ return true;
}
- if (pgio->pg_lseg &&
- req_offset(req) > end_offset(pgio->pg_lseg->pls_range.offset,
- pgio->pg_lseg->pls_range.length))
- return false;
-
- return true;
+ /*
+ * Test if a nfs_page is fully contained in the pnfs_layout_range.
+ * Note that this test makes several assumptions:
+ * - that the previous nfs_page in the struct nfs_pageio_descriptor
+ * is known to lie within the range.
+ * - that the nfs_page being tested is known to be contiguous with the
+ * previous nfs_page.
+ * - Layout ranges are page aligned, so we only have to test the
+ * start offset of the request.
+ *
+ * Please also note that 'end_offset' is actually the offset of the
+ * first byte that lies outside the pnfs_layout_range. FIXME?
+ *
+ */
+ return req_offset(req) < end_offset(pgio->pg_lseg->pls_range.offset,
+ pgio->pg_lseg->pls_range.length);
}
EXPORT_SYMBOL_GPL(pnfs_generic_pg_test);
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 48d0a8e4d062..96bf4e6f45be 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -186,6 +186,7 @@ int pnfs_ld_read_done(struct nfs_read_data *);
/* pnfs_dev.c */
struct nfs4_deviceid_node {
struct hlist_node node;
+ struct hlist_node tmpnode;
const struct pnfs_layoutdriver_type *ld;
const struct nfs_client *nfs_client;
struct nfs4_deviceid deviceid;
diff --git a/fs/nfs/pnfs_dev.c b/fs/nfs/pnfs_dev.c
index c65e133ce9c0..f0f8e1e22f6c 100644
--- a/fs/nfs/pnfs_dev.c
+++ b/fs/nfs/pnfs_dev.c
@@ -174,6 +174,7 @@ nfs4_init_deviceid_node(struct nfs4_deviceid_node *d,
const struct nfs4_deviceid *id)
{
INIT_HLIST_NODE(&d->node);
+ INIT_HLIST_NODE(&d->tmpnode);
d->ld = ld;
d->nfs_client = nfs_client;
d->deviceid = *id;
@@ -208,6 +209,7 @@ nfs4_insert_deviceid_node(struct nfs4_deviceid_node *new)
hlist_add_head_rcu(&new->node, &nfs4_deviceid_cache[hash]);
spin_unlock(&nfs4_deviceid_lock);
+ atomic_inc(&new->ref);
return new;
}
@@ -238,24 +240,29 @@ static void
_deviceid_purge_client(const struct nfs_client *clp, long hash)
{
struct nfs4_deviceid_node *d;
- struct hlist_node *n, *next;
+ struct hlist_node *n;
HLIST_HEAD(tmp);
+ spin_lock(&nfs4_deviceid_lock);
rcu_read_lock();
hlist_for_each_entry_rcu(d, n, &nfs4_deviceid_cache[hash], node)
if (d->nfs_client == clp && atomic_read(&d->ref)) {
hlist_del_init_rcu(&d->node);
- hlist_add_head(&d->node, &tmp);
+ hlist_add_head(&d->tmpnode, &tmp);
}
rcu_read_unlock();
+ spin_unlock(&nfs4_deviceid_lock);
if (hlist_empty(&tmp))
return;
synchronize_rcu();
- hlist_for_each_entry_safe(d, n, next, &tmp, node)
+ while (!hlist_empty(&tmp)) {
+ d = hlist_entry(tmp.first, struct nfs4_deviceid_node, tmpnode);
+ hlist_del(&d->tmpnode);
if (atomic_dec_and_test(&d->ref))
d->ld->free_deviceid_node(d);
+ }
}
void
@@ -263,8 +270,8 @@ nfs4_deviceid_purge_client(const struct nfs_client *clp)
{
long h;
- spin_lock(&nfs4_deviceid_lock);
+ if (!(clp->cl_exchange_flags & EXCHGID4_FLAG_USE_PNFS_MDS))
+ return;
for (h = 0; h < NFS4_DEVICE_ID_HASH_SIZE; h++)
_deviceid_purge_client(clp, h);
- spin_unlock(&nfs4_deviceid_lock);
}
diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig
index 18b3e8975fe0..fbb2a5ef5817 100644
--- a/fs/nfsd/Kconfig
+++ b/fs/nfsd/Kconfig
@@ -82,6 +82,7 @@ config NFSD_V4
select NFSD_V3
select FS_POSIX_ACL
select SUNRPC_GSS
+ select CRYPTO
help
This option enables support in your system's NFS server for
version 4 of the NFS protocol (RFC 3530).
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 1f5eae40f34e..2b1449dd2f49 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -13,6 +13,7 @@
#include <linux/lockd/lockd.h>
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/gss_api.h>
+#include <linux/sunrpc/gss_krb5_enctypes.h>
#include "idmap.h"
#include "nfsd.h"
@@ -189,18 +190,10 @@ static struct file_operations export_features_operations = {
.release = single_release,
};
-#ifdef CONFIG_SUNRPC_GSS
+#if defined(CONFIG_SUNRPC_GSS) || defined(CONFIG_SUNRPC_GSS_MODULE)
static int supported_enctypes_show(struct seq_file *m, void *v)
{
- struct gss_api_mech *k5mech;
-
- k5mech = gss_mech_get_by_name("krb5");
- if (k5mech == NULL)
- goto out;
- if (k5mech->gm_upcall_enctypes != NULL)
- seq_printf(m, k5mech->gm_upcall_enctypes);
- gss_mech_put(k5mech);
-out:
+ seq_printf(m, KRB5_SUPPORTED_ENCTYPES);
return 0;
}
@@ -215,7 +208,7 @@ static struct file_operations supported_enctypes_ops = {
.llseek = seq_lseek,
.release = single_release,
};
-#endif /* CONFIG_SUNRPC_GSS */
+#endif /* CONFIG_SUNRPC_GSS or CONFIG_SUNRPC_GSS_MODULE */
extern int nfsd_pool_stats_open(struct inode *inode, struct file *file);
extern int nfsd_pool_stats_release(struct inode *inode, struct file *file);
@@ -1427,9 +1420,9 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent)
[NFSD_Versions] = {"versions", &transaction_ops, S_IWUSR|S_IRUSR},
[NFSD_Ports] = {"portlist", &transaction_ops, S_IWUSR|S_IRUGO},
[NFSD_MaxBlkSize] = {"max_block_size", &transaction_ops, S_IWUSR|S_IRUGO},
-#ifdef CONFIG_SUNRPC_GSS
+#if defined(CONFIG_SUNRPC_GSS) || defined(CONFIG_SUNRPC_GSS_MODULE)
[NFSD_SupportedEnctypes] = {"supported_krb5_enctypes", &supported_enctypes_ops, S_IRUGO},
-#endif /* CONFIG_SUNRPC_GSS */
+#endif /* CONFIG_SUNRPC_GSS or CONFIG_SUNRPC_GSS_MODULE */
#ifdef CONFIG_NFSD_V4
[NFSD_Leasetime] = {"nfsv4leasetime", &transaction_ops, S_IWUSR|S_IRUSR},
[NFSD_Gracetime] = {"nfsv4gracetime", &transaction_ops, S_IWUSR|S_IRUSR},
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index d5718273bb32..fd0acca5370a 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -696,7 +696,15 @@ nfsd_access(struct svc_rqst *rqstp, struct svc_fh *fhp, u32 *access, u32 *suppor
}
#endif /* CONFIG_NFSD_V3 */
+static int nfsd_open_break_lease(struct inode *inode, int access)
+{
+ unsigned int mode;
+ if (access & NFSD_MAY_NOT_BREAK_LEASE)
+ return 0;
+ mode = (access & NFSD_MAY_WRITE) ? O_WRONLY : O_RDONLY;
+ return break_lease(inode, mode | O_NONBLOCK);
+}
/*
* Open an existing file or directory.
@@ -744,12 +752,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
if (!inode->i_fop)
goto out;
- /*
- * Check to see if there are any leases on this file.
- * This may block while leases are broken.
- */
- if (!(access & NFSD_MAY_NOT_BREAK_LEASE))
- host_err = break_lease(inode, O_NONBLOCK | ((access & NFSD_MAY_WRITE) ? O_WRONLY : 0));
+ host_err = nfsd_open_break_lease(inode, access);
if (host_err) /* NOMEM or WOULDBLOCK */
goto out_nfserr;
@@ -1660,8 +1663,10 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
if (!dold->d_inode)
goto out_drop_write;
host_err = nfsd_break_lease(dold->d_inode);
- if (host_err)
+ if (host_err) {
+ err = nfserrno(host_err);
goto out_drop_write;
+ }
host_err = vfs_link(dold, dirp, dnew);
if (!host_err) {
err = nfserrno(commit_metadata(ffhp));
diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c
index 7eafe468a29c..b2e3ff347620 100644
--- a/fs/nilfs2/btree.c
+++ b/fs/nilfs2/btree.c
@@ -1346,6 +1346,11 @@ static void nilfs_btree_shrink(struct nilfs_bmap *btree,
path[level].bp_bh = NULL;
}
+static void nilfs_btree_nop(struct nilfs_bmap *btree,
+ struct nilfs_btree_path *path,
+ int level, __u64 *keyp, __u64 *ptrp)
+{
+}
static int nilfs_btree_prepare_delete(struct nilfs_bmap *btree,
struct nilfs_btree_path *path,
@@ -1356,20 +1361,19 @@ static int nilfs_btree_prepare_delete(struct nilfs_bmap *btree,
struct buffer_head *bh;
struct nilfs_btree_node *node, *parent, *sib;
__u64 sibptr;
- int pindex, level, ncmin, ncmax, ncblk, ret;
+ int pindex, dindex, level, ncmin, ncmax, ncblk, ret;
ret = 0;
stats->bs_nblocks = 0;
ncmin = NILFS_BTREE_NODE_NCHILDREN_MIN(nilfs_btree_node_size(btree));
ncblk = nilfs_btree_nchildren_per_block(btree);
- for (level = NILFS_BTREE_LEVEL_NODE_MIN;
+ for (level = NILFS_BTREE_LEVEL_NODE_MIN, dindex = path[level].bp_index;
level < nilfs_btree_height(btree) - 1;
level++) {
node = nilfs_btree_get_nonroot_node(path, level);
path[level].bp_oldreq.bpr_ptr =
- nilfs_btree_node_get_ptr(node, path[level].bp_index,
- ncblk);
+ nilfs_btree_node_get_ptr(node, dindex, ncblk);
ret = nilfs_bmap_prepare_end_ptr(btree,
&path[level].bp_oldreq, dat);
if (ret < 0)
@@ -1383,6 +1387,7 @@ static int nilfs_btree_prepare_delete(struct nilfs_bmap *btree,
parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
pindex = path[level + 1].bp_index;
+ dindex = pindex;
if (pindex > 0) {
/* left sibling */
@@ -1421,6 +1426,14 @@ static int nilfs_btree_prepare_delete(struct nilfs_bmap *btree,
path[level].bp_sib_bh = bh;
path[level].bp_op = nilfs_btree_concat_right;
stats->bs_nblocks++;
+ /*
+ * When merging right sibling node
+ * into the current node, pointer to
+ * the right sibling node must be
+ * terminated instead. The adjustment
+ * below is required for that.
+ */
+ dindex = pindex + 1;
/* continue; */
}
} else {
@@ -1431,29 +1444,31 @@ static int nilfs_btree_prepare_delete(struct nilfs_bmap *btree,
NILFS_BTREE_ROOT_NCHILDREN_MAX) {
path[level].bp_op = nilfs_btree_shrink;
stats->bs_nblocks += 2;
+ level++;
+ path[level].bp_op = nilfs_btree_nop;
+ goto shrink_root_child;
} else {
path[level].bp_op = nilfs_btree_do_delete;
stats->bs_nblocks++;
+ goto out;
}
-
- goto out;
-
}
}
+ /* child of the root node is deleted */
+ path[level].bp_op = nilfs_btree_do_delete;
+ stats->bs_nblocks++;
+
+shrink_root_child:
node = nilfs_btree_get_root(btree);
path[level].bp_oldreq.bpr_ptr =
- nilfs_btree_node_get_ptr(node, path[level].bp_index,
+ nilfs_btree_node_get_ptr(node, dindex,
NILFS_BTREE_ROOT_NCHILDREN_MAX);
ret = nilfs_bmap_prepare_end_ptr(btree, &path[level].bp_oldreq, dat);
if (ret < 0)
goto err_out_child_node;
- /* child of the root node is deleted */
- path[level].bp_op = nilfs_btree_do_delete;
- stats->bs_nblocks++;
-
/* success */
out:
*levelp = level;
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index b954878ad6ce..b9b45fc2903e 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -801,12 +801,7 @@ out_err:
int nilfs_permission(struct inode *inode, int mask, unsigned int flags)
{
- struct nilfs_root *root;
-
- if (flags & IPERM_FLAG_RCU)
- return -ECHILD;
-
- root = NILFS_I(inode)->i_root;
+ struct nilfs_root *root = NILFS_I(inode)->i_root;
if ((mask & MAY_WRITE) && root &&
root->cno != NILFS_CPTREE_CURRENT_CNO)
return -EROFS; /* snapshot is not writable */
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index 141646e88fb5..bb24ab6c282f 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -2573,7 +2573,7 @@ static struct nilfs_sc_info *nilfs_segctor_new(struct super_block *sb,
sci->sc_watermark = NILFS_SC_DEFAULT_WATERMARK;
if (nilfs->ns_interval)
- sci->sc_interval = nilfs->ns_interval;
+ sci->sc_interval = HZ * nilfs->ns_interval;
if (nilfs->ns_watermark)
sci->sc_watermark = nilfs->ns_watermark;
return sci;
diff --git a/fs/omfs/file.c b/fs/omfs/file.c
index d738a7e493dd..2c6d95257a4d 100644
--- a/fs/omfs/file.c
+++ b/fs/omfs/file.c
@@ -4,7 +4,6 @@
* Released under GPL v2.
*/
-#include <linux/version.h>
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/buffer_head.h>
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 14def991d9dd..fc5bc2767692 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2169,11 +2169,7 @@ static const struct file_operations proc_fd_operations = {
*/
static int proc_fd_permission(struct inode *inode, int mask, unsigned int flags)
{
- int rv;
-
- if (flags & IPERM_FLAG_RCU)
- return -ECHILD;
- rv = generic_permission(inode, mask, flags, NULL);
+ int rv = generic_permission(inode, mask, flags, NULL);
if (rv == 0)
return 0;
if (task_pid(current) == proc_pid(inode))
@@ -2712,6 +2708,9 @@ static int do_io_accounting(struct task_struct *task, char *buffer, int whole)
struct task_io_accounting acct = task->ioac;
unsigned long flags;
+ if (!ptrace_may_access(task, PTRACE_MODE_READ))
+ return -EACCES;
+
if (whole && lock_task_sighand(task, &flags)) {
struct task_struct *t = task;
@@ -2843,7 +2842,7 @@ static const struct pid_entry tgid_base_stuff[] = {
REG("coredump_filter", S_IRUGO|S_IWUSR, proc_coredump_filter_operations),
#endif
#ifdef CONFIG_TASK_IO_ACCOUNTING
- INF("io", S_IRUGO, proc_tgid_io_accounting),
+ INF("io", S_IRUSR, proc_tgid_io_accounting),
#endif
#ifdef CONFIG_HARDWALL
INF("hardwall", S_IRUGO, proc_pid_hardwall),
@@ -3185,7 +3184,7 @@ static const struct pid_entry tid_base_stuff[] = {
REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations),
#endif
#ifdef CONFIG_TASK_IO_ACCOUNTING
- INF("io", S_IRUGO, proc_tid_io_accounting),
+ INF("io", S_IRUSR, proc_tid_io_accounting),
#endif
#ifdef CONFIG_HARDWALL
INF("hardwall", S_IRUGO, proc_pid_hardwall),
diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c
index 781dec5bd682..be177f702acb 100644
--- a/fs/proc/namespaces.c
+++ b/fs/proc/namespaces.c
@@ -38,18 +38,21 @@ static struct dentry *proc_ns_instantiate(struct inode *dir,
struct inode *inode;
struct proc_inode *ei;
struct dentry *error = ERR_PTR(-ENOENT);
+ void *ns;
inode = proc_pid_make_inode(dir->i_sb, task);
if (!inode)
goto out;
+ ns = ns_ops->get(task);
+ if (!ns)
+ goto out_iput;
+
ei = PROC_I(inode);
inode->i_mode = S_IFREG|S_IRUSR;
inode->i_fop = &ns_file_operations;
ei->ns_ops = ns_ops;
- ei->ns = ns_ops->get(task);
- if (!ei->ns)
- goto out_iput;
+ ei->ns = ns;
dentry->d_op = &pid_dentry_operations;
d_add(dentry, inode);
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index f50133c11c24..d167de365a8d 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -304,9 +304,6 @@ static int proc_sys_permission(struct inode *inode, int mask,unsigned int flags)
struct ctl_table *table;
int error;
- if (flags & IPERM_FLAG_RCU)
- return -ECHILD;
-
/* Executable files are not allowed under /proc/sys/ */
if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode))
return -EACCES;
diff --git a/fs/proc/root.c b/fs/proc/root.c
index a9000e9cfee5..d6c3b416529b 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -28,11 +28,12 @@ static int proc_test_super(struct super_block *sb, void *data)
static int proc_set_super(struct super_block *sb, void *data)
{
- struct pid_namespace *ns;
-
- ns = (struct pid_namespace *)data;
- sb->s_fs_info = get_pid_ns(ns);
- return set_anon_super(sb, NULL);
+ int err = set_anon_super(sb, NULL);
+ if (!err) {
+ struct pid_namespace *ns = (struct pid_namespace *)data;
+ sb->s_fs_info = get_pid_ns(ns);
+ }
+ return err;
}
static struct dentry *proc_mount(struct file_system_type *fs_type,
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index e8a62f41b458..d78089690965 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -954,8 +954,6 @@ static int xattr_mount_check(struct super_block *s)
int reiserfs_permission(struct inode *inode, int mask, unsigned int flags)
{
- if (flags & IPERM_FLAG_RCU)
- return -ECHILD;
/*
* We don't do permission checks on the internal objects.
* Permissions are determined by the "owning" object.
diff --git a/fs/romfs/mmap-nommu.c b/fs/romfs/mmap-nommu.c
index f0511e816967..eed99428f104 100644
--- a/fs/romfs/mmap-nommu.c
+++ b/fs/romfs/mmap-nommu.c
@@ -27,14 +27,18 @@ static unsigned long romfs_get_unmapped_area(struct file *file,
{
struct inode *inode = file->f_mapping->host;
struct mtd_info *mtd = inode->i_sb->s_mtd;
- unsigned long isize, offset;
+ unsigned long isize, offset, maxpages, lpages;
if (!mtd)
goto cant_map_directly;
+ /* the mapping mustn't extend beyond the EOF */
+ lpages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
isize = i_size_read(inode);
offset = pgoff << PAGE_SHIFT;
- if (offset > isize || len > isize || offset > isize - len)
+
+ maxpages = (isize + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ if ((pgoff >= maxpages) || (maxpages - pgoff < lpages))
return (unsigned long) -EINVAL;
/* we need to call down to the MTD layer to do the actual mapping */
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index 266895783b47..e34f0d99ea4e 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -95,6 +95,14 @@ static int sysfs_set_super(struct super_block *sb, void *data)
return error;
}
+static void free_sysfs_super_info(struct sysfs_super_info *info)
+{
+ int type;
+ for (type = KOBJ_NS_TYPE_NONE; type < KOBJ_NS_TYPES; type++)
+ kobj_ns_drop(type, info->ns[type]);
+ kfree(info);
+}
+
static struct dentry *sysfs_mount(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data)
{
@@ -108,11 +116,11 @@ static struct dentry *sysfs_mount(struct file_system_type *fs_type,
return ERR_PTR(-ENOMEM);
for (type = KOBJ_NS_TYPE_NONE; type < KOBJ_NS_TYPES; type++)
- info->ns[type] = kobj_ns_current(type);
+ info->ns[type] = kobj_ns_grab_current(type);
sb = sget(fs_type, sysfs_test_super, sysfs_set_super, info);
if (IS_ERR(sb) || sb->s_fs_info != info)
- kfree(info);
+ free_sysfs_super_info(info);
if (IS_ERR(sb))
return ERR_CAST(sb);
if (!sb->s_root) {
@@ -131,12 +139,11 @@ static struct dentry *sysfs_mount(struct file_system_type *fs_type,
static void sysfs_kill_sb(struct super_block *sb)
{
struct sysfs_super_info *info = sysfs_info(sb);
-
/* Remove the superblock from fs_supers/s_instances
* so we can't find it, before freeing sysfs_super_info.
*/
kill_anon_super(sb);
- kfree(info);
+ free_sysfs_super_info(info);
}
static struct file_system_type sysfs_fs_type = {
@@ -145,28 +152,6 @@ static struct file_system_type sysfs_fs_type = {
.kill_sb = sysfs_kill_sb,
};
-void sysfs_exit_ns(enum kobj_ns_type type, const void *ns)
-{
- struct super_block *sb;
-
- mutex_lock(&sysfs_mutex);
- spin_lock(&sb_lock);
- list_for_each_entry(sb, &sysfs_fs_type.fs_supers, s_instances) {
- struct sysfs_super_info *info = sysfs_info(sb);
- /*
- * If we see a superblock on the fs_supers/s_instances
- * list the unmount has not completed and sb->s_fs_info
- * points to a valid struct sysfs_super_info.
- */
- /* Ignore superblocks with the wrong ns */
- if (info->ns[type] != ns)
- continue;
- info->ns[type] = NULL;
- }
- spin_unlock(&sb_lock);
- mutex_unlock(&sysfs_mutex);
-}
-
int __init sysfs_init(void)
{
int err = -ENOMEM;
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index 3d28af31d863..2ed2404f3113 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -136,7 +136,7 @@ struct sysfs_addrm_cxt {
* instance).
*/
struct sysfs_super_info {
- const void *ns[KOBJ_NS_TYPES];
+ void *ns[KOBJ_NS_TYPES];
};
#define sysfs_info(SB) ((struct sysfs_super_info *)(SB->s_fs_info))
extern struct sysfs_dirent sysfs_root;
diff --git a/fs/timerfd.c b/fs/timerfd.c
index f67acbdda5e8..dffeb3795af1 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -61,7 +61,9 @@ static enum hrtimer_restart timerfd_tmrproc(struct hrtimer *htmr)
/*
* Called when the clock was set to cancel the timers in the cancel
- * list.
+ * list. This will wake up processes waiting on these timers. The
+ * wake-up requires ctx->ticks to be non zero, therefore we increment
+ * it before calling wake_up_locked().
*/
void timerfd_clock_was_set(void)
{
@@ -76,6 +78,7 @@ void timerfd_clock_was_set(void)
spin_lock_irqsave(&ctx->wqh.lock, flags);
if (ctx->moffs.tv64 != moffs.tv64) {
ctx->moffs.tv64 = KTIME_MAX;
+ ctx->ticks++;
wake_up_locked(&ctx->wqh);
}
spin_unlock_irqrestore(&ctx->wqh.lock, flags);
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index b5aeb5a8ebed..529be0582029 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -1848,7 +1848,6 @@ static void ubifs_put_super(struct super_block *sb)
bdi_destroy(&c->bdi);
ubi_close_volume(c->ubi);
mutex_unlock(&c->umount_mutex);
- kfree(c);
}
static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data)
@@ -1971,61 +1970,65 @@ static struct ubi_volume_desc *open_ubi(const char *name, int mode)
return ERR_PTR(-EINVAL);
}
-static int ubifs_fill_super(struct super_block *sb, void *data, int silent)
+static struct ubifs_info *alloc_ubifs_info(struct ubi_volume_desc *ubi)
{
- struct ubi_volume_desc *ubi = sb->s_fs_info;
struct ubifs_info *c;
- struct inode *root;
- int err;
c = kzalloc(sizeof(struct ubifs_info), GFP_KERNEL);
- if (!c)
- return -ENOMEM;
+ if (c) {
+ spin_lock_init(&c->cnt_lock);
+ spin_lock_init(&c->cs_lock);
+ spin_lock_init(&c->buds_lock);
+ spin_lock_init(&c->space_lock);
+ spin_lock_init(&c->orphan_lock);
+ init_rwsem(&c->commit_sem);
+ mutex_init(&c->lp_mutex);
+ mutex_init(&c->tnc_mutex);
+ mutex_init(&c->log_mutex);
+ mutex_init(&c->mst_mutex);
+ mutex_init(&c->umount_mutex);
+ mutex_init(&c->bu_mutex);
+ mutex_init(&c->write_reserve_mutex);
+ init_waitqueue_head(&c->cmt_wq);
+ c->buds = RB_ROOT;
+ c->old_idx = RB_ROOT;
+ c->size_tree = RB_ROOT;
+ c->orph_tree = RB_ROOT;
+ INIT_LIST_HEAD(&c->infos_list);
+ INIT_LIST_HEAD(&c->idx_gc);
+ INIT_LIST_HEAD(&c->replay_list);
+ INIT_LIST_HEAD(&c->replay_buds);
+ INIT_LIST_HEAD(&c->uncat_list);
+ INIT_LIST_HEAD(&c->empty_list);
+ INIT_LIST_HEAD(&c->freeable_list);
+ INIT_LIST_HEAD(&c->frdi_idx_list);
+ INIT_LIST_HEAD(&c->unclean_leb_list);
+ INIT_LIST_HEAD(&c->old_buds);
+ INIT_LIST_HEAD(&c->orph_list);
+ INIT_LIST_HEAD(&c->orph_new);
+ c->no_chk_data_crc = 1;
+
+ c->highest_inum = UBIFS_FIRST_INO;
+ c->lhead_lnum = c->ltail_lnum = UBIFS_LOG_LNUM;
+
+ ubi_get_volume_info(ubi, &c->vi);
+ ubi_get_device_info(c->vi.ubi_num, &c->di);
+ }
+ return c;
+}
- spin_lock_init(&c->cnt_lock);
- spin_lock_init(&c->cs_lock);
- spin_lock_init(&c->buds_lock);
- spin_lock_init(&c->space_lock);
- spin_lock_init(&c->orphan_lock);
- init_rwsem(&c->commit_sem);
- mutex_init(&c->lp_mutex);
- mutex_init(&c->tnc_mutex);
- mutex_init(&c->log_mutex);
- mutex_init(&c->mst_mutex);
- mutex_init(&c->umount_mutex);
- mutex_init(&c->bu_mutex);
- mutex_init(&c->write_reserve_mutex);
- init_waitqueue_head(&c->cmt_wq);
- c->buds = RB_ROOT;
- c->old_idx = RB_ROOT;
- c->size_tree = RB_ROOT;
- c->orph_tree = RB_ROOT;
- INIT_LIST_HEAD(&c->infos_list);
- INIT_LIST_HEAD(&c->idx_gc);
- INIT_LIST_HEAD(&c->replay_list);
- INIT_LIST_HEAD(&c->replay_buds);
- INIT_LIST_HEAD(&c->uncat_list);
- INIT_LIST_HEAD(&c->empty_list);
- INIT_LIST_HEAD(&c->freeable_list);
- INIT_LIST_HEAD(&c->frdi_idx_list);
- INIT_LIST_HEAD(&c->unclean_leb_list);
- INIT_LIST_HEAD(&c->old_buds);
- INIT_LIST_HEAD(&c->orph_list);
- INIT_LIST_HEAD(&c->orph_new);
- c->no_chk_data_crc = 1;
+static int ubifs_fill_super(struct super_block *sb, void *data, int silent)
+{
+ struct ubifs_info *c = sb->s_fs_info;
+ struct inode *root;
+ int err;
c->vfs_sb = sb;
- c->highest_inum = UBIFS_FIRST_INO;
- c->lhead_lnum = c->ltail_lnum = UBIFS_LOG_LNUM;
-
- ubi_get_volume_info(ubi, &c->vi);
- ubi_get_device_info(c->vi.ubi_num, &c->di);
-
/* Re-open the UBI device in read-write mode */
c->ubi = ubi_open_volume(c->vi.ubi_num, c->vi.vol_id, UBI_READWRITE);
if (IS_ERR(c->ubi)) {
err = PTR_ERR(c->ubi);
- goto out_free;
+ goto out;
}
/*
@@ -2091,24 +2094,29 @@ out_bdi:
bdi_destroy(&c->bdi);
out_close:
ubi_close_volume(c->ubi);
-out_free:
- kfree(c);
+out:
return err;
}
static int sb_test(struct super_block *sb, void *data)
{
- dev_t *dev = data;
+ struct ubifs_info *c1 = data;
struct ubifs_info *c = sb->s_fs_info;
- return c->vi.cdev == *dev;
+ return c->vi.cdev == c1->vi.cdev;
+}
+
+static int sb_set(struct super_block *sb, void *data)
+{
+ sb->s_fs_info = data;
+ return set_anon_super(sb, NULL);
}
static struct dentry *ubifs_mount(struct file_system_type *fs_type, int flags,
const char *name, void *data)
{
struct ubi_volume_desc *ubi;
- struct ubi_volume_info vi;
+ struct ubifs_info *c;
struct super_block *sb;
int err;
@@ -2125,19 +2133,25 @@ static struct dentry *ubifs_mount(struct file_system_type *fs_type, int flags,
name, (int)PTR_ERR(ubi));
return ERR_CAST(ubi);
}
- ubi_get_volume_info(ubi, &vi);
- dbg_gen("opened ubi%d_%d", vi.ubi_num, vi.vol_id);
+ c = alloc_ubifs_info(ubi);
+ if (!c) {
+ err = -ENOMEM;
+ goto out_close;
+ }
+
+ dbg_gen("opened ubi%d_%d", c->vi.ubi_num, c->vi.vol_id);
- sb = sget(fs_type, &sb_test, &set_anon_super, &vi.cdev);
+ sb = sget(fs_type, sb_test, sb_set, c);
if (IS_ERR(sb)) {
err = PTR_ERR(sb);
+ kfree(c);
goto out_close;
}
if (sb->s_root) {
struct ubifs_info *c1 = sb->s_fs_info;
-
+ kfree(c);
/* A new mount point for already mounted UBIFS */
dbg_gen("this ubi volume is already mounted");
if (!!(flags & MS_RDONLY) != c1->ro_mount) {
@@ -2146,11 +2160,6 @@ static struct dentry *ubifs_mount(struct file_system_type *fs_type, int flags,
}
} else {
sb->s_flags = flags;
- /*
- * Pass 'ubi' to 'fill_super()' in sb->s_fs_info where it is
- * replaced by 'c'.
- */
- sb->s_fs_info = ubi;
err = ubifs_fill_super(sb, data, flags & MS_SILENT ? 1 : 0);
if (err)
goto out_deact;
@@ -2170,11 +2179,18 @@ out_close:
return ERR_PTR(err);
}
+static void kill_ubifs_super(struct super_block *s)
+{
+ struct ubifs_info *c = s->s_fs_info;
+ kill_anon_super(s);
+ kfree(c);
+}
+
static struct file_system_type ubifs_fs_type = {
.name = "ubifs",
.owner = THIS_MODULE,
.mount = ubifs_mount,
- .kill_sb = kill_anon_super,
+ .kill_sb = kill_ubifs_super,
};
/*
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index f4213ba1ff85..7f782af286bf 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -131,19 +131,34 @@ xfs_file_fsync(
{
struct inode *inode = file->f_mapping->host;
struct xfs_inode *ip = XFS_I(inode);
+ struct xfs_mount *mp = ip->i_mount;
struct xfs_trans *tp;
int error = 0;
int log_flushed = 0;
trace_xfs_file_fsync(ip);
- if (XFS_FORCED_SHUTDOWN(ip->i_mount))
+ if (XFS_FORCED_SHUTDOWN(mp))
return -XFS_ERROR(EIO);
xfs_iflags_clear(ip, XFS_ITRUNCATED);
xfs_ioend_wait(ip);
+ if (mp->m_flags & XFS_MOUNT_BARRIER) {
+ /*
+ * If we have an RT and/or log subvolume we need to make sure
+ * to flush the write cache the device used for file data
+ * first. This is to ensure newly written file data make
+ * it to disk before logging the new inode size in case of
+ * an extending write.
+ */
+ if (XFS_IS_REALTIME_INODE(ip))
+ xfs_blkdev_issue_flush(mp->m_rtdev_targp);
+ else if (mp->m_logdev_targp != mp->m_ddev_targp)
+ xfs_blkdev_issue_flush(mp->m_ddev_targp);
+ }
+
/*
* We always need to make sure that the required inode state is safe on
* disk. The inode might be clean but we still might need to force the
@@ -175,9 +190,9 @@ xfs_file_fsync(
* updates. The sync transaction will also force the log.
*/
xfs_iunlock(ip, XFS_ILOCK_SHARED);
- tp = xfs_trans_alloc(ip->i_mount, XFS_TRANS_FSYNC_TS);
+ tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
error = xfs_trans_reserve(tp, 0,
- XFS_FSYNC_TS_LOG_RES(ip->i_mount), 0, 0, 0);
+ XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0);
if (error) {
xfs_trans_cancel(tp, 0);
return -error;
@@ -209,28 +224,25 @@ xfs_file_fsync(
* force the log.
*/
if (xfs_ipincount(ip)) {
- error = _xfs_log_force_lsn(ip->i_mount,
+ error = _xfs_log_force_lsn(mp,
ip->i_itemp->ili_last_lsn,
XFS_LOG_SYNC, &log_flushed);
}
xfs_iunlock(ip, XFS_ILOCK_SHARED);
}
- if (ip->i_mount->m_flags & XFS_MOUNT_BARRIER) {
- /*
- * If the log write didn't issue an ordered tag we need
- * to flush the disk cache for the data device now.
- */
- if (!log_flushed)
- xfs_blkdev_issue_flush(ip->i_mount->m_ddev_targp);
-
- /*
- * If this inode is on the RT dev we need to flush that
- * cache as well.
- */
- if (XFS_IS_REALTIME_INODE(ip))
- xfs_blkdev_issue_flush(ip->i_mount->m_rtdev_targp);
- }
+ /*
+ * If we only have a single device, and the log force about was
+ * a no-op we might have to flush the data device cache here.
+ * This can only happen for fdatasync/O_DSYNC if we were overwriting
+ * an already allocated file and thus do not have any metadata to
+ * commit.
+ */
+ if ((mp->m_flags & XFS_MOUNT_BARRIER) &&
+ mp->m_logdev_targp == mp->m_ddev_targp &&
+ !XFS_IS_REALTIME_INODE(ip) &&
+ !log_flushed)
+ xfs_blkdev_issue_flush(mp->m_ddev_targp);
return -error;
}
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index dd21784525a8..d44d92cd12b1 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -182,7 +182,7 @@ xfs_vn_mknod(
if (IS_POSIXACL(dir)) {
default_acl = xfs_get_acl(dir, ACL_TYPE_DEFAULT);
if (IS_ERR(default_acl))
- return -PTR_ERR(default_acl);
+ return PTR_ERR(default_acl);
if (!default_acl)
mode &= ~current_umask();
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 1e3a7ce804dc..a1a881e68a9a 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -627,68 +627,6 @@ xfs_blkdev_put(
blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
}
-/*
- * Try to write out the superblock using barriers.
- */
-STATIC int
-xfs_barrier_test(
- xfs_mount_t *mp)
-{
- xfs_buf_t *sbp = xfs_getsb(mp, 0);
- int error;
-
- XFS_BUF_UNDONE(sbp);
- XFS_BUF_UNREAD(sbp);
- XFS_BUF_UNDELAYWRITE(sbp);
- XFS_BUF_WRITE(sbp);
- XFS_BUF_UNASYNC(sbp);
- XFS_BUF_ORDERED(sbp);
-
- xfsbdstrat(mp, sbp);
- error = xfs_buf_iowait(sbp);
-
- /*
- * Clear all the flags we set and possible error state in the
- * buffer. We only did the write to try out whether barriers
- * worked and shouldn't leave any traces in the superblock
- * buffer.
- */
- XFS_BUF_DONE(sbp);
- XFS_BUF_ERROR(sbp, 0);
- XFS_BUF_UNORDERED(sbp);
-
- xfs_buf_relse(sbp);
- return error;
-}
-
-STATIC void
-xfs_mountfs_check_barriers(xfs_mount_t *mp)
-{
- int error;
-
- if (mp->m_logdev_targp != mp->m_ddev_targp) {
- xfs_notice(mp,
- "Disabling barriers, not supported with external log device");
- mp->m_flags &= ~XFS_MOUNT_BARRIER;
- return;
- }
-
- if (xfs_readonly_buftarg(mp->m_ddev_targp)) {
- xfs_notice(mp,
- "Disabling barriers, underlying device is readonly");
- mp->m_flags &= ~XFS_MOUNT_BARRIER;
- return;
- }
-
- error = xfs_barrier_test(mp);
- if (error) {
- xfs_notice(mp,
- "Disabling barriers, trial barrier write failed");
- mp->m_flags &= ~XFS_MOUNT_BARRIER;
- return;
- }
-}
-
void
xfs_blkdev_issue_flush(
xfs_buftarg_t *buftarg)
@@ -1240,14 +1178,6 @@ xfs_fs_remount(
switch (token) {
case Opt_barrier:
mp->m_flags |= XFS_MOUNT_BARRIER;
-
- /*
- * Test if barriers are actually working if we can,
- * else delay this check until the filesystem is
- * marked writeable.
- */
- if (!(mp->m_flags & XFS_MOUNT_RDONLY))
- xfs_mountfs_check_barriers(mp);
break;
case Opt_nobarrier:
mp->m_flags &= ~XFS_MOUNT_BARRIER;
@@ -1282,8 +1212,6 @@ xfs_fs_remount(
/* ro -> rw */
if ((mp->m_flags & XFS_MOUNT_RDONLY) && !(*flags & MS_RDONLY)) {
mp->m_flags &= ~XFS_MOUNT_RDONLY;
- if (mp->m_flags & XFS_MOUNT_BARRIER)
- xfs_mountfs_check_barriers(mp);
/*
* If this is the first remount to writeable state we
@@ -1465,9 +1393,6 @@ xfs_fs_fill_super(
if (error)
goto out_free_sb;
- if (mp->m_flags & XFS_MOUNT_BARRIER)
- xfs_mountfs_check_barriers(mp);
-
error = xfs_filestream_mount(mp);
if (error)
goto out_free_sb;
diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c
index c86375378810..01d2072fb6d4 100644
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/xfs_attr.c
@@ -490,6 +490,13 @@ xfs_attr_remove_int(xfs_inode_t *dp, struct xfs_name *name, int flags)
args.whichfork = XFS_ATTR_FORK;
/*
+ * we have no control over the attribute names that userspace passes us
+ * to remove, so we have to allow the name lookup prior to attribute
+ * removal to fail.
+ */
+ args.op_flags = XFS_DA_OP_OKNOENT;
+
+ /*
* Attach the dquots to the inode.
*/
error = xfs_qm_dqattach(dp, 0);
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index cb9b6d1469f7..3631783b2b53 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -253,16 +253,21 @@ xfs_iget_cache_hit(
rcu_read_lock();
spin_lock(&ip->i_flags_lock);
- ip->i_flags &= ~XFS_INEW;
- ip->i_flags |= XFS_IRECLAIMABLE;
- __xfs_inode_set_reclaim_tag(pag, ip);
+ ip->i_flags &= ~(XFS_INEW | XFS_IRECLAIM);
+ ASSERT(ip->i_flags & XFS_IRECLAIMABLE);
trace_xfs_iget_reclaim_fail(ip);
goto out_error;
}
spin_lock(&pag->pag_ici_lock);
spin_lock(&ip->i_flags_lock);
- ip->i_flags &= ~(XFS_IRECLAIMABLE | XFS_IRECLAIM);
+
+ /*
+ * Clear the per-lifetime state in the inode as we are now
+ * effectively a new inode and need to return to the initial
+ * state before reuse occurs.
+ */
+ ip->i_flags &= ~XFS_IRECLAIM_RESET_FLAGS;
ip->i_flags |= XFS_INEW;
__xfs_inode_clear_reclaim_tag(mp, pag, ip);
inode->i_state = I_NEW;
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 3ae6d58e5473..964cfea77686 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -384,6 +384,16 @@ static inline void xfs_ifunlock(xfs_inode_t *ip)
#define XFS_IDIRTY_RELEASE 0x0040 /* dirty release already seen */
/*
+ * Per-lifetime flags need to be reset when re-using a reclaimable inode during
+ * inode lookup. Thi prevents unintended behaviour on the new inode from
+ * ocurring.
+ */
+#define XFS_IRECLAIM_RESET_FLAGS \
+ (XFS_IRECLAIMABLE | XFS_IRECLAIM | \
+ XFS_IDIRTY_RELEASE | XFS_ITRUNCATED | \
+ XFS_IFILESTREAM);
+
+/*
* Flags for inode locking.
* Bit ranges: 1<<1 - 1<<16-1 -- iolock/ilock modes (bitfield)
* 1<<16 - 1<<32-1 -- lockdep annotation (integers)
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 09983a3344a5..b1e88d56069c 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -681,15 +681,15 @@ xfs_inode_item_unlock(
* where the cluster buffer may be unpinned before the inode is inserted into
* the AIL during transaction committed processing. If the buffer is unpinned
* before the inode item has been committed and inserted, then it is possible
- * for the buffer to be written and IO completions before the inode is inserted
+ * for the buffer to be written and IO completes before the inode is inserted
* into the AIL. In that case, we'd be inserting a clean, stale inode into the
* AIL which will never get removed. It will, however, get reclaimed which
* triggers an assert in xfs_inode_free() complaining about freein an inode
* still in the AIL.
*
- * To avoid this, return a lower LSN than the one passed in so that the
- * transaction committed code will not move the inode forward in the AIL but
- * will still unpin it properly.
+ * To avoid this, just unpin the inode directly and return a LSN of -1 so the
+ * transaction committed code knows that it does not need to do any further
+ * processing on the item.
*/
STATIC xfs_lsn_t
xfs_inode_item_committed(
@@ -699,8 +699,10 @@ xfs_inode_item_committed(
struct xfs_inode_log_item *iip = INODE_ITEM(lip);
struct xfs_inode *ip = iip->ili_inode;
- if (xfs_iflags_test(ip, XFS_ISTALE))
- return lsn - 1;
+ if (xfs_iflags_test(ip, XFS_ISTALE)) {
+ xfs_inode_item_unpin(lip, 0);
+ return -1;
+ }
return lsn;
}
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 211930246f20..41d5b8f2bf92 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -1372,8 +1372,17 @@ xlog_sync(xlog_t *log,
XFS_BUF_ASYNC(bp);
bp->b_flags |= XBF_LOG_BUFFER;
- if (log->l_mp->m_flags & XFS_MOUNT_BARRIER)
+ if (log->l_mp->m_flags & XFS_MOUNT_BARRIER) {
+ /*
+ * If we have an external log device, flush the data device
+ * before flushing the log to make sure all meta data
+ * written back from the AIL actually made it to disk
+ * before writing out the new log tail LSN in the log buffer.
+ */
+ if (log->l_mp->m_logdev_targp != log->l_mp->m_ddev_targp)
+ xfs_blkdev_issue_flush(log->l_mp->m_ddev_targp);
XFS_BUF_ORDERED(bp);
+ }
ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1);
ASSERT(XFS_BUF_ADDR(bp) + BTOBB(count) <= log->l_logBBsize);
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 7c7bc2b786bd..c83f63b33aae 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -1361,7 +1361,7 @@ xfs_trans_item_committed(
lip->li_flags |= XFS_LI_ABORTED;
item_lsn = IOP_COMMITTED(lip, commit_lsn);
- /* If the committed routine returns -1, item has been freed. */
+ /* item_lsn of -1 means the item needs no further processing */
if (XFS_LSN_CMP(item_lsn, (xfs_lsn_t)-1) == 0)
return;
@@ -1474,7 +1474,7 @@ xfs_trans_committed_bulk(
lip->li_flags |= XFS_LI_ABORTED;
item_lsn = IOP_COMMITTED(lip, commit_lsn);
- /* item_lsn of -1 means the item was freed */
+ /* item_lsn of -1 means the item needs no further processing */
if (XFS_LSN_CMP(item_lsn, (xfs_lsn_t)-1) == 0)
continue;
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index b7a5fe7c52c8..619720705bc6 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -960,8 +960,11 @@ xfs_release(
* be exposed to that problem.
*/
truncated = xfs_iflags_test_and_clear(ip, XFS_ITRUNCATED);
- if (truncated && VN_DIRTY(VFS_I(ip)) && ip->i_delayed_blks > 0)
- xfs_flush_pages(ip, 0, -1, XBF_ASYNC, FI_NONE);
+ if (truncated) {
+ xfs_iflags_clear(ip, XFS_IDIRTY_RELEASE);
+ if (VN_DIRTY(VFS_I(ip)) && ip->i_delayed_blks > 0)
+ xfs_flush_pages(ip, 0, -1, XBF_ASYNC, FI_NONE);
+ }
}
if (ip->i_d.di_nlink == 0)