summaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/bio.c6
-rw-r--r--fs/block_dev.c129
-rw-r--r--fs/btrfs/backref.c28
-rw-r--r--fs/btrfs/backref.h4
-rw-r--r--fs/btrfs/ctree.c70
-rw-r--r--fs/btrfs/ctree.h3
-rw-r--r--fs/btrfs/disk-io.c8
-rw-r--r--fs/btrfs/extent_io.c4
-rw-r--r--fs/btrfs/file.c3
-rw-r--r--fs/btrfs/inode.c7
-rw-r--r--fs/btrfs/ioctl.c8
-rw-r--r--fs/btrfs/qgroup.c17
-rw-r--r--fs/btrfs/send.c156
-rw-r--r--fs/btrfs/transaction.c2
-rw-r--r--fs/btrfs/volumes.c7
-rw-r--r--fs/buffer.c157
-rw-r--r--fs/ceph/export.c2
-rw-r--r--fs/char_dev.c18
-rw-r--r--fs/cifs/cifsacl.c49
-rw-r--r--fs/cifs/dir.c11
-rw-r--r--fs/cifs/file.c6
-rw-r--r--fs/cifs/readdir.c5
-rw-r--r--fs/cifs/smb1ops.c3
-rw-r--r--fs/compat_ioctl.c5
-rw-r--r--fs/coredump.c5
-rw-r--r--fs/debugfs/inode.c1
-rw-r--r--fs/devpts/inode.c61
-rw-r--r--fs/direct-io.c8
-rw-r--r--fs/dlm/Kconfig2
-rw-r--r--fs/dlm/dlm_internal.h1
-rw-r--r--fs/dlm/lock.c16
-rw-r--r--fs/dlm/lowcomms.c5
-rw-r--r--fs/dlm/recover.c37
-rw-r--r--fs/eventpoll.c38
-rw-r--r--fs/exec.c3
-rw-r--r--fs/ext2/super.c4
-rw-r--r--fs/ext3/balloc.c5
-rw-r--r--fs/ext3/namei.c40
-rw-r--r--fs/ext3/namei.h19
-rw-r--r--fs/ext3/super.c4
-rw-r--r--fs/ext4/balloc.c8
-rw-r--r--fs/ext4/bitmap.c6
-rw-r--r--fs/ext4/ext4.h7
-rw-r--r--fs/ext4/ext4_jbd2.c8
-rw-r--r--fs/ext4/extents.c60
-rw-r--r--fs/ext4/ialloc.c23
-rw-r--r--fs/ext4/mballoc.c14
-rw-r--r--fs/ext4/resize.c3
-rw-r--r--fs/ext4/super.c9
-rw-r--r--fs/file.c19
-rw-r--r--fs/fs-writeback.c2
-rw-r--r--fs/gfs2/file.c14
-rw-r--r--fs/gfs2/glock.c2
-rw-r--r--fs/gfs2/lops.c16
-rw-r--r--fs/gfs2/quota.c7
-rw-r--r--fs/gfs2/rgrp.c33
-rw-r--r--fs/gfs2/super.c3
-rw-r--r--fs/gfs2/trans.c8
-rw-r--r--fs/hugetlbfs/inode.c109
-rw-r--r--fs/inode.c18
-rw-r--r--fs/internal.h1
-rw-r--r--fs/jbd/transaction.c2
-rw-r--r--fs/jffs2/file.c39
-rw-r--r--fs/jfs/jfs_discard.c16
-rw-r--r--fs/lockd/clntxdr.c2
-rw-r--r--fs/lockd/mon.c57
-rw-r--r--fs/lockd/svcproc.c3
-rw-r--r--fs/namei.c9
-rw-r--r--fs/nfs/callback.c2
-rw-r--r--fs/nfs/dir.c7
-rw-r--r--fs/nfs/dns_resolve.c5
-rw-r--r--fs/nfs/inode.c5
-rw-r--r--fs/nfs/internal.h6
-rw-r--r--fs/nfs/mount_clnt.c2
-rw-r--r--fs/nfs/namespace.c19
-rw-r--r--fs/nfs/nfs4filelayout.c21
-rw-r--r--fs/nfs/nfs4filelayout.h1
-rw-r--r--fs/nfs/nfs4filelayoutdev.c22
-rw-r--r--fs/nfs/nfs4getroot.c1
-rw-r--r--fs/nfs/nfs4namespace.c3
-rw-r--r--fs/nfs/nfs4proc.c46
-rw-r--r--fs/nfs/objlayout/objio_osd.c6
-rw-r--r--fs/nfs/pnfs.c4
-rw-r--r--fs/nfs/pnfs.h1
-rw-r--r--fs/nfs/super.c51
-rw-r--r--fs/nfs/unlink.c2
-rw-r--r--fs/nilfs2/page.c2
-rw-r--r--fs/notify/fanotify/fanotify.c1
-rw-r--r--fs/notify/fanotify/fanotify_user.c3
-rw-r--r--fs/ocfs2/file.c5
-rw-r--r--fs/proc/array.c4
-rw-r--r--fs/proc/base.c124
-rw-r--r--fs/proc/internal.h4
-rw-r--r--fs/proc/stat.c14
-rw-r--r--fs/proc/task_mmu.c53
-rw-r--r--fs/pstore/inode.c7
-rw-r--r--fs/pstore/internal.h2
-rw-r--r--fs/pstore/platform.c14
-rw-r--r--fs/pstore/ram.c9
-rw-r--r--fs/quota/dquot.c2
-rw-r--r--fs/reiserfs/inode.c10
-rw-r--r--fs/reiserfs/stree.c4
-rw-r--r--fs/reiserfs/super.c60
-rw-r--r--fs/splice.c5
-rw-r--r--fs/sysfs/dir.c16
-rw-r--r--fs/sysfs/file.c4
-rw-r--r--fs/ubifs/find.c12
-rw-r--r--fs/ubifs/lprops.c6
-rw-r--r--fs/ubifs/ubifs.h3
-rw-r--r--fs/xattr.c2
110 files changed, 1268 insertions, 757 deletions
diff --git a/fs/bio.c b/fs/bio.c
index 9298c65ad9c7..b96fc6ce4855 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -75,6 +75,7 @@ static struct kmem_cache *bio_find_or_create_slab(unsigned int extra_size)
unsigned int sz = sizeof(struct bio) + extra_size;
struct kmem_cache *slab = NULL;
struct bio_slab *bslab, *new_bio_slabs;
+ unsigned int new_bio_slab_max;
unsigned int i, entry = -1;
mutex_lock(&bio_slab_lock);
@@ -97,12 +98,13 @@ static struct kmem_cache *bio_find_or_create_slab(unsigned int extra_size)
goto out_unlock;
if (bio_slab_nr == bio_slab_max && entry == -1) {
- bio_slab_max <<= 1;
+ new_bio_slab_max = bio_slab_max << 1;
new_bio_slabs = krealloc(bio_slabs,
- bio_slab_max * sizeof(struct bio_slab),
+ new_bio_slab_max * sizeof(struct bio_slab),
GFP_KERNEL);
if (!new_bio_slabs)
goto out_unlock;
+ bio_slab_max = new_bio_slab_max;
bio_slabs = new_bio_slabs;
}
if (entry == -1)
diff --git a/fs/block_dev.c b/fs/block_dev.c
index b3c1d3dae77d..ab3a456f6650 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -70,19 +70,6 @@ static void bdev_inode_switch_bdi(struct inode *inode,
spin_unlock(&dst->wb.list_lock);
}
-sector_t blkdev_max_block(struct block_device *bdev)
-{
- sector_t retval = ~((sector_t)0);
- loff_t sz = i_size_read(bdev->bd_inode);
-
- if (sz) {
- unsigned int size = block_size(bdev);
- unsigned int sizebits = blksize_bits(size);
- retval = (sz >> sizebits);
- }
- return retval;
-}
-
/* Kill _all_ buffers and pagecache , dirty or not.. */
void kill_bdev(struct block_device *bdev)
{
@@ -116,8 +103,6 @@ EXPORT_SYMBOL(invalidate_bdev);
int set_blocksize(struct block_device *bdev, int size)
{
- struct address_space *mapping;
-
/* Size must be a power of two, and between 512 and PAGE_SIZE */
if (size > PAGE_SIZE || size < 512 || !is_power_of_2(size))
return -EINVAL;
@@ -126,19 +111,6 @@ int set_blocksize(struct block_device *bdev, int size)
if (size < bdev_logical_block_size(bdev))
return -EINVAL;
- /* Prevent starting I/O or mapping the device */
- percpu_down_write(&bdev->bd_block_size_semaphore);
-
- /* Check that the block device is not memory mapped */
- mapping = bdev->bd_inode->i_mapping;
- mutex_lock(&mapping->i_mmap_mutex);
- if (mapping_mapped(mapping)) {
- mutex_unlock(&mapping->i_mmap_mutex);
- percpu_up_write(&bdev->bd_block_size_semaphore);
- return -EBUSY;
- }
- mutex_unlock(&mapping->i_mmap_mutex);
-
/* Don't change the size if it is same as current */
if (bdev->bd_block_size != size) {
sync_blockdev(bdev);
@@ -146,9 +118,6 @@ int set_blocksize(struct block_device *bdev, int size)
bdev->bd_inode->i_blkbits = blksize_bits(size);
kill_bdev(bdev);
}
-
- percpu_up_write(&bdev->bd_block_size_semaphore);
-
return 0;
}
@@ -181,52 +150,12 @@ static int
blkdev_get_block(struct inode *inode, sector_t iblock,
struct buffer_head *bh, int create)
{
- if (iblock >= blkdev_max_block(I_BDEV(inode))) {
- if (create)
- return -EIO;
-
- /*
- * for reads, we're just trying to fill a partial page.
- * return a hole, they will have to call get_block again
- * before they can fill it, and they will get -EIO at that
- * time
- */
- return 0;
- }
bh->b_bdev = I_BDEV(inode);
bh->b_blocknr = iblock;
set_buffer_mapped(bh);
return 0;
}
-static int
-blkdev_get_blocks(struct inode *inode, sector_t iblock,
- struct buffer_head *bh, int create)
-{
- sector_t end_block = blkdev_max_block(I_BDEV(inode));
- unsigned long max_blocks = bh->b_size >> inode->i_blkbits;
-
- if ((iblock + max_blocks) > end_block) {
- max_blocks = end_block - iblock;
- if ((long)max_blocks <= 0) {
- if (create)
- return -EIO; /* write fully beyond EOF */
- /*
- * It is a read which is fully beyond EOF. We return
- * a !buffer_mapped buffer
- */
- max_blocks = 0;
- }
- }
-
- bh->b_bdev = I_BDEV(inode);
- bh->b_blocknr = iblock;
- bh->b_size = max_blocks << inode->i_blkbits;
- if (max_blocks)
- set_buffer_mapped(bh);
- return 0;
-}
-
static ssize_t
blkdev_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
loff_t offset, unsigned long nr_segs)
@@ -235,7 +164,7 @@ blkdev_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
struct inode *inode = file->f_mapping->host;
return __blockdev_direct_IO(rw, iocb, inode, I_BDEV(inode), iov, offset,
- nr_segs, blkdev_get_blocks, NULL, NULL, 0);
+ nr_segs, blkdev_get_block, NULL, NULL, 0);
}
int __sync_blockdev(struct block_device *bdev, int wait)
@@ -459,12 +388,6 @@ static struct inode *bdev_alloc_inode(struct super_block *sb)
struct bdev_inode *ei = kmem_cache_alloc(bdev_cachep, GFP_KERNEL);
if (!ei)
return NULL;
-
- if (unlikely(percpu_init_rwsem(&ei->bdev.bd_block_size_semaphore))) {
- kmem_cache_free(bdev_cachep, ei);
- return NULL;
- }
-
return &ei->vfs_inode;
}
@@ -473,8 +396,6 @@ static void bdev_i_callback(struct rcu_head *head)
struct inode *inode = container_of(head, struct inode, i_rcu);
struct bdev_inode *bdi = BDEV_I(inode);
- percpu_free_rwsem(&bdi->bdev.bd_block_size_semaphore);
-
kmem_cache_free(bdev_cachep, bdi);
}
@@ -1593,22 +1514,6 @@ static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg)
return blkdev_ioctl(bdev, mode, cmd, arg);
}
-ssize_t blkdev_aio_read(struct kiocb *iocb, const struct iovec *iov,
- unsigned long nr_segs, loff_t pos)
-{
- ssize_t ret;
- struct block_device *bdev = I_BDEV(iocb->ki_filp->f_mapping->host);
-
- percpu_down_read(&bdev->bd_block_size_semaphore);
-
- ret = generic_file_aio_read(iocb, iov, nr_segs, pos);
-
- percpu_up_read(&bdev->bd_block_size_semaphore);
-
- return ret;
-}
-EXPORT_SYMBOL_GPL(blkdev_aio_read);
-
/*
* Write data to the block device. Only intended for the block device itself
* and the raw driver which basically is a fake block device.
@@ -1620,16 +1525,12 @@ ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t pos)
{
struct file *file = iocb->ki_filp;
- struct block_device *bdev = I_BDEV(file->f_mapping->host);
struct blk_plug plug;
ssize_t ret;
BUG_ON(iocb->ki_pos != pos);
blk_start_plug(&plug);
-
- percpu_down_read(&bdev->bd_block_size_semaphore);
-
ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos);
if (ret > 0 || ret == -EIOCBQUEUED) {
ssize_t err;
@@ -1638,27 +1539,25 @@ ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov,
if (err < 0 && ret > 0)
ret = err;
}
-
- percpu_up_read(&bdev->bd_block_size_semaphore);
-
blk_finish_plug(&plug);
-
return ret;
}
EXPORT_SYMBOL_GPL(blkdev_aio_write);
-static int blkdev_mmap(struct file *file, struct vm_area_struct *vma)
+static ssize_t blkdev_aio_read(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
{
- int ret;
- struct block_device *bdev = I_BDEV(file->f_mapping->host);
-
- percpu_down_read(&bdev->bd_block_size_semaphore);
-
- ret = generic_file_mmap(file, vma);
+ struct file *file = iocb->ki_filp;
+ struct inode *bd_inode = file->f_mapping->host;
+ loff_t size = i_size_read(bd_inode);
- percpu_up_read(&bdev->bd_block_size_semaphore);
+ if (pos >= size)
+ return 0;
- return ret;
+ size -= pos;
+ if (size < INT_MAX)
+ nr_segs = iov_shorten((struct iovec *)iov, nr_segs, size);
+ return generic_file_aio_read(iocb, iov, nr_segs, pos);
}
/*
@@ -1691,9 +1590,9 @@ const struct file_operations def_blk_fops = {
.llseek = block_llseek,
.read = do_sync_read,
.write = do_sync_write,
- .aio_read = blkdev_aio_read,
+ .aio_read = blkdev_aio_read,
.aio_write = blkdev_aio_write,
- .mmap = blkdev_mmap,
+ .mmap = generic_file_mmap,
.fsync = blkdev_fsync,
.unlocked_ioctl = block_ioctl,
#ifdef CONFIG_COMPAT
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index f3187938e081..208d8aa5b07e 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -283,9 +283,7 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,
goto out;
}
- rcu_read_lock();
- root_level = btrfs_header_level(root->node);
- rcu_read_unlock();
+ root_level = btrfs_old_root_level(root, time_seq);
if (root_level + 1 == level)
goto out;
@@ -1177,16 +1175,15 @@ int btrfs_find_one_extref(struct btrfs_root *root, u64 inode_objectid,
return ret;
}
-static char *ref_to_path(struct btrfs_root *fs_root,
- struct btrfs_path *path,
- u32 name_len, unsigned long name_off,
- struct extent_buffer *eb_in, u64 parent,
- char *dest, u32 size)
+char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
+ u32 name_len, unsigned long name_off,
+ struct extent_buffer *eb_in, u64 parent,
+ char *dest, u32 size)
{
int slot;
u64 next_inum;
int ret;
- s64 bytes_left = size - 1;
+ s64 bytes_left = ((s64)size) - 1;
struct extent_buffer *eb = eb_in;
struct btrfs_key found_key;
int leave_spinning = path->leave_spinning;
@@ -1266,10 +1263,10 @@ char *btrfs_iref_to_path(struct btrfs_root *fs_root,
struct extent_buffer *eb_in, u64 parent,
char *dest, u32 size)
{
- return ref_to_path(fs_root, path,
- btrfs_inode_ref_name_len(eb_in, iref),
- (unsigned long)(iref + 1),
- eb_in, parent, dest, size);
+ return btrfs_ref_to_path(fs_root, path,
+ btrfs_inode_ref_name_len(eb_in, iref),
+ (unsigned long)(iref + 1),
+ eb_in, parent, dest, size);
}
/*
@@ -1715,9 +1712,8 @@ static int inode_to_path(u64 inum, u32 name_len, unsigned long name_off,
ipath->fspath->bytes_left - s_ptr : 0;
fspath_min = (char *)ipath->fspath->val + (i + 1) * s_ptr;
- fspath = ref_to_path(ipath->fs_root, ipath->btrfs_path, name_len,
- name_off, eb, inum, fspath_min,
- bytes_left);
+ fspath = btrfs_ref_to_path(ipath->fs_root, ipath->btrfs_path, name_len,
+ name_off, eb, inum, fspath_min, bytes_left);
if (IS_ERR(fspath))
return PTR_ERR(fspath);
diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h
index e75533043a5f..d61feca79455 100644
--- a/fs/btrfs/backref.h
+++ b/fs/btrfs/backref.h
@@ -62,6 +62,10 @@ int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
char *btrfs_iref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
struct btrfs_inode_ref *iref, struct extent_buffer *eb,
u64 parent, char *dest, u32 size);
+char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
+ u32 name_len, unsigned long name_off,
+ struct extent_buffer *eb_in, u64 parent,
+ char *dest, u32 size);
struct btrfs_data_container *init_data_container(u32 total_bytes);
struct inode_fs_paths *init_ipath(s32 total_bytes, struct btrfs_root *fs_root,
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index b33436211000..cdfb4c49a806 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -596,6 +596,11 @@ tree_mod_log_insert_move(struct btrfs_fs_info *fs_info,
if (tree_mod_dont_log(fs_info, eb))
return 0;
+ /*
+ * When we override something during the move, we log these removals.
+ * This can only happen when we move towards the beginning of the
+ * buffer, i.e. dst_slot < src_slot.
+ */
for (i = 0; i + dst_slot < src_slot && i < nr_items; i++) {
ret = tree_mod_log_insert_key_locked(fs_info, eb, i + dst_slot,
MOD_LOG_KEY_REMOVE_WHILE_MOVING);
@@ -647,8 +652,6 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info,
if (tree_mod_dont_log(fs_info, NULL))
return 0;
- __tree_mod_log_free_eb(fs_info, old_root);
-
ret = tree_mod_alloc(fs_info, flags, &tm);
if (ret < 0)
goto out;
@@ -926,12 +929,7 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
ret = btrfs_dec_ref(trans, root, buf, 1, 1);
BUG_ON(ret); /* -ENOMEM */
}
- /*
- * don't log freeing in case we're freeing the root node, this
- * is done by tree_mod_log_set_root_pointer later
- */
- if (buf != root->node && btrfs_header_level(buf) != 0)
- tree_mod_log_free_eb(root->fs_info, buf);
+ tree_mod_log_free_eb(root->fs_info, buf);
clean_tree_block(trans, root, buf);
*last_ref = 1;
}
@@ -1225,6 +1223,8 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb,
free_extent_buffer(eb);
__tree_mod_log_rewind(eb_rewin, time_seq, tm);
+ WARN_ON(btrfs_header_nritems(eb_rewin) >
+ BTRFS_NODEPTRS_PER_BLOCK(fs_info->fs_root));
return eb_rewin;
}
@@ -1241,9 +1241,11 @@ get_old_root(struct btrfs_root *root, u64 time_seq)
{
struct tree_mod_elem *tm;
struct extent_buffer *eb;
+ struct extent_buffer *old;
struct tree_mod_root *old_root = NULL;
u64 old_generation = 0;
u64 logical;
+ u32 blocksize;
eb = btrfs_read_lock_root_node(root);
tm = __tree_mod_log_oldest_root(root->fs_info, root, time_seq);
@@ -1259,14 +1261,32 @@ get_old_root(struct btrfs_root *root, u64 time_seq)
}
tm = tree_mod_log_search(root->fs_info, logical, time_seq);
- if (old_root)
+ if (old_root && tm && tm->op != MOD_LOG_KEY_REMOVE_WHILE_FREEING) {
+ btrfs_tree_read_unlock(root->node);
+ free_extent_buffer(root->node);
+ blocksize = btrfs_level_size(root, old_root->level);
+ old = read_tree_block(root, logical, blocksize, 0);
+ if (!old) {
+ pr_warn("btrfs: failed to read tree block %llu from get_old_root\n",
+ logical);
+ WARN_ON(1);
+ } else {
+ eb = btrfs_clone_extent_buffer(old);
+ free_extent_buffer(old);
+ }
+ } else if (old_root) {
+ btrfs_tree_read_unlock(root->node);
+ free_extent_buffer(root->node);
eb = alloc_dummy_extent_buffer(logical, root->nodesize);
- else
+ } else {
eb = btrfs_clone_extent_buffer(root->node);
- btrfs_tree_read_unlock(root->node);
- free_extent_buffer(root->node);
+ btrfs_tree_read_unlock(root->node);
+ free_extent_buffer(root->node);
+ }
+
if (!eb)
return NULL;
+ extent_buffer_get(eb);
btrfs_tree_read_lock(eb);
if (old_root) {
btrfs_set_header_bytenr(eb, eb->start);
@@ -1279,11 +1299,28 @@ get_old_root(struct btrfs_root *root, u64 time_seq)
__tree_mod_log_rewind(eb, time_seq, tm);
else
WARN_ON(btrfs_header_level(eb) != 0);
- extent_buffer_get(eb);
+ WARN_ON(btrfs_header_nritems(eb) > BTRFS_NODEPTRS_PER_BLOCK(root));
return eb;
}
+int btrfs_old_root_level(struct btrfs_root *root, u64 time_seq)
+{
+ struct tree_mod_elem *tm;
+ int level;
+
+ tm = __tree_mod_log_oldest_root(root->fs_info, root, time_seq);
+ if (tm && tm->op == MOD_LOG_ROOT_REPLACE) {
+ level = tm->old_root.level;
+ } else {
+ rcu_read_lock();
+ level = btrfs_header_level(root->node);
+ rcu_read_unlock();
+ }
+
+ return level;
+}
+
static inline int should_cow_block(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct extent_buffer *buf)
@@ -1725,6 +1762,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
goto enospc;
}
+ tree_mod_log_free_eb(root->fs_info, root->node);
tree_mod_log_set_root_pointer(root, child);
rcu_assign_pointer(root->node, child);
@@ -2970,8 +3008,10 @@ static int push_node_left(struct btrfs_trans_handle *trans,
push_items * sizeof(struct btrfs_key_ptr));
if (push_items < src_nritems) {
- tree_mod_log_eb_move(root->fs_info, src, 0, push_items,
- src_nritems - push_items);
+ /*
+ * don't call tree_mod_log_eb_move here, key removal was already
+ * fully logged by tree_mod_log_eb_copy above.
+ */
memmove_extent_buffer(src, btrfs_node_key_ptr_offset(0),
btrfs_node_key_ptr_offset(push_items),
(src_nritems - push_items) *
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 926c9ffc66d9..c72ead869507 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3120,6 +3120,7 @@ static inline u64 btrfs_inc_tree_mod_seq(struct btrfs_fs_info *fs_info)
{
return atomic_inc_return(&fs_info->tree_mod_seq);
}
+int btrfs_old_root_level(struct btrfs_root *root, u64 time_seq);
/* root-item.c */
int btrfs_find_root_ref(struct btrfs_root *tree_root,
@@ -3338,6 +3339,8 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
int btrfs_update_inode(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct inode *inode);
+int btrfs_update_inode_fallback(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, struct inode *inode);
int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode);
int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode);
int btrfs_orphan_cleanup(struct btrfs_root *root);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 7cda51995c1e..22a0439e5a86 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -3416,8 +3416,8 @@ void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr)
num_dirty = root->fs_info->dirty_metadata_bytes;
if (num_dirty > thresh) {
- balance_dirty_pages_ratelimited_nr(
- root->fs_info->btree_inode->i_mapping, 1);
+ balance_dirty_pages_ratelimited(
+ root->fs_info->btree_inode->i_mapping);
}
return;
}
@@ -3437,8 +3437,8 @@ void __btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr)
num_dirty = root->fs_info->dirty_metadata_bytes;
if (num_dirty > thresh) {
- balance_dirty_pages_ratelimited_nr(
- root->fs_info->btree_inode->i_mapping, 1);
+ balance_dirty_pages_ratelimited(
+ root->fs_info->btree_inode->i_mapping);
}
return;
}
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 8036d3a84853..472873a94d96 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -4110,8 +4110,8 @@ struct extent_buffer *alloc_dummy_extent_buffer(u64 start, unsigned long len)
return eb;
err:
- for (i--; i >= 0; i--)
- __free_page(eb->pages[i]);
+ for (; i > 0; i--)
+ __free_page(eb->pages[i - 1]);
__free_extent_buffer(eb);
return NULL;
}
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 9ab1bed88116..a8ee75cb96ee 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1346,8 +1346,7 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
cond_resched();
- balance_dirty_pages_ratelimited_nr(inode->i_mapping,
- dirty_pages);
+ balance_dirty_pages_ratelimited(inode->i_mapping);
if (dirty_pages < (root->leafsize >> PAGE_CACHE_SHIFT) + 1)
btrfs_btree_balance_dirty(root, 1);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 85a1e5053fe6..95542a1b3dfc 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -94,8 +94,6 @@ static noinline int cow_file_range(struct inode *inode,
struct page *locked_page,
u64 start, u64 end, int *page_started,
unsigned long *nr_written, int unlock);
-static noinline int btrfs_update_inode_fallback(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct inode *inode);
static int btrfs_init_inode_security(struct btrfs_trans_handle *trans,
struct inode *inode, struct inode *dir,
@@ -2746,8 +2744,9 @@ noinline int btrfs_update_inode(struct btrfs_trans_handle *trans,
return btrfs_update_inode_item(trans, root, inode);
}
-static noinline int btrfs_update_inode_fallback(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct inode *inode)
+noinline int btrfs_update_inode_fallback(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct inode *inode)
{
int ret;
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 61168805f175..5b3429ab8ec1 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -343,7 +343,8 @@ static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg)
return -EOPNOTSUPP;
if (copy_from_user(&range, arg, sizeof(range)))
return -EFAULT;
- if (range.start > total_bytes)
+ if (range.start > total_bytes ||
+ range.len < fs_info->sb->s_blocksize)
return -EINVAL;
range.len = min(range.len, total_bytes - range.start);
@@ -570,7 +571,8 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
ret = btrfs_commit_transaction(trans,
root->fs_info->extent_root);
}
- BUG_ON(ret);
+ if (ret)
+ goto fail;
ret = pending_snapshot->error;
if (ret)
@@ -1223,7 +1225,7 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
}
defrag_count += ret;
- balance_dirty_pages_ratelimited_nr(inode->i_mapping, ret);
+ balance_dirty_pages_ratelimited(inode->i_mapping);
mutex_unlock(&inode->i_mutex);
if (newer_than) {
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 5039686df6ae..fe9d02c45f8e 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -790,8 +790,10 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans,
}
path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
+ if (!path) {
+ ret = -ENOMEM;
+ goto out_free_root;
+ }
key.objectid = 0;
key.type = BTRFS_QGROUP_STATUS_KEY;
@@ -800,7 +802,7 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans,
ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
sizeof(*ptr));
if (ret)
- goto out;
+ goto out_free_path;
leaf = path->nodes[0];
ptr = btrfs_item_ptr(leaf, path->slots[0],
@@ -818,8 +820,15 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans,
fs_info->quota_root = quota_root;
fs_info->pending_quota_state = 1;
spin_unlock(&fs_info->qgroup_lock);
-out:
+out_free_path:
btrfs_free_path(path);
+out_free_root:
+ if (ret) {
+ free_extent_buffer(quota_root->node);
+ free_extent_buffer(quota_root->commit_root);
+ kfree(quota_root);
+ }
+out:
return ret;
}
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index c7beb543a4a8..e78b297b0b00 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -745,31 +745,36 @@ typedef int (*iterate_inode_ref_t)(int num, u64 dir, int index,
void *ctx);
/*
- * Helper function to iterate the entries in ONE btrfs_inode_ref.
+ * Helper function to iterate the entries in ONE btrfs_inode_ref or
+ * btrfs_inode_extref.
* The iterate callback may return a non zero value to stop iteration. This can
* be a negative value for error codes or 1 to simply stop it.
*
- * path must point to the INODE_REF when called.
+ * path must point to the INODE_REF or INODE_EXTREF when called.
*/
static int iterate_inode_ref(struct send_ctx *sctx,
struct btrfs_root *root, struct btrfs_path *path,
struct btrfs_key *found_key, int resolve,
iterate_inode_ref_t iterate, void *ctx)
{
- struct extent_buffer *eb;
+ struct extent_buffer *eb = path->nodes[0];
struct btrfs_item *item;
struct btrfs_inode_ref *iref;
+ struct btrfs_inode_extref *extref;
struct btrfs_path *tmp_path;
struct fs_path *p;
- u32 cur;
- u32 len;
+ u32 cur = 0;
u32 total;
- int slot;
+ int slot = path->slots[0];
u32 name_len;
char *start;
int ret = 0;
- int num;
+ int num = 0;
int index;
+ u64 dir;
+ unsigned long name_off;
+ unsigned long elem_size;
+ unsigned long ptr;
p = fs_path_alloc_reversed(sctx);
if (!p)
@@ -781,24 +786,40 @@ static int iterate_inode_ref(struct send_ctx *sctx,
return -ENOMEM;
}
- eb = path->nodes[0];
- slot = path->slots[0];
- item = btrfs_item_nr(eb, slot);
- iref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
- cur = 0;
- len = 0;
- total = btrfs_item_size(eb, item);
- num = 0;
+ if (found_key->type == BTRFS_INODE_REF_KEY) {
+ ptr = (unsigned long)btrfs_item_ptr(eb, slot,
+ struct btrfs_inode_ref);
+ item = btrfs_item_nr(eb, slot);
+ total = btrfs_item_size(eb, item);
+ elem_size = sizeof(*iref);
+ } else {
+ ptr = btrfs_item_ptr_offset(eb, slot);
+ total = btrfs_item_size_nr(eb, slot);
+ elem_size = sizeof(*extref);
+ }
+
while (cur < total) {
fs_path_reset(p);
- name_len = btrfs_inode_ref_name_len(eb, iref);
- index = btrfs_inode_ref_index(eb, iref);
+ if (found_key->type == BTRFS_INODE_REF_KEY) {
+ iref = (struct btrfs_inode_ref *)(ptr + cur);
+ name_len = btrfs_inode_ref_name_len(eb, iref);
+ name_off = (unsigned long)(iref + 1);
+ index = btrfs_inode_ref_index(eb, iref);
+ dir = found_key->offset;
+ } else {
+ extref = (struct btrfs_inode_extref *)(ptr + cur);
+ name_len = btrfs_inode_extref_name_len(eb, extref);
+ name_off = (unsigned long)&extref->name;
+ index = btrfs_inode_extref_index(eb, extref);
+ dir = btrfs_inode_extref_parent(eb, extref);
+ }
+
if (resolve) {
- start = btrfs_iref_to_path(root, tmp_path, iref, eb,
- found_key->offset, p->buf,
- p->buf_len);
+ start = btrfs_ref_to_path(root, tmp_path, name_len,
+ name_off, eb, dir,
+ p->buf, p->buf_len);
if (IS_ERR(start)) {
ret = PTR_ERR(start);
goto out;
@@ -809,9 +830,10 @@ static int iterate_inode_ref(struct send_ctx *sctx,
p->buf_len + p->buf - start);
if (ret < 0)
goto out;
- start = btrfs_iref_to_path(root, tmp_path, iref,
- eb, found_key->offset, p->buf,
- p->buf_len);
+ start = btrfs_ref_to_path(root, tmp_path,
+ name_len, name_off,
+ eb, dir,
+ p->buf, p->buf_len);
if (IS_ERR(start)) {
ret = PTR_ERR(start);
goto out;
@@ -820,21 +842,16 @@ static int iterate_inode_ref(struct send_ctx *sctx,
}
p->start = start;
} else {
- ret = fs_path_add_from_extent_buffer(p, eb,
- (unsigned long)(iref + 1), name_len);
+ ret = fs_path_add_from_extent_buffer(p, eb, name_off,
+ name_len);
if (ret < 0)
goto out;
}
-
- len = sizeof(*iref) + name_len;
- iref = (struct btrfs_inode_ref *)((char *)iref + len);
- cur += len;
-
- ret = iterate(num, found_key->offset, index, p, ctx);
+ cur += elem_size + name_len;
+ ret = iterate(num, dir, index, p, ctx);
if (ret)
goto out;
-
num++;
}
@@ -998,7 +1015,8 @@ static int get_inode_path(struct send_ctx *sctx, struct btrfs_root *root,
}
btrfs_item_key_to_cpu(p->nodes[0], &found_key, p->slots[0]);
if (found_key.objectid != ino ||
- found_key.type != BTRFS_INODE_REF_KEY) {
+ (found_key.type != BTRFS_INODE_REF_KEY &&
+ found_key.type != BTRFS_INODE_EXTREF_KEY)) {
ret = -ENOENT;
goto out;
}
@@ -1551,8 +1569,8 @@ static int get_first_ref(struct send_ctx *sctx,
struct btrfs_key key;
struct btrfs_key found_key;
struct btrfs_path *path;
- struct btrfs_inode_ref *iref;
int len;
+ u64 parent_dir;
path = alloc_path_for_send();
if (!path)
@@ -1568,27 +1586,41 @@ static int get_first_ref(struct send_ctx *sctx,
if (!ret)
btrfs_item_key_to_cpu(path->nodes[0], &found_key,
path->slots[0]);
- if (ret || found_key.objectid != key.objectid ||
- found_key.type != key.type) {
+ if (ret || found_key.objectid != ino ||
+ (found_key.type != BTRFS_INODE_REF_KEY &&
+ found_key.type != BTRFS_INODE_EXTREF_KEY)) {
ret = -ENOENT;
goto out;
}
- iref = btrfs_item_ptr(path->nodes[0], path->slots[0],
- struct btrfs_inode_ref);
- len = btrfs_inode_ref_name_len(path->nodes[0], iref);
- ret = fs_path_add_from_extent_buffer(name, path->nodes[0],
- (unsigned long)(iref + 1), len);
+ if (key.type == BTRFS_INODE_REF_KEY) {
+ struct btrfs_inode_ref *iref;
+ iref = btrfs_item_ptr(path->nodes[0], path->slots[0],
+ struct btrfs_inode_ref);
+ len = btrfs_inode_ref_name_len(path->nodes[0], iref);
+ ret = fs_path_add_from_extent_buffer(name, path->nodes[0],
+ (unsigned long)(iref + 1),
+ len);
+ parent_dir = found_key.offset;
+ } else {
+ struct btrfs_inode_extref *extref;
+ extref = btrfs_item_ptr(path->nodes[0], path->slots[0],
+ struct btrfs_inode_extref);
+ len = btrfs_inode_extref_name_len(path->nodes[0], extref);
+ ret = fs_path_add_from_extent_buffer(name, path->nodes[0],
+ (unsigned long)&extref->name, len);
+ parent_dir = btrfs_inode_extref_parent(path->nodes[0], extref);
+ }
if (ret < 0)
goto out;
btrfs_release_path(path);
- ret = get_inode_info(root, found_key.offset, NULL, dir_gen, NULL, NULL,
+ ret = get_inode_info(root, parent_dir, NULL, dir_gen, NULL, NULL,
NULL, NULL);
if (ret < 0)
goto out;
- *dir = found_key.offset;
+ *dir = parent_dir;
out:
btrfs_free_path(path);
@@ -2430,7 +2462,8 @@ verbose_printk("btrfs: send_create_inode %llu\n", ino);
TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH_LINK, p);
} else if (S_ISCHR(mode) || S_ISBLK(mode) ||
S_ISFIFO(mode) || S_ISSOCK(mode)) {
- TLV_PUT_U64(sctx, BTRFS_SEND_A_RDEV, rdev);
+ TLV_PUT_U64(sctx, BTRFS_SEND_A_RDEV, new_encode_dev(rdev));
+ TLV_PUT_U64(sctx, BTRFS_SEND_A_MODE, mode);
}
ret = send_cmd(sctx);
@@ -3226,7 +3259,8 @@ static int process_all_refs(struct send_ctx *sctx,
btrfs_item_key_to_cpu(eb, &found_key, slot);
if (found_key.objectid != key.objectid ||
- found_key.type != key.type)
+ (found_key.type != BTRFS_INODE_REF_KEY &&
+ found_key.type != BTRFS_INODE_EXTREF_KEY))
break;
ret = iterate_inode_ref(sctx, root, path, &found_key, 0, cb,
@@ -3987,7 +4021,7 @@ static int process_recorded_refs_if_needed(struct send_ctx *sctx, int at_end)
if (sctx->cur_ino == 0)
goto out;
if (!at_end && sctx->cur_ino == sctx->cmp_key->objectid &&
- sctx->cmp_key->type <= BTRFS_INODE_REF_KEY)
+ sctx->cmp_key->type <= BTRFS_INODE_EXTREF_KEY)
goto out;
if (list_empty(&sctx->new_refs) && list_empty(&sctx->deleted_refs))
goto out;
@@ -4033,22 +4067,21 @@ static int finish_inode_if_needed(struct send_ctx *sctx, int at_end)
if (ret < 0)
goto out;
- if (!S_ISLNK(sctx->cur_inode_mode)) {
- if (!sctx->parent_root || sctx->cur_inode_new) {
+ if (!sctx->parent_root || sctx->cur_inode_new) {
+ need_chown = 1;
+ if (!S_ISLNK(sctx->cur_inode_mode))
need_chmod = 1;
- need_chown = 1;
- } else {
- ret = get_inode_info(sctx->parent_root, sctx->cur_ino,
- NULL, NULL, &right_mode, &right_uid,
- &right_gid, NULL);
- if (ret < 0)
- goto out;
+ } else {
+ ret = get_inode_info(sctx->parent_root, sctx->cur_ino,
+ NULL, NULL, &right_mode, &right_uid,
+ &right_gid, NULL);
+ if (ret < 0)
+ goto out;
- if (left_uid != right_uid || left_gid != right_gid)
- need_chown = 1;
- if (left_mode != right_mode)
- need_chmod = 1;
- }
+ if (left_uid != right_uid || left_gid != right_gid)
+ need_chown = 1;
+ if (!S_ISLNK(sctx->cur_inode_mode) && left_mode != right_mode)
+ need_chmod = 1;
}
if (S_ISREG(sctx->cur_inode_mode)) {
@@ -4335,7 +4368,8 @@ static int changed_cb(struct btrfs_root *left_root,
if (key->type == BTRFS_INODE_ITEM_KEY)
ret = changed_inode(sctx, result);
- else if (key->type == BTRFS_INODE_REF_KEY)
+ else if (key->type == BTRFS_INODE_REF_KEY ||
+ key->type == BTRFS_INODE_EXTREF_KEY)
ret = changed_ref(sctx, result);
else if (key->type == BTRFS_XATTR_ITEM_KEY)
ret = changed_xattr(sctx, result);
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 77db875b5116..04bbfb1052eb 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -1200,7 +1200,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
btrfs_i_size_write(parent_inode, parent_inode->i_size +
dentry->d_name.len * 2);
parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME;
- ret = btrfs_update_inode(trans, parent_root, parent_inode);
+ ret = btrfs_update_inode_fallback(trans, parent_root, parent_inode);
if (ret)
btrfs_abort_transaction(trans, root, ret);
fail:
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 029b903a4ae3..0f5ebb72a5ea 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1819,6 +1819,13 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
"Failed to relocate sys chunks after "
"device initialization. This can be fixed "
"using the \"btrfs balance\" command.");
+ trans = btrfs_attach_transaction(root);
+ if (IS_ERR(trans)) {
+ if (PTR_ERR(trans) == -ENOENT)
+ return 0;
+ return PTR_ERR(trans);
+ }
+ ret = btrfs_commit_transaction(trans, root);
}
return ret;
diff --git a/fs/buffer.c b/fs/buffer.c
index b5f044283edb..6e9ed48064fc 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -555,7 +555,7 @@ void emergency_thaw_all(void)
*/
int sync_mapping_buffers(struct address_space *mapping)
{
- struct address_space *buffer_mapping = mapping->assoc_mapping;
+ struct address_space *buffer_mapping = mapping->private_data;
if (buffer_mapping == NULL || list_empty(&mapping->private_list))
return 0;
@@ -588,10 +588,10 @@ void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode)
struct address_space *buffer_mapping = bh->b_page->mapping;
mark_buffer_dirty(bh);
- if (!mapping->assoc_mapping) {
- mapping->assoc_mapping = buffer_mapping;
+ if (!mapping->private_data) {
+ mapping->private_data = buffer_mapping;
} else {
- BUG_ON(mapping->assoc_mapping != buffer_mapping);
+ BUG_ON(mapping->private_data != buffer_mapping);
}
if (!bh->b_assoc_map) {
spin_lock(&buffer_mapping->private_lock);
@@ -788,7 +788,7 @@ void invalidate_inode_buffers(struct inode *inode)
if (inode_has_buffers(inode)) {
struct address_space *mapping = &inode->i_data;
struct list_head *list = &mapping->private_list;
- struct address_space *buffer_mapping = mapping->assoc_mapping;
+ struct address_space *buffer_mapping = mapping->private_data;
spin_lock(&buffer_mapping->private_lock);
while (!list_empty(list))
@@ -811,7 +811,7 @@ int remove_inode_buffers(struct inode *inode)
if (inode_has_buffers(inode)) {
struct address_space *mapping = &inode->i_data;
struct list_head *list = &mapping->private_list;
- struct address_space *buffer_mapping = mapping->assoc_mapping;
+ struct address_space *buffer_mapping = mapping->private_data;
spin_lock(&buffer_mapping->private_lock);
while (!list_empty(list)) {
@@ -911,6 +911,18 @@ link_dev_buffers(struct page *page, struct buffer_head *head)
attach_page_buffers(page, head);
}
+static sector_t blkdev_max_block(struct block_device *bdev, unsigned int size)
+{
+ sector_t retval = ~((sector_t)0);
+ loff_t sz = i_size_read(bdev->bd_inode);
+
+ if (sz) {
+ unsigned int sizebits = blksize_bits(size);
+ retval = (sz >> sizebits);
+ }
+ return retval;
+}
+
/*
* Initialise the state of a blockdev page's buffers.
*/
@@ -921,7 +933,7 @@ init_page_buffers(struct page *page, struct block_device *bdev,
struct buffer_head *head = page_buffers(page);
struct buffer_head *bh = head;
int uptodate = PageUptodate(page);
- sector_t end_block = blkdev_max_block(I_BDEV(bdev->bd_inode));
+ sector_t end_block = blkdev_max_block(I_BDEV(bdev->bd_inode), size);
do {
if (!buffer_mapped(bh)) {
@@ -1553,6 +1565,28 @@ void unmap_underlying_metadata(struct block_device *bdev, sector_t block)
EXPORT_SYMBOL(unmap_underlying_metadata);
/*
+ * Size is a power-of-two in the range 512..PAGE_SIZE,
+ * and the case we care about most is PAGE_SIZE.
+ *
+ * So this *could* possibly be written with those
+ * constraints in mind (relevant mostly if some
+ * architecture has a slow bit-scan instruction)
+ */
+static inline int block_size_bits(unsigned int blocksize)
+{
+ return ilog2(blocksize);
+}
+
+static struct buffer_head *create_page_buffers(struct page *page, struct inode *inode, unsigned int b_state)
+{
+ BUG_ON(!PageLocked(page));
+
+ if (!page_has_buffers(page))
+ create_empty_buffers(page, 1 << ACCESS_ONCE(inode->i_blkbits), b_state);
+ return page_buffers(page);
+}
+
+/*
* NOTE! All mapped/uptodate combinations are valid:
*
* Mapped Uptodate Meaning
@@ -1589,19 +1623,13 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
sector_t block;
sector_t last_block;
struct buffer_head *bh, *head;
- const unsigned blocksize = 1 << inode->i_blkbits;
+ unsigned int blocksize, bbits;
int nr_underway = 0;
int write_op = (wbc->sync_mode == WB_SYNC_ALL ?
WRITE_SYNC : WRITE);
- BUG_ON(!PageLocked(page));
-
- last_block = (i_size_read(inode) - 1) >> inode->i_blkbits;
-
- if (!page_has_buffers(page)) {
- create_empty_buffers(page, blocksize,
+ head = create_page_buffers(page, inode,
(1 << BH_Dirty)|(1 << BH_Uptodate));
- }
/*
* Be very careful. We have no exclusion from __set_page_dirty_buffers
@@ -1613,9 +1641,12 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
* handle that here by just cleaning them.
*/
- block = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
- head = page_buffers(page);
bh = head;
+ blocksize = bh->b_size;
+ bbits = block_size_bits(blocksize);
+
+ block = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits);
+ last_block = (i_size_read(inode) - 1) >> bbits;
/*
* Get all the dirty buffers mapped to disk addresses and
@@ -1806,12 +1837,10 @@ int __block_write_begin(struct page *page, loff_t pos, unsigned len,
BUG_ON(to > PAGE_CACHE_SIZE);
BUG_ON(from > to);
- blocksize = 1 << inode->i_blkbits;
- if (!page_has_buffers(page))
- create_empty_buffers(page, blocksize, 0);
- head = page_buffers(page);
+ head = create_page_buffers(page, inode, 0);
+ blocksize = head->b_size;
+ bbits = block_size_bits(blocksize);
- bbits = inode->i_blkbits;
block = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits);
for(bh = head, block_start = 0; bh != head || !block_start;
@@ -1881,11 +1910,11 @@ static int __block_commit_write(struct inode *inode, struct page *page,
unsigned blocksize;
struct buffer_head *bh, *head;
- blocksize = 1 << inode->i_blkbits;
+ bh = head = page_buffers(page);
+ blocksize = bh->b_size;
- for(bh = head = page_buffers(page), block_start = 0;
- bh != head || !block_start;
- block_start=block_end, bh = bh->b_this_page) {
+ block_start = 0;
+ do {
block_end = block_start + blocksize;
if (block_end <= from || block_start >= to) {
if (!buffer_uptodate(bh))
@@ -1895,7 +1924,10 @@ static int __block_commit_write(struct inode *inode, struct page *page,
mark_buffer_dirty(bh);
}
clear_buffer_new(bh);
- }
+
+ block_start = block_end;
+ bh = bh->b_this_page;
+ } while (bh != head);
/*
* If this is a partial write which happened to make all buffers
@@ -2020,7 +2052,6 @@ EXPORT_SYMBOL(generic_write_end);
int block_is_partially_uptodate(struct page *page, read_descriptor_t *desc,
unsigned long from)
{
- struct inode *inode = page->mapping->host;
unsigned block_start, block_end, blocksize;
unsigned to;
struct buffer_head *bh, *head;
@@ -2029,13 +2060,13 @@ int block_is_partially_uptodate(struct page *page, read_descriptor_t *desc,
if (!page_has_buffers(page))
return 0;
- blocksize = 1 << inode->i_blkbits;
+ head = page_buffers(page);
+ blocksize = head->b_size;
to = min_t(unsigned, PAGE_CACHE_SIZE - from, desc->count);
to = from + to;
if (from < blocksize && to > PAGE_CACHE_SIZE - blocksize)
return 0;
- head = page_buffers(page);
bh = head;
block_start = 0;
do {
@@ -2068,18 +2099,16 @@ int block_read_full_page(struct page *page, get_block_t *get_block)
struct inode *inode = page->mapping->host;
sector_t iblock, lblock;
struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
- unsigned int blocksize;
+ unsigned int blocksize, bbits;
int nr, i;
int fully_mapped = 1;
- BUG_ON(!PageLocked(page));
- blocksize = 1 << inode->i_blkbits;
- if (!page_has_buffers(page))
- create_empty_buffers(page, blocksize, 0);
- head = page_buffers(page);
+ head = create_page_buffers(page, inode, 0);
+ blocksize = head->b_size;
+ bbits = block_size_bits(blocksize);
- iblock = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
- lblock = (i_size_read(inode)+blocksize-1) >> inode->i_blkbits;
+ iblock = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits);
+ lblock = (i_size_read(inode)+blocksize-1) >> bbits;
bh = head;
nr = 0;
i = 0;
@@ -2864,6 +2893,55 @@ static void end_bio_bh_io_sync(struct bio *bio, int err)
bio_put(bio);
}
+/*
+ * This allows us to do IO even on the odd last sectors
+ * of a device, even if the bh block size is some multiple
+ * of the physical sector size.
+ *
+ * We'll just truncate the bio to the size of the device,
+ * and clear the end of the buffer head manually.
+ *
+ * Truly out-of-range accesses will turn into actual IO
+ * errors, this only handles the "we need to be able to
+ * do IO at the final sector" case.
+ */
+static void guard_bh_eod(int rw, struct bio *bio, struct buffer_head *bh)
+{
+ sector_t maxsector;
+ unsigned bytes;
+
+ maxsector = i_size_read(bio->bi_bdev->bd_inode) >> 9;
+ if (!maxsector)
+ return;
+
+ /*
+ * If the *whole* IO is past the end of the device,
+ * let it through, and the IO layer will turn it into
+ * an EIO.
+ */
+ if (unlikely(bio->bi_sector >= maxsector))
+ return;
+
+ maxsector -= bio->bi_sector;
+ bytes = bio->bi_size;
+ if (likely((bytes >> 9) <= maxsector))
+ return;
+
+ /* Uhhuh. We've got a bh that straddles the device size! */
+ bytes = maxsector << 9;
+
+ /* Truncate the bio.. */
+ bio->bi_size = bytes;
+ bio->bi_io_vec[0].bv_len = bytes;
+
+ /* ..and clear the end of the buffer for reads */
+ if ((rw & RW_MASK) == READ) {
+ void *kaddr = kmap_atomic(bh->b_page);
+ memset(kaddr + bh_offset(bh) + bytes, 0, bh->b_size - bytes);
+ kunmap_atomic(kaddr);
+ }
+}
+
int submit_bh(int rw, struct buffer_head * bh)
{
struct bio *bio;
@@ -2900,6 +2978,9 @@ int submit_bh(int rw, struct buffer_head * bh)
bio->bi_end_io = end_bio_bh_io_sync;
bio->bi_private = bh;
+ /* Take care of bh's that straddle the end of the device */
+ guard_bh_eod(rw, bio, bh);
+
bio_get(bio);
submit_bio(rw, bio);
diff --git a/fs/ceph/export.c b/fs/ceph/export.c
index 02ce90972d81..9349bb37a2fe 100644
--- a/fs/ceph/export.c
+++ b/fs/ceph/export.c
@@ -90,6 +90,8 @@ static int ceph_encode_fh(struct inode *inode, u32 *rawfh, int *max_len,
*max_len = handle_length;
type = 255;
}
+ if (dentry)
+ dput(dentry);
return type;
}
diff --git a/fs/char_dev.c b/fs/char_dev.c
index 3f152b92a94a..afc2bb691780 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -471,9 +471,19 @@ static int exact_lock(dev_t dev, void *data)
*/
int cdev_add(struct cdev *p, dev_t dev, unsigned count)
{
+ int error;
+
p->dev = dev;
p->count = count;
- return kobj_map(cdev_map, dev, count, NULL, exact_match, exact_lock, p);
+
+ error = kobj_map(cdev_map, dev, count, NULL,
+ exact_match, exact_lock, p);
+ if (error)
+ return error;
+
+ kobject_get(p->kobj.parent);
+
+ return 0;
}
static void cdev_unmap(dev_t dev, unsigned count)
@@ -498,14 +508,20 @@ void cdev_del(struct cdev *p)
static void cdev_default_release(struct kobject *kobj)
{
struct cdev *p = container_of(kobj, struct cdev, kobj);
+ struct kobject *parent = kobj->parent;
+
cdev_purge(p);
+ kobject_put(parent);
}
static void cdev_dynamic_release(struct kobject *kobj)
{
struct cdev *p = container_of(kobj, struct cdev, kobj);
+ struct kobject *parent = kobj->parent;
+
cdev_purge(p);
kfree(p);
+ kobject_put(parent);
}
static struct kobj_type ktype_cdev_default = {
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
index fc783e264420..0fb15bbbe43c 100644
--- a/fs/cifs/cifsacl.c
+++ b/fs/cifs/cifsacl.c
@@ -225,6 +225,13 @@ sid_to_str(struct cifs_sid *sidptr, char *sidstr)
}
static void
+cifs_copy_sid(struct cifs_sid *dst, const struct cifs_sid *src)
+{
+ memcpy(dst, src, sizeof(*dst));
+ dst->num_subauth = min_t(u8, src->num_subauth, NUM_SUBAUTHS);
+}
+
+static void
id_rb_insert(struct rb_root *root, struct cifs_sid *sidptr,
struct cifs_sid_id **psidid, char *typestr)
{
@@ -248,7 +255,7 @@ id_rb_insert(struct rb_root *root, struct cifs_sid *sidptr,
}
}
- memcpy(&(*psidid)->sid, sidptr, sizeof(struct cifs_sid));
+ cifs_copy_sid(&(*psidid)->sid, sidptr);
(*psidid)->time = jiffies - (SID_MAP_RETRY + 1);
(*psidid)->refcount = 0;
@@ -354,7 +361,7 @@ id_to_sid(unsigned long cid, uint sidtype, struct cifs_sid *ssid)
* any fields of the node after a reference is put .
*/
if (test_bit(SID_ID_MAPPED, &psidid->state)) {
- memcpy(ssid, &psidid->sid, sizeof(struct cifs_sid));
+ cifs_copy_sid(ssid, &psidid->sid);
psidid->time = jiffies; /* update ts for accessing */
goto id_sid_out;
}
@@ -370,14 +377,14 @@ id_to_sid(unsigned long cid, uint sidtype, struct cifs_sid *ssid)
if (IS_ERR(sidkey)) {
rc = -EINVAL;
cFYI(1, "%s: Can't map and id to a SID", __func__);
+ } else if (sidkey->datalen < sizeof(struct cifs_sid)) {
+ rc = -EIO;
+ cFYI(1, "%s: Downcall contained malformed key "
+ "(datalen=%hu)", __func__, sidkey->datalen);
} else {
lsid = (struct cifs_sid *)sidkey->payload.data;
- memcpy(&psidid->sid, lsid,
- sidkey->datalen < sizeof(struct cifs_sid) ?
- sidkey->datalen : sizeof(struct cifs_sid));
- memcpy(ssid, &psidid->sid,
- sidkey->datalen < sizeof(struct cifs_sid) ?
- sidkey->datalen : sizeof(struct cifs_sid));
+ cifs_copy_sid(&psidid->sid, lsid);
+ cifs_copy_sid(ssid, &psidid->sid);
set_bit(SID_ID_MAPPED, &psidid->state);
key_put(sidkey);
kfree(psidid->sidstr);
@@ -396,7 +403,7 @@ id_to_sid(unsigned long cid, uint sidtype, struct cifs_sid *ssid)
return rc;
}
if (test_bit(SID_ID_MAPPED, &psidid->state))
- memcpy(ssid, &psidid->sid, sizeof(struct cifs_sid));
+ cifs_copy_sid(ssid, &psidid->sid);
else
rc = -EINVAL;
}
@@ -675,8 +682,6 @@ int compare_sids(const struct cifs_sid *ctsid, const struct cifs_sid *cwsid)
static void copy_sec_desc(const struct cifs_ntsd *pntsd,
struct cifs_ntsd *pnntsd, __u32 sidsoffset)
{
- int i;
-
struct cifs_sid *owner_sid_ptr, *group_sid_ptr;
struct cifs_sid *nowner_sid_ptr, *ngroup_sid_ptr;
@@ -692,26 +697,14 @@ static void copy_sec_desc(const struct cifs_ntsd *pntsd,
owner_sid_ptr = (struct cifs_sid *)((char *)pntsd +
le32_to_cpu(pntsd->osidoffset));
nowner_sid_ptr = (struct cifs_sid *)((char *)pnntsd + sidsoffset);
-
- nowner_sid_ptr->revision = owner_sid_ptr->revision;
- nowner_sid_ptr->num_subauth = owner_sid_ptr->num_subauth;
- for (i = 0; i < 6; i++)
- nowner_sid_ptr->authority[i] = owner_sid_ptr->authority[i];
- for (i = 0; i < 5; i++)
- nowner_sid_ptr->sub_auth[i] = owner_sid_ptr->sub_auth[i];
+ cifs_copy_sid(nowner_sid_ptr, owner_sid_ptr);
/* copy group sid */
group_sid_ptr = (struct cifs_sid *)((char *)pntsd +
le32_to_cpu(pntsd->gsidoffset));
ngroup_sid_ptr = (struct cifs_sid *)((char *)pnntsd + sidsoffset +
sizeof(struct cifs_sid));
-
- ngroup_sid_ptr->revision = group_sid_ptr->revision;
- ngroup_sid_ptr->num_subauth = group_sid_ptr->num_subauth;
- for (i = 0; i < 6; i++)
- ngroup_sid_ptr->authority[i] = group_sid_ptr->authority[i];
- for (i = 0; i < 5; i++)
- ngroup_sid_ptr->sub_auth[i] = group_sid_ptr->sub_auth[i];
+ cifs_copy_sid(ngroup_sid_ptr, group_sid_ptr);
return;
}
@@ -1120,8 +1113,7 @@ static int build_sec_desc(struct cifs_ntsd *pntsd, struct cifs_ntsd *pnntsd,
kfree(nowner_sid_ptr);
return rc;
}
- memcpy(owner_sid_ptr, nowner_sid_ptr,
- sizeof(struct cifs_sid));
+ cifs_copy_sid(owner_sid_ptr, nowner_sid_ptr);
kfree(nowner_sid_ptr);
*aclflag = CIFS_ACL_OWNER;
}
@@ -1139,8 +1131,7 @@ static int build_sec_desc(struct cifs_ntsd *pntsd, struct cifs_ntsd *pnntsd,
kfree(ngroup_sid_ptr);
return rc;
}
- memcpy(group_sid_ptr, ngroup_sid_ptr,
- sizeof(struct cifs_sid));
+ cifs_copy_sid(group_sid_ptr, ngroup_sid_ptr);
kfree(ngroup_sid_ptr);
*aclflag = CIFS_ACL_GROUP;
}
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 7c0a81283645..d3671f2acb29 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -398,7 +398,16 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry,
* in network traffic in the other paths.
*/
if (!(oflags & O_CREAT)) {
- struct dentry *res = cifs_lookup(inode, direntry, 0);
+ struct dentry *res;
+
+ /*
+ * Check for hashed negative dentry. We have already revalidated
+ * the dentry and it is fine. No need to perform another lookup.
+ */
+ if (!d_unhashed(direntry))
+ return -ENOENT;
+
+ res = cifs_lookup(inode, direntry, 0);
if (IS_ERR(res))
return PTR_ERR(res);
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index edb25b4bbb95..70b6f4c3a0c1 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -1794,7 +1794,6 @@ static int cifs_writepages(struct address_space *mapping,
struct TCP_Server_Info *server;
struct page *page;
int rc = 0;
- loff_t isize = i_size_read(mapping->host);
/*
* If wsize is smaller than the page cache size, default to writing
@@ -1899,7 +1898,7 @@ retry:
*/
set_page_writeback(page);
- if (page_offset(page) >= isize) {
+ if (page_offset(page) >= i_size_read(mapping->host)) {
done = true;
unlock_page(page);
end_page_writeback(page);
@@ -1932,7 +1931,8 @@ retry:
wdata->offset = page_offset(wdata->pages[0]);
wdata->pagesz = PAGE_CACHE_SIZE;
wdata->tailsz =
- min(isize - page_offset(wdata->pages[nr_pages - 1]),
+ min(i_size_read(mapping->host) -
+ page_offset(wdata->pages[nr_pages - 1]),
(loff_t)PAGE_CACHE_SIZE);
wdata->bytes = ((nr_pages - 1) * PAGE_CACHE_SIZE) +
wdata->tailsz;
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index f9b5d3d6cf33..1c576e871366 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -86,14 +86,17 @@ cifs_readdir_lookup(struct dentry *parent, struct qstr *name,
dentry = d_lookup(parent, name);
if (dentry) {
+ int err;
inode = dentry->d_inode;
/* update inode in place if i_ino didn't change */
if (inode && CIFS_I(inode)->uniqueid == fattr->cf_uniqueid) {
cifs_fattr_to_inode(inode, fattr);
return dentry;
}
- d_drop(dentry);
+ err = d_invalidate(dentry);
dput(dentry);
+ if (err)
+ return NULL;
}
dentry = d_alloc(parent, name);
diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c
index 56cc4be87807..34cea2798333 100644
--- a/fs/cifs/smb1ops.c
+++ b/fs/cifs/smb1ops.c
@@ -766,7 +766,6 @@ smb_set_file_info(struct inode *inode, const char *full_path,
struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
struct tcon_link *tlink = NULL;
struct cifs_tcon *tcon;
- FILE_BASIC_INFO info_buf;
/* if the file is already open for write, just use that fileid */
open_file = find_writable_file(cinode, true);
@@ -817,7 +816,7 @@ smb_set_file_info(struct inode *inode, const char *full_path,
netpid = current->tgid;
set_via_filehandle:
- rc = CIFSSMBSetFileInfo(xid, tcon, &info_buf, netfid, netpid);
+ rc = CIFSSMBSetFileInfo(xid, tcon, buf, netfid, netpid);
if (!rc)
cinode->cifsAttrs = le32_to_cpu(buf->Attributes);
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index f5054025f9da..e2f57a007029 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -210,6 +210,8 @@ static int do_video_set_spu_palette(unsigned int fd, unsigned int cmd,
err = get_user(palp, &up->palette);
err |= get_user(length, &up->length);
+ if (err)
+ return -EFAULT;
up_native = compat_alloc_user_space(sizeof(struct video_spu_palette));
err = put_user(compat_ptr(palp), &up_native->palette);
@@ -842,6 +844,9 @@ COMPATIBLE_IOCTL(TIOCGDEV)
COMPATIBLE_IOCTL(TIOCCBRK)
COMPATIBLE_IOCTL(TIOCGSID)
COMPATIBLE_IOCTL(TIOCGICOUNT)
+COMPATIBLE_IOCTL(TIOCGPKT)
+COMPATIBLE_IOCTL(TIOCGPTLCK)
+COMPATIBLE_IOCTL(TIOCGEXCL)
/* Little t */
COMPATIBLE_IOCTL(TIOCGETD)
COMPATIBLE_IOCTL(TIOCSETD)
diff --git a/fs/coredump.c b/fs/coredump.c
index fd37facac8dc..ce47379bfa61 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -450,11 +450,12 @@ static int umh_pipe_setup(struct subprocess_info *info, struct cred *new)
cp->file = files[1];
- replace_fd(0, files[0], 0);
+ err = replace_fd(0, files[0], 0);
+ fput(files[0]);
/* and disallow core files too */
current->signal->rlim[RLIMIT_CORE] = (struct rlimit){1, 1};
- return 0;
+ return err;
}
void do_coredump(siginfo_t *siginfo, struct pt_regs *regs)
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index b607d92cdf24..153bb1e42e63 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -59,7 +59,6 @@ static struct inode *debugfs_get_inode(struct super_block *sb, umode_t mode, dev
case S_IFDIR:
inode->i_op = &simple_dir_inode_operations;
inode->i_fop = &simple_dir_operations;
- inode->i_private = NULL;
/* directory inodes start off with i_nlink == 2
* (for "." entry) */
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index 14afbabe6546..472e6befc54d 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -545,37 +545,38 @@ void devpts_kill_index(struct inode *ptmx_inode, int idx)
mutex_unlock(&allocated_ptys_lock);
}
-int devpts_pty_new(struct inode *ptmx_inode, struct tty_struct *tty)
+/**
+ * devpts_pty_new -- create a new inode in /dev/pts/
+ * @ptmx_inode: inode of the master
+ * @device: major+minor of the node to be created
+ * @index: used as a name of the node
+ * @priv: what's given back by devpts_get_priv
+ *
+ * The created inode is returned. Remove it from /dev/pts/ by devpts_pty_kill.
+ */
+struct inode *devpts_pty_new(struct inode *ptmx_inode, dev_t device, int index,
+ void *priv)
{
- /* tty layer puts index from devpts_new_index() in here */
- int number = tty->index;
- struct tty_driver *driver = tty->driver;
- dev_t device = MKDEV(driver->major, driver->minor_start+number);
struct dentry *dentry;
struct super_block *sb = pts_sb_from_inode(ptmx_inode);
- struct inode *inode = new_inode(sb);
+ struct inode *inode;
struct dentry *root = sb->s_root;
struct pts_fs_info *fsi = DEVPTS_SB(sb);
struct pts_mount_opts *opts = &fsi->mount_opts;
- int ret = 0;
char s[12];
- /* We're supposed to be given the slave end of a pty */
- BUG_ON(driver->type != TTY_DRIVER_TYPE_PTY);
- BUG_ON(driver->subtype != PTY_TYPE_SLAVE);
-
+ inode = new_inode(sb);
if (!inode)
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);
- inode->i_ino = number + 3;
+ inode->i_ino = index + 3;
inode->i_uid = opts->setuid ? opts->uid : current_fsuid();
inode->i_gid = opts->setgid ? opts->gid : current_fsgid();
inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
init_special_inode(inode, S_IFCHR|opts->mode, device);
- inode->i_private = tty;
- tty->driver_data = inode;
+ inode->i_private = priv;
- sprintf(s, "%d", number);
+ sprintf(s, "%d", index);
mutex_lock(&root->d_inode->i_mutex);
@@ -585,18 +586,24 @@ int devpts_pty_new(struct inode *ptmx_inode, struct tty_struct *tty)
fsnotify_create(root->d_inode, dentry);
} else {
iput(inode);
- ret = -ENOMEM;
+ inode = ERR_PTR(-ENOMEM);
}
mutex_unlock(&root->d_inode->i_mutex);
- return ret;
+ return inode;
}
-struct tty_struct *devpts_get_tty(struct inode *pts_inode, int number)
+/**
+ * devpts_get_priv -- get private data for a slave
+ * @pts_inode: inode of the slave
+ *
+ * Returns whatever was passed as priv in devpts_pty_new for a given inode.
+ */
+void *devpts_get_priv(struct inode *pts_inode)
{
struct dentry *dentry;
- struct tty_struct *tty;
+ void *priv = NULL;
BUG_ON(pts_inode->i_rdev == MKDEV(TTYAUX_MAJOR, PTMX_MINOR));
@@ -605,18 +612,22 @@ struct tty_struct *devpts_get_tty(struct inode *pts_inode, int number)
if (!dentry)
return NULL;
- tty = NULL;
if (pts_inode->i_sb->s_magic == DEVPTS_SUPER_MAGIC)
- tty = (struct tty_struct *)pts_inode->i_private;
+ priv = pts_inode->i_private;
dput(dentry);
- return tty;
+ return priv;
}
-void devpts_pty_kill(struct tty_struct *tty)
+/**
+ * devpts_pty_kill -- remove inode form /dev/pts/
+ * @inode: inode of the slave to be removed
+ *
+ * This is an inverse operation of devpts_pty_new.
+ */
+void devpts_pty_kill(struct inode *inode)
{
- struct inode *inode = tty->driver_data;
struct super_block *sb = pts_sb_from_inode(inode);
struct dentry *root = sb->s_root;
struct dentry *dentry;
diff --git a/fs/direct-io.c b/fs/direct-io.c
index f86c720dba0e..cf5b44b10c67 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -540,6 +540,7 @@ static int get_more_blocks(struct dio *dio, struct dio_submit *sdio,
sector_t fs_endblk; /* Into file, in filesystem-sized blocks */
unsigned long fs_count; /* Number of filesystem-sized blocks */
int create;
+ unsigned int i_blkbits = sdio->blkbits + sdio->blkfactor;
/*
* If there was a memory error and we've overwritten all the
@@ -554,7 +555,7 @@ static int get_more_blocks(struct dio *dio, struct dio_submit *sdio,
fs_count = fs_endblk - fs_startblk + 1;
map_bh->b_state = 0;
- map_bh->b_size = fs_count << dio->inode->i_blkbits;
+ map_bh->b_size = fs_count << i_blkbits;
/*
* For writes inside i_size on a DIO_SKIP_HOLES filesystem we
@@ -1053,7 +1054,8 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
int seg;
size_t size;
unsigned long addr;
- unsigned blkbits = inode->i_blkbits;
+ unsigned i_blkbits = ACCESS_ONCE(inode->i_blkbits);
+ unsigned blkbits = i_blkbits;
unsigned blocksize_mask = (1 << blkbits) - 1;
ssize_t retval = -EINVAL;
loff_t end = offset;
@@ -1149,7 +1151,7 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
dio->inode = inode;
dio->rw = rw;
sdio.blkbits = blkbits;
- sdio.blkfactor = inode->i_blkbits - blkbits;
+ sdio.blkfactor = i_blkbits - blkbits;
sdio.block_in_file = offset >> blkbits;
sdio.get_block = get_block;
diff --git a/fs/dlm/Kconfig b/fs/dlm/Kconfig
index 1897eb1b4b6a..e4242c3f8486 100644
--- a/fs/dlm/Kconfig
+++ b/fs/dlm/Kconfig
@@ -1,6 +1,6 @@
menuconfig DLM
tristate "Distributed Lock Manager (DLM)"
- depends on EXPERIMENTAL && INET
+ depends on INET
depends on SYSFS && CONFIGFS_FS && (IPV6 || IPV6=n)
select IP_SCTP
help
diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h
index 871c1abf6029..77c0f70f8fe8 100644
--- a/fs/dlm/dlm_internal.h
+++ b/fs/dlm/dlm_internal.h
@@ -337,6 +337,7 @@ enum rsb_flags {
RSB_NEW_MASTER2,
RSB_RECOVER_CONVERT,
RSB_RECOVER_GRANT,
+ RSB_RECOVER_LVB_INVAL,
};
static inline void rsb_set_flag(struct dlm_rsb *r, enum rsb_flags flag)
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index b56950758188..a579f30f237d 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -5393,6 +5393,13 @@ static void purge_dead_list(struct dlm_ls *ls, struct dlm_rsb *r,
if ((lkb->lkb_nodeid == nodeid_gone) ||
dlm_is_removed(ls, lkb->lkb_nodeid)) {
+ /* tell recover_lvb to invalidate the lvb
+ because a node holding EX/PW failed */
+ if ((lkb->lkb_exflags & DLM_LKF_VALBLK) &&
+ (lkb->lkb_grmode >= DLM_LOCK_PW)) {
+ rsb_set_flag(r, RSB_RECOVER_LVB_INVAL);
+ }
+
del_lkb(r, lkb);
/* this put should free the lkb */
@@ -6025,15 +6032,18 @@ static int orphan_proc_lock(struct dlm_ls *ls, struct dlm_lkb *lkb)
return error;
}
-/* The force flag allows the unlock to go ahead even if the lkb isn't granted.
- Regardless of what rsb queue the lock is on, it's removed and freed. */
+/* The FORCEUNLOCK flag allows the unlock to go ahead even if the lkb isn't
+ granted. Regardless of what rsb queue the lock is on, it's removed and
+ freed. The IVVALBLK flag causes the lvb on the resource to be invalidated
+ if our lock is PW/EX (it's ignored if our granted mode is smaller.) */
static int unlock_proc_lock(struct dlm_ls *ls, struct dlm_lkb *lkb)
{
struct dlm_args args;
int error;
- set_unlock_args(DLM_LKF_FORCEUNLOCK, lkb->lkb_ua, &args);
+ set_unlock_args(DLM_LKF_FORCEUNLOCK | DLM_LKF_IVVALBLK,
+ lkb->lkb_ua, &args);
error = unlock_lock(ls, lkb, &args);
if (error == -DLM_EUNLOCK)
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 331ea4f94efd..dd87a31bcc21 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -1385,7 +1385,6 @@ void *dlm_lowcomms_get_buffer(int nodeid, int len, gfp_t allocation, char **ppc)
struct connection *con;
struct writequeue_entry *e;
int offset = 0;
- int users = 0;
con = nodeid2con(nodeid, allocation);
if (!con)
@@ -1399,7 +1398,7 @@ void *dlm_lowcomms_get_buffer(int nodeid, int len, gfp_t allocation, char **ppc)
} else {
offset = e->end;
e->end += len;
- users = e->users++;
+ e->users++;
}
spin_unlock(&con->writequeue_lock);
@@ -1414,7 +1413,7 @@ void *dlm_lowcomms_get_buffer(int nodeid, int len, gfp_t allocation, char **ppc)
spin_lock(&con->writequeue_lock);
offset = e->end;
e->end += len;
- users = e->users++;
+ e->users++;
list_add_tail(&e->list, &con->writequeue);
spin_unlock(&con->writequeue_lock);
goto got_one;
diff --git a/fs/dlm/recover.c b/fs/dlm/recover.c
index 4a7a76e42fc3..aedea28a86a1 100644
--- a/fs/dlm/recover.c
+++ b/fs/dlm/recover.c
@@ -717,8 +717,14 @@ void dlm_recovered_lock(struct dlm_rsb *r)
* the VALNOTVALID flag if necessary, and determining the correct lvb contents
* based on the lvb's of the locks held on the rsb.
*
- * RSB_VALNOTVALID is set if there are only NL/CR locks on the rsb. If it
- * was already set prior to recovery, it's not cleared, regardless of locks.
+ * RSB_VALNOTVALID is set in two cases:
+ *
+ * 1. we are master, but not new, and we purged an EX/PW lock held by a
+ * failed node (in dlm_recover_purge which set RSB_RECOVER_LVB_INVAL)
+ *
+ * 2. we are a new master, and there are only NL/CR locks left.
+ * (We could probably improve this by only invaliding in this way when
+ * the previous master left uncleanly. VMS docs mention that.)
*
* The LVB contents are only considered for changing when this is a new master
* of the rsb (NEW_MASTER2). Then, the rsb's lvb is taken from any lkb with
@@ -734,6 +740,19 @@ static void recover_lvb(struct dlm_rsb *r)
int big_lock_exists = 0;
int lvblen = r->res_ls->ls_lvblen;
+ if (!rsb_flag(r, RSB_NEW_MASTER2) &&
+ rsb_flag(r, RSB_RECOVER_LVB_INVAL)) {
+ /* case 1 above */
+ rsb_set_flag(r, RSB_VALNOTVALID);
+ return;
+ }
+
+ if (!rsb_flag(r, RSB_NEW_MASTER2))
+ return;
+
+ /* we are the new master, so figure out if VALNOTVALID should
+ be set, and set the rsb lvb from the best lkb available. */
+
list_for_each_entry(lkb, &r->res_grantqueue, lkb_statequeue) {
if (!(lkb->lkb_exflags & DLM_LKF_VALBLK))
continue;
@@ -772,13 +791,10 @@ static void recover_lvb(struct dlm_rsb *r)
if (!lock_lvb_exists)
goto out;
+ /* lvb is invalidated if only NL/CR locks remain */
if (!big_lock_exists)
rsb_set_flag(r, RSB_VALNOTVALID);
- /* don't mess with the lvb unless we're the new master */
- if (!rsb_flag(r, RSB_NEW_MASTER2))
- goto out;
-
if (!r->res_lvbptr) {
r->res_lvbptr = dlm_allocate_lvb(r->res_ls);
if (!r->res_lvbptr)
@@ -852,12 +868,19 @@ void dlm_recover_rsbs(struct dlm_ls *ls)
if (is_master(r)) {
if (rsb_flag(r, RSB_RECOVER_CONVERT))
recover_conversion(r);
+
+ /* recover lvb before granting locks so the updated
+ lvb/VALNOTVALID is presented in the completion */
+ recover_lvb(r);
+
if (rsb_flag(r, RSB_NEW_MASTER2))
recover_grant(r);
- recover_lvb(r);
count++;
+ } else {
+ rsb_clear_flag(r, RSB_VALNOTVALID);
}
rsb_clear_flag(r, RSB_RECOVER_CONVERT);
+ rsb_clear_flag(r, RSB_RECOVER_LVB_INVAL);
rsb_clear_flag(r, RSB_NEW_MASTER2);
unlock_rsb(r);
}
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index da72250ddc1c..cd96649bfe62 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -346,7 +346,7 @@ static inline struct epitem *ep_item_from_epqueue(poll_table *p)
/* Tells if the epoll_ctl(2) operation needs an event copy from userspace */
static inline int ep_op_has_event(int op)
{
- return op == EPOLL_CTL_ADD || op == EPOLL_CTL_MOD;
+ return op != EPOLL_CTL_DEL;
}
/* Initialize the poll safe wake up structure */
@@ -676,34 +676,6 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi)
return 0;
}
-/*
- * Disables a "struct epitem" in the eventpoll set. Returns -EBUSY if the item
- * had no event flags set, indicating that another thread may be currently
- * handling that item's events (in the case that EPOLLONESHOT was being
- * used). Otherwise a zero result indicates that the item has been disabled
- * from receiving events. A disabled item may be re-enabled via
- * EPOLL_CTL_MOD. Must be called with "mtx" held.
- */
-static int ep_disable(struct eventpoll *ep, struct epitem *epi)
-{
- int result = 0;
- unsigned long flags;
-
- spin_lock_irqsave(&ep->lock, flags);
- if (epi->event.events & ~EP_PRIVATE_BITS) {
- if (ep_is_linked(&epi->rdllink))
- list_del_init(&epi->rdllink);
- /* Ensure ep_poll_callback will not add epi back onto ready
- list: */
- epi->event.events &= EP_PRIVATE_BITS;
- }
- else
- result = -EBUSY;
- spin_unlock_irqrestore(&ep->lock, flags);
-
- return result;
-}
-
static void ep_free(struct eventpoll *ep)
{
struct rb_node *rbp;
@@ -1048,6 +1020,8 @@ static void ep_rbtree_insert(struct eventpoll *ep, struct epitem *epi)
rb_insert_color(&epi->rbn, &ep->rbr);
}
+
+
#define PATH_ARR_SIZE 5
/*
* These are the number paths of length 1 to 5, that we are allowing to emanate
@@ -1813,12 +1787,6 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
} else
error = -ENOENT;
break;
- case EPOLL_CTL_DISABLE:
- if (epi)
- error = ep_disable(ep, epi);
- else
- error = -ENOENT;
- break;
}
mutex_unlock(&ep->mtx);
diff --git a/fs/exec.c b/fs/exec.c
index 8b9011b67041..0039055b1fc6 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1083,7 +1083,8 @@ int flush_old_exec(struct linux_binprm * bprm)
bprm->mm = NULL; /* We're using it now */
set_fs(USER_DS);
- current->flags &= ~(PF_RANDOMIZE | PF_FORKNOEXEC | PF_KTHREAD);
+ current->flags &=
+ ~(PF_RANDOMIZE | PF_FORKNOEXEC | PF_KTHREAD | PF_NOFREEZE);
flush_thread();
current->personality &= ~bprm->per_clear;
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 6c205d0c565b..fa04d023177e 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -469,7 +469,7 @@ static int parse_options(char *options, struct super_block *sb)
uid = make_kuid(current_user_ns(), option);
if (!uid_valid(uid)) {
ext2_msg(sb, KERN_ERR, "Invalid uid value %d", option);
- return -1;
+ return 0;
}
sbi->s_resuid = uid;
@@ -480,7 +480,7 @@ static int parse_options(char *options, struct super_block *sb)
gid = make_kgid(current_user_ns(), option);
if (!gid_valid(gid)) {
ext2_msg(sb, KERN_ERR, "Invalid gid value %d", option);
- return -1;
+ return 0;
}
sbi->s_resgid = gid;
break;
diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c
index 7320a66e958f..22548f56197b 100644
--- a/fs/ext3/balloc.c
+++ b/fs/ext3/balloc.c
@@ -2101,8 +2101,9 @@ int ext3_trim_fs(struct super_block *sb, struct fstrim_range *range)
end = start + (range->len >> sb->s_blocksize_bits) - 1;
minlen = range->minlen >> sb->s_blocksize_bits;
- if (unlikely(minlen > EXT3_BLOCKS_PER_GROUP(sb)) ||
- unlikely(start >= max_blks))
+ if (minlen > EXT3_BLOCKS_PER_GROUP(sb) ||
+ start >= max_blks ||
+ range->len < sb->s_blocksize)
return -EINVAL;
if (end >= max_blks)
end = max_blks - 1;
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 8f4fddac01a6..890b8947c546 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -46,8 +46,7 @@ static struct buffer_head *ext3_append(handle_t *handle,
*block = inode->i_size >> inode->i_sb->s_blocksize_bits;
- bh = ext3_bread(handle, inode, *block, 1, err);
- if (bh) {
+ if ((bh = ext3_dir_bread(handle, inode, *block, 1, err))) {
inode->i_size += inode->i_sb->s_blocksize;
EXT3_I(inode)->i_disksize = inode->i_size;
*err = ext3_journal_get_write_access(handle, bh);
@@ -339,8 +338,10 @@ dx_probe(struct qstr *entry, struct inode *dir,
u32 hash;
frame->bh = NULL;
- if (!(bh = ext3_bread (NULL,dir, 0, 0, err)))
+ if (!(bh = ext3_dir_bread(NULL, dir, 0, 0, err))) {
+ *err = ERR_BAD_DX_DIR;
goto fail;
+ }
root = (struct dx_root *) bh->b_data;
if (root->info.hash_version != DX_HASH_TEA &&
root->info.hash_version != DX_HASH_HALF_MD4 &&
@@ -436,8 +437,10 @@ dx_probe(struct qstr *entry, struct inode *dir,
frame->entries = entries;
frame->at = at;
if (!indirect--) return frame;
- if (!(bh = ext3_bread (NULL,dir, dx_get_block(at), 0, err)))
+ if (!(bh = ext3_dir_bread(NULL, dir, dx_get_block(at), 0, err))) {
+ *err = ERR_BAD_DX_DIR;
goto fail2;
+ }
at = entries = ((struct dx_node *) bh->b_data)->entries;
if (dx_get_limit(entries) != dx_node_limit (dir)) {
ext3_warning(dir->i_sb, __func__,
@@ -535,8 +538,8 @@ static int ext3_htree_next_block(struct inode *dir, __u32 hash,
* block so no check is necessary
*/
while (num_frames--) {
- if (!(bh = ext3_bread(NULL, dir, dx_get_block(p->at),
- 0, &err)))
+ if (!(bh = ext3_dir_bread(NULL, dir, dx_get_block(p->at),
+ 0, &err)))
return err; /* Failure */
p++;
brelse (p->bh);
@@ -559,10 +562,11 @@ static int htree_dirblock_to_tree(struct file *dir_file,
{
struct buffer_head *bh;
struct ext3_dir_entry_2 *de, *top;
- int err, count = 0;
+ int err = 0, count = 0;
dxtrace(printk("In htree dirblock_to_tree: block %d\n", block));
- if (!(bh = ext3_bread (NULL, dir, block, 0, &err)))
+
+ if (!(bh = ext3_dir_bread(NULL, dir, block, 0, &err)))
return err;
de = (struct ext3_dir_entry_2 *) bh->b_data;
@@ -976,7 +980,7 @@ static struct buffer_head * ext3_dx_find_entry(struct inode *dir,
return NULL;
do {
block = dx_get_block(frame->at);
- if (!(bh = ext3_bread (NULL,dir, block, 0, err)))
+ if (!(bh = ext3_dir_bread (NULL, dir, block, 0, err)))
goto errout;
retval = search_dirblock(bh, dir, entry,
@@ -1458,9 +1462,9 @@ static int ext3_add_entry (handle_t *handle, struct dentry *dentry,
}
blocks = dir->i_size >> sb->s_blocksize_bits;
for (block = 0; block < blocks; block++) {
- bh = ext3_bread(handle, dir, block, 0, &retval);
- if(!bh)
+ if (!(bh = ext3_dir_bread(handle, dir, block, 0, &retval)))
return retval;
+
retval = add_dirent_to_buf(handle, dentry, inode, NULL, bh);
if (retval != -ENOSPC)
return retval;
@@ -1500,7 +1504,7 @@ static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry,
entries = frame->entries;
at = frame->at;
- if (!(bh = ext3_bread(handle,dir, dx_get_block(frame->at), 0, &err)))
+ if (!(bh = ext3_dir_bread(handle, dir, dx_get_block(frame->at), 0, &err)))
goto cleanup;
BUFFER_TRACE(bh, "get_write_access");
@@ -1790,8 +1794,7 @@ retry:
inode->i_op = &ext3_dir_inode_operations;
inode->i_fop = &ext3_dir_operations;
inode->i_size = EXT3_I(inode)->i_disksize = inode->i_sb->s_blocksize;
- dir_block = ext3_bread (handle, inode, 0, 1, &err);
- if (!dir_block)
+ if (!(dir_block = ext3_dir_bread(handle, inode, 0, 1, &err)))
goto out_clear_inode;
BUFFER_TRACE(dir_block, "get_write_access");
@@ -1859,7 +1862,7 @@ static int empty_dir (struct inode * inode)
sb = inode->i_sb;
if (inode->i_size < EXT3_DIR_REC_LEN(1) + EXT3_DIR_REC_LEN(2) ||
- !(bh = ext3_bread (NULL, inode, 0, 0, &err))) {
+ !(bh = ext3_dir_bread(NULL, inode, 0, 0, &err))) {
if (err)
ext3_error(inode->i_sb, __func__,
"error %d reading directory #%lu offset 0",
@@ -1890,9 +1893,8 @@ static int empty_dir (struct inode * inode)
(void *) de >= (void *) (bh->b_data+sb->s_blocksize)) {
err = 0;
brelse (bh);
- bh = ext3_bread (NULL, inode,
- offset >> EXT3_BLOCK_SIZE_BITS(sb), 0, &err);
- if (!bh) {
+ if (!(bh = ext3_dir_bread (NULL, inode,
+ offset >> EXT3_BLOCK_SIZE_BITS(sb), 0, &err))) {
if (err)
ext3_error(sb, __func__,
"error %d reading directory"
@@ -2388,7 +2390,7 @@ static int ext3_rename (struct inode * old_dir, struct dentry *old_dentry,
goto end_rename;
}
retval = -EIO;
- dir_bh = ext3_bread (handle, old_inode, 0, 0, &retval);
+ dir_bh = ext3_dir_bread(handle, old_inode, 0, 0, &retval);
if (!dir_bh)
goto end_rename;
if (le32_to_cpu(PARENT_INO(dir_bh->b_data)) != old_dir->i_ino)
diff --git a/fs/ext3/namei.h b/fs/ext3/namei.h
index f2ce2b0065c9..46304d8c9f0a 100644
--- a/fs/ext3/namei.h
+++ b/fs/ext3/namei.h
@@ -6,3 +6,22 @@
*/
extern struct dentry *ext3_get_parent(struct dentry *child);
+
+static inline struct buffer_head *ext3_dir_bread(handle_t *handle,
+ struct inode *inode,
+ int block, int create,
+ int *err)
+{
+ struct buffer_head *bh;
+
+ bh = ext3_bread(handle, inode, block, create, err);
+
+ if (!bh && !(*err)) {
+ *err = -EIO;
+ ext3_error(inode->i_sb, __func__,
+ "Directory hole detected on inode %lu\n",
+ inode->i_ino);
+ return NULL;
+ }
+ return bh;
+}
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 29e79713c7eb..5366393528df 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -1001,7 +1001,7 @@ static int parse_options (char *options, struct super_block *sb,
uid = make_kuid(current_user_ns(), option);
if (!uid_valid(uid)) {
ext3_msg(sb, KERN_ERR, "Invalid uid value %d", option);
- return -1;
+ return 0;
}
sbi->s_resuid = uid;
@@ -1012,7 +1012,7 @@ static int parse_options (char *options, struct super_block *sb,
gid = make_kgid(current_user_ns(), option);
if (!gid_valid(gid)) {
ext3_msg(sb, KERN_ERR, "Invalid gid value %d", option);
- return -1;
+ return 0;
}
sbi->s_resgid = gid;
break;
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 1b5089067d01..cf1821784a16 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -174,8 +174,7 @@ void ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
ext4_free_inodes_set(sb, gdp, 0);
ext4_itable_unused_set(sb, gdp, 0);
memset(bh->b_data, 0xff, sb->s_blocksize);
- ext4_block_bitmap_csum_set(sb, block_group, gdp, bh,
- EXT4_BLOCKS_PER_GROUP(sb) / 8);
+ ext4_block_bitmap_csum_set(sb, block_group, gdp, bh);
return;
}
memset(bh->b_data, 0, sb->s_blocksize);
@@ -212,8 +211,7 @@ void ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
*/
ext4_mark_bitmap_end(num_clusters_in_group(sb, block_group),
sb->s_blocksize * 8, bh->b_data);
- ext4_block_bitmap_csum_set(sb, block_group, gdp, bh,
- EXT4_BLOCKS_PER_GROUP(sb) / 8);
+ ext4_block_bitmap_csum_set(sb, block_group, gdp, bh);
ext4_group_desc_csum_set(sb, block_group, gdp);
}
@@ -350,7 +348,7 @@ void ext4_validate_block_bitmap(struct super_block *sb,
return;
}
if (unlikely(!ext4_block_bitmap_csum_verify(sb, block_group,
- desc, bh, EXT4_BLOCKS_PER_GROUP(sb) / 8))) {
+ desc, bh))) {
ext4_unlock_group(sb, block_group);
ext4_error(sb, "bg %u: bad block bitmap checksum", block_group);
return;
diff --git a/fs/ext4/bitmap.c b/fs/ext4/bitmap.c
index 5c2d1813ebe9..3285aa5a706a 100644
--- a/fs/ext4/bitmap.c
+++ b/fs/ext4/bitmap.c
@@ -58,11 +58,12 @@ void ext4_inode_bitmap_csum_set(struct super_block *sb, ext4_group_t group,
int ext4_block_bitmap_csum_verify(struct super_block *sb, ext4_group_t group,
struct ext4_group_desc *gdp,
- struct buffer_head *bh, int sz)
+ struct buffer_head *bh)
{
__u32 hi;
__u32 provided, calculated;
struct ext4_sb_info *sbi = EXT4_SB(sb);
+ int sz = EXT4_CLUSTERS_PER_GROUP(sb) / 8;
if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
@@ -84,8 +85,9 @@ int ext4_block_bitmap_csum_verify(struct super_block *sb, ext4_group_t group,
void ext4_block_bitmap_csum_set(struct super_block *sb, ext4_group_t group,
struct ext4_group_desc *gdp,
- struct buffer_head *bh, int sz)
+ struct buffer_head *bh)
{
+ int sz = EXT4_CLUSTERS_PER_GROUP(sb) / 8;
__u32 csum;
struct ext4_sb_info *sbi = EXT4_SB(sb);
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 3ab2539b7b2e..3c20de1d59d0 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1882,10 +1882,10 @@ int ext4_inode_bitmap_csum_verify(struct super_block *sb, ext4_group_t group,
struct buffer_head *bh, int sz);
void ext4_block_bitmap_csum_set(struct super_block *sb, ext4_group_t group,
struct ext4_group_desc *gdp,
- struct buffer_head *bh, int sz);
+ struct buffer_head *bh);
int ext4_block_bitmap_csum_verify(struct super_block *sb, ext4_group_t group,
struct ext4_group_desc *gdp,
- struct buffer_head *bh, int sz);
+ struct buffer_head *bh);
/* balloc.c */
extern void ext4_validate_block_bitmap(struct super_block *sb,
@@ -2063,8 +2063,7 @@ extern int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count);
extern int ext4_calculate_overhead(struct super_block *sb);
extern int ext4_superblock_csum_verify(struct super_block *sb,
struct ext4_super_block *es);
-extern void ext4_superblock_csum_set(struct super_block *sb,
- struct ext4_super_block *es);
+extern void ext4_superblock_csum_set(struct super_block *sb);
extern void *ext4_kvmalloc(size_t size, gfp_t flags);
extern void *ext4_kvzalloc(size_t size, gfp_t flags);
extern void ext4_kvfree(void *ptr);
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
index bfa65b49d424..b4323ba846b5 100644
--- a/fs/ext4/ext4_jbd2.c
+++ b/fs/ext4/ext4_jbd2.c
@@ -143,17 +143,13 @@ int __ext4_handle_dirty_super(const char *where, unsigned int line,
struct buffer_head *bh = EXT4_SB(sb)->s_sbh;
int err = 0;
+ ext4_superblock_csum_set(sb);
if (ext4_handle_valid(handle)) {
- ext4_superblock_csum_set(sb,
- (struct ext4_super_block *)bh->b_data);
err = jbd2_journal_dirty_metadata(handle, bh);
if (err)
ext4_journal_abort_handle(where, line, __func__,
bh, handle, err);
- } else {
- ext4_superblock_csum_set(sb,
- (struct ext4_super_block *)bh->b_data);
+ } else
mark_buffer_dirty(bh);
- }
return err;
}
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 1c94cca35ed1..7011ac967208 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -52,6 +52,9 @@
#define EXT4_EXT_MARK_UNINIT1 0x2 /* mark first half uninitialized */
#define EXT4_EXT_MARK_UNINIT2 0x4 /* mark second half uninitialized */
+#define EXT4_EXT_DATA_VALID1 0x8 /* first half contains valid data */
+#define EXT4_EXT_DATA_VALID2 0x10 /* second half contains valid data */
+
static __le32 ext4_extent_block_csum(struct inode *inode,
struct ext4_extent_header *eh)
{
@@ -2914,6 +2917,9 @@ static int ext4_split_extent_at(handle_t *handle,
unsigned int ee_len, depth;
int err = 0;
+ BUG_ON((split_flag & (EXT4_EXT_DATA_VALID1 | EXT4_EXT_DATA_VALID2)) ==
+ (EXT4_EXT_DATA_VALID1 | EXT4_EXT_DATA_VALID2));
+
ext_debug("ext4_split_extents_at: inode %lu, logical"
"block %llu\n", inode->i_ino, (unsigned long long)split);
@@ -2972,7 +2978,14 @@ static int ext4_split_extent_at(handle_t *handle,
err = ext4_ext_insert_extent(handle, inode, path, &newex, flags);
if (err == -ENOSPC && (EXT4_EXT_MAY_ZEROOUT & split_flag)) {
- err = ext4_ext_zeroout(inode, &orig_ex);
+ if (split_flag & (EXT4_EXT_DATA_VALID1|EXT4_EXT_DATA_VALID2)) {
+ if (split_flag & EXT4_EXT_DATA_VALID1)
+ err = ext4_ext_zeroout(inode, ex2);
+ else
+ err = ext4_ext_zeroout(inode, ex);
+ } else
+ err = ext4_ext_zeroout(inode, &orig_ex);
+
if (err)
goto fix_extent_len;
/* update the extent length and mark as initialized */
@@ -3025,12 +3038,13 @@ static int ext4_split_extent(handle_t *handle,
uninitialized = ext4_ext_is_uninitialized(ex);
if (map->m_lblk + map->m_len < ee_block + ee_len) {
- split_flag1 = split_flag & EXT4_EXT_MAY_ZEROOUT ?
- EXT4_EXT_MAY_ZEROOUT : 0;
+ split_flag1 = split_flag & EXT4_EXT_MAY_ZEROOUT;
flags1 = flags | EXT4_GET_BLOCKS_PRE_IO;
if (uninitialized)
split_flag1 |= EXT4_EXT_MARK_UNINIT1 |
EXT4_EXT_MARK_UNINIT2;
+ if (split_flag & EXT4_EXT_DATA_VALID2)
+ split_flag1 |= EXT4_EXT_DATA_VALID1;
err = ext4_split_extent_at(handle, inode, path,
map->m_lblk + map->m_len, split_flag1, flags1);
if (err)
@@ -3043,8 +3057,8 @@ static int ext4_split_extent(handle_t *handle,
return PTR_ERR(path);
if (map->m_lblk >= ee_block) {
- split_flag1 = split_flag & EXT4_EXT_MAY_ZEROOUT ?
- EXT4_EXT_MAY_ZEROOUT : 0;
+ split_flag1 = split_flag & (EXT4_EXT_MAY_ZEROOUT |
+ EXT4_EXT_DATA_VALID2);
if (uninitialized)
split_flag1 |= EXT4_EXT_MARK_UNINIT1;
if (split_flag & EXT4_EXT_MARK_UNINIT2)
@@ -3323,26 +3337,47 @@ static int ext4_split_unwritten_extents(handle_t *handle,
split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0;
split_flag |= EXT4_EXT_MARK_UNINIT2;
-
+ if (flags & EXT4_GET_BLOCKS_CONVERT)
+ split_flag |= EXT4_EXT_DATA_VALID2;
flags |= EXT4_GET_BLOCKS_PRE_IO;
return ext4_split_extent(handle, inode, path, map, split_flag, flags);
}
static int ext4_convert_unwritten_extents_endio(handle_t *handle,
- struct inode *inode,
- struct ext4_ext_path *path)
+ struct inode *inode,
+ struct ext4_map_blocks *map,
+ struct ext4_ext_path *path)
{
struct ext4_extent *ex;
+ ext4_lblk_t ee_block;
+ unsigned int ee_len;
int depth;
int err = 0;
depth = ext_depth(inode);
ex = path[depth].p_ext;
+ ee_block = le32_to_cpu(ex->ee_block);
+ ee_len = ext4_ext_get_actual_len(ex);
ext_debug("ext4_convert_unwritten_extents_endio: inode %lu, logical"
"block %llu, max_blocks %u\n", inode->i_ino,
- (unsigned long long)le32_to_cpu(ex->ee_block),
- ext4_ext_get_actual_len(ex));
+ (unsigned long long)ee_block, ee_len);
+
+ /* If extent is larger than requested then split is required */
+ if (ee_block != map->m_lblk || ee_len > map->m_len) {
+ err = ext4_split_unwritten_extents(handle, inode, map, path,
+ EXT4_GET_BLOCKS_CONVERT);
+ if (err < 0)
+ goto out;
+ ext4_ext_drop_refs(path);
+ path = ext4_ext_find_extent(inode, map->m_lblk, path);
+ if (IS_ERR(path)) {
+ err = PTR_ERR(path);
+ goto out;
+ }
+ depth = ext_depth(inode);
+ ex = path[depth].p_ext;
+ }
err = ext4_ext_get_access(handle, inode, path + depth);
if (err)
@@ -3652,7 +3687,7 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
}
/* IO end_io complete, convert the filled extent to written */
if ((flags & EXT4_GET_BLOCKS_CONVERT)) {
- ret = ext4_convert_unwritten_extents_endio(handle, inode,
+ ret = ext4_convert_unwritten_extents_endio(handle, inode, map,
path);
if (ret >= 0) {
ext4_update_inode_fsync_trans(handle, inode, 1);
@@ -4428,6 +4463,9 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
*/
if (len <= EXT_UNINIT_MAX_LEN << blkbits)
flags |= EXT4_GET_BLOCKS_NO_NORMALIZE;
+
+ /* Prevent race condition between unwritten */
+ ext4_flush_unwritten_io(inode);
retry:
while (ret >= 0 && ret < max_blocks) {
map.m_lblk = map.m_lblk + ret;
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index fa36372f3fdf..3a100e7a62a8 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -725,6 +725,10 @@ repeat_in_this_group:
"inode=%lu", ino + 1);
continue;
}
+ BUFFER_TRACE(inode_bitmap_bh, "get_write_access");
+ err = ext4_journal_get_write_access(handle, inode_bitmap_bh);
+ if (err)
+ goto fail;
ext4_lock_group(sb, group);
ret2 = ext4_test_and_set_bit(ino, inode_bitmap_bh->b_data);
ext4_unlock_group(sb, group);
@@ -738,6 +742,11 @@ repeat_in_this_group:
goto out;
got:
+ BUFFER_TRACE(inode_bitmap_bh, "call ext4_handle_dirty_metadata");
+ err = ext4_handle_dirty_metadata(handle, NULL, inode_bitmap_bh);
+ if (err)
+ goto fail;
+
/* We may have to initialize the block bitmap if it isn't already */
if (ext4_has_group_desc_csum(sb) &&
gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
@@ -762,9 +771,7 @@ got:
ext4_free_group_clusters_set(sb, gdp,
ext4_free_clusters_after_init(sb, group, gdp));
ext4_block_bitmap_csum_set(sb, group, gdp,
- block_bitmap_bh,
- EXT4_BLOCKS_PER_GROUP(sb) /
- 8);
+ block_bitmap_bh);
ext4_group_desc_csum_set(sb, group, gdp);
}
ext4_unlock_group(sb, group);
@@ -773,11 +780,6 @@ got:
goto fail;
}
- BUFFER_TRACE(inode_bitmap_bh, "get_write_access");
- err = ext4_journal_get_write_access(handle, inode_bitmap_bh);
- if (err)
- goto fail;
-
BUFFER_TRACE(group_desc_bh, "get_write_access");
err = ext4_journal_get_write_access(handle, group_desc_bh);
if (err)
@@ -825,11 +827,6 @@ got:
}
ext4_unlock_group(sb, group);
- BUFFER_TRACE(inode_bitmap_bh, "call ext4_handle_dirty_metadata");
- err = ext4_handle_dirty_metadata(handle, NULL, inode_bitmap_bh);
- if (err)
- goto fail;
-
BUFFER_TRACE(group_desc_bh, "call ext4_handle_dirty_metadata");
err = ext4_handle_dirty_metadata(handle, NULL, group_desc_bh);
if (err)
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index f8b27bf80aca..526e55358606 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2805,8 +2805,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
}
len = ext4_free_group_clusters(sb, gdp) - ac->ac_b_ex.fe_len;
ext4_free_group_clusters_set(sb, gdp, len);
- ext4_block_bitmap_csum_set(sb, ac->ac_b_ex.fe_group, gdp, bitmap_bh,
- EXT4_BLOCKS_PER_GROUP(sb) / 8);
+ ext4_block_bitmap_csum_set(sb, ac->ac_b_ex.fe_group, gdp, bitmap_bh);
ext4_group_desc_csum_set(sb, ac->ac_b_ex.fe_group, gdp);
ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
@@ -4666,8 +4665,7 @@ do_more:
ret = ext4_free_group_clusters(sb, gdp) + count_clusters;
ext4_free_group_clusters_set(sb, gdp, ret);
- ext4_block_bitmap_csum_set(sb, block_group, gdp, bitmap_bh,
- EXT4_BLOCKS_PER_GROUP(sb) / 8);
+ ext4_block_bitmap_csum_set(sb, block_group, gdp, bitmap_bh);
ext4_group_desc_csum_set(sb, block_group, gdp);
ext4_unlock_group(sb, block_group);
percpu_counter_add(&sbi->s_freeclusters_counter, count_clusters);
@@ -4811,8 +4809,7 @@ int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
mb_free_blocks(NULL, &e4b, bit, count);
blk_free_count = blocks_freed + ext4_free_group_clusters(sb, desc);
ext4_free_group_clusters_set(sb, desc, blk_free_count);
- ext4_block_bitmap_csum_set(sb, block_group, desc, bitmap_bh,
- EXT4_BLOCKS_PER_GROUP(sb) / 8);
+ ext4_block_bitmap_csum_set(sb, block_group, desc, bitmap_bh);
ext4_group_desc_csum_set(sb, block_group, desc);
ext4_unlock_group(sb, block_group);
percpu_counter_add(&sbi->s_freeclusters_counter,
@@ -4993,8 +4990,9 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
minlen = EXT4_NUM_B2C(EXT4_SB(sb),
range->minlen >> sb->s_blocksize_bits);
- if (unlikely(minlen > EXT4_CLUSTERS_PER_GROUP(sb)) ||
- unlikely(start >= max_blks))
+ if (minlen > EXT4_CLUSTERS_PER_GROUP(sb) ||
+ start >= max_blks ||
+ range->len < sb->s_blocksize)
return -EINVAL;
if (end >= max_blks)
end = max_blks - 1;
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 7a75e1086961..47bf06a2765d 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -1212,8 +1212,7 @@ static int ext4_set_bitmap_checksums(struct super_block *sb,
bh = ext4_get_bitmap(sb, group_data->block_bitmap);
if (!bh)
return -EIO;
- ext4_block_bitmap_csum_set(sb, group, gdp, bh,
- EXT4_BLOCKS_PER_GROUP(sb) / 8);
+ ext4_block_bitmap_csum_set(sb, group, gdp, bh);
brelse(bh);
return 0;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 7265a0367476..80928f716850 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -143,9 +143,10 @@ int ext4_superblock_csum_verify(struct super_block *sb,
return es->s_checksum == ext4_superblock_csum(sb, es);
}
-void ext4_superblock_csum_set(struct super_block *sb,
- struct ext4_super_block *es)
+void ext4_superblock_csum_set(struct super_block *sb)
{
+ struct ext4_super_block *es = EXT4_SB(sb)->s_es;
+
if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
return;
@@ -1963,7 +1964,7 @@ static int ext4_fill_flex_info(struct super_block *sb)
sbi->s_log_groups_per_flex = 0;
return 1;
}
- groups_per_flex = 1 << sbi->s_log_groups_per_flex;
+ groups_per_flex = 1U << sbi->s_log_groups_per_flex;
err = ext4_alloc_flex_bg_array(sb, sbi->s_groups_count);
if (err)
@@ -4381,7 +4382,7 @@ static int ext4_commit_super(struct super_block *sb, int sync)
cpu_to_le32(percpu_counter_sum_positive(
&EXT4_SB(sb)->s_freeinodes_counter));
BUFFER_TRACE(sbh, "marking dirty");
- ext4_superblock_csum_set(sb, es);
+ ext4_superblock_csum_set(sb);
mark_buffer_dirty(sbh);
if (sync) {
error = sync_dirty_buffer(sbh);
diff --git a/fs/file.c b/fs/file.c
index d3b5fa80b71b..eff23162485f 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -685,7 +685,6 @@ void do_close_on_exec(struct files_struct *files)
struct fdtable *fdt;
/* exec unshares first */
- BUG_ON(atomic_read(&files->count) != 1);
spin_lock(&files->file_lock);
for (i = 0; ; i++) {
unsigned long set;
@@ -900,7 +899,7 @@ int replace_fd(unsigned fd, struct file *file, unsigned flags)
return __close_fd(files, fd);
if (fd >= rlimit(RLIMIT_NOFILE))
- return -EMFILE;
+ return -EBADF;
spin_lock(&files->file_lock);
err = expand_files(files, fd);
@@ -926,7 +925,7 @@ SYSCALL_DEFINE3(dup3, unsigned int, oldfd, unsigned int, newfd, int, flags)
return -EINVAL;
if (newfd >= rlimit(RLIMIT_NOFILE))
- return -EMFILE;
+ return -EBADF;
spin_lock(&files->file_lock);
err = expand_files(files, newfd);
@@ -995,16 +994,18 @@ int iterate_fd(struct files_struct *files, unsigned n,
const void *p)
{
struct fdtable *fdt;
- struct file *file;
int res = 0;
if (!files)
return 0;
spin_lock(&files->file_lock);
- fdt = files_fdtable(files);
- while (!res && n < fdt->max_fds) {
- file = rcu_dereference_check_fdtable(files, fdt->fd[n++]);
- if (file)
- res = f(p, file, n);
+ for (fdt = files_fdtable(files); n < fdt->max_fds; n++) {
+ struct file *file;
+ file = rcu_dereference_check_fdtable(files, fdt->fd[n]);
+ if (!file)
+ continue;
+ res = f(p, file, n);
+ if (res)
+ break;
}
spin_unlock(&files->file_lock);
return res;
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 51ea267d444c..3e3422f7f0a4 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -228,6 +228,8 @@ static void requeue_io(struct inode *inode, struct bdi_writeback *wb)
static void inode_sync_complete(struct inode *inode)
{
inode->i_state &= ~I_SYNC;
+ /* If inode is clean an unused, put it into LRU now... */
+ inode_add_lru(inode);
/* Waiters must see I_SYNC cleared before being woken up */
smp_mb();
wake_up_bit(&inode->i_state, __I_SYNC);
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 0def0504afc1..e056b4ce4877 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -516,15 +516,13 @@ static int gfs2_mmap(struct file *file, struct vm_area_struct *vma)
struct gfs2_holder i_gh;
int error;
- gfs2_holder_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
- error = gfs2_glock_nq(&i_gh);
- if (error == 0) {
- file_accessed(file);
- gfs2_glock_dq(&i_gh);
- }
- gfs2_holder_uninit(&i_gh);
+ error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY,
+ &i_gh);
if (error)
return error;
+ /* grab lock to update inode */
+ gfs2_glock_dq_uninit(&i_gh);
+ file_accessed(file);
}
vma->vm_ops = &gfs2_vm_ops;
@@ -677,10 +675,8 @@ static ssize_t gfs2_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
size_t writesize = iov_length(iov, nr_segs);
struct dentry *dentry = file->f_dentry;
struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
- struct gfs2_sbd *sdp;
int ret;
- sdp = GFS2_SB(file->f_mapping->host);
ret = gfs2_rs_alloc(ip);
if (ret)
return ret;
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index e6c2fd53cab2..0f22d09f358d 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -768,7 +768,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
mapping->host = s->s_bdev->bd_inode;
mapping->flags = 0;
mapping_set_gfp_mask(mapping, GFP_NOFS);
- mapping->assoc_mapping = NULL;
+ mapping->private_data = NULL;
mapping->backing_dev_info = s->s_bdi;
mapping->writeback_index = 0;
}
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index 8ff95a2d54ee..9ceccb1595a3 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -393,12 +393,10 @@ static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
struct gfs2_meta_header *mh;
struct gfs2_trans *tr;
- lock_buffer(bd->bd_bh);
- gfs2_log_lock(sdp);
tr = current->journal_info;
tr->tr_touched = 1;
if (!list_empty(&bd->bd_list))
- goto out;
+ return;
set_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags);
set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags);
mh = (struct gfs2_meta_header *)bd->bd_bh->b_data;
@@ -414,9 +412,6 @@ static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
sdp->sd_log_num_buf++;
list_add(&bd->bd_list, &sdp->sd_log_le_buf);
tr->tr_num_buf_new++;
-out:
- gfs2_log_unlock(sdp);
- unlock_buffer(bd->bd_bh);
}
static void gfs2_check_magic(struct buffer_head *bh)
@@ -621,7 +616,6 @@ static void revoke_lo_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
static void revoke_lo_before_commit(struct gfs2_sbd *sdp)
{
- struct gfs2_log_descriptor *ld;
struct gfs2_meta_header *mh;
unsigned int offset;
struct list_head *head = &sdp->sd_log_le_revoke;
@@ -634,7 +628,6 @@ static void revoke_lo_before_commit(struct gfs2_sbd *sdp)
length = gfs2_struct2blk(sdp, sdp->sd_log_num_revoke, sizeof(u64));
page = gfs2_get_log_desc(sdp, GFS2_LOG_DESC_REVOKE, length, sdp->sd_log_num_revoke);
- ld = page_address(page);
offset = sizeof(struct gfs2_log_descriptor);
list_for_each_entry(bd, head, bd_list) {
@@ -777,12 +770,10 @@ static void databuf_lo_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
struct address_space *mapping = bd->bd_bh->b_page->mapping;
struct gfs2_inode *ip = GFS2_I(mapping->host);
- lock_buffer(bd->bd_bh);
- gfs2_log_lock(sdp);
if (tr)
tr->tr_touched = 1;
if (!list_empty(&bd->bd_list))
- goto out;
+ return;
set_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags);
set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags);
if (gfs2_is_jdata(ip)) {
@@ -793,9 +784,6 @@ static void databuf_lo_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
} else {
list_add_tail(&bd->bd_list, &sdp->sd_log_le_ordered);
}
-out:
- gfs2_log_unlock(sdp);
- unlock_buffer(bd->bd_bh);
}
/**
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index 40c4b0d42fa8..c5af8e18f27a 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -497,8 +497,11 @@ int gfs2_quota_hold(struct gfs2_inode *ip, u32 uid, u32 gid)
struct gfs2_quota_data **qd;
int error;
- if (ip->i_res == NULL)
- gfs2_rs_alloc(ip);
+ if (ip->i_res == NULL) {
+ error = gfs2_rs_alloc(ip);
+ if (error)
+ return error;
+ }
qd = ip->i_res->rs_qa_qd;
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 3cc402ce6fea..38fe18f2f055 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -553,7 +553,6 @@ void gfs2_free_clones(struct gfs2_rgrpd *rgd)
*/
int gfs2_rs_alloc(struct gfs2_inode *ip)
{
- int error = 0;
struct gfs2_blkreserv *res;
if (ip->i_res)
@@ -561,7 +560,7 @@ int gfs2_rs_alloc(struct gfs2_inode *ip)
res = kmem_cache_zalloc(gfs2_rsrv_cachep, GFP_NOFS);
if (!res)
- error = -ENOMEM;
+ return -ENOMEM;
RB_CLEAR_NODE(&res->rs_node);
@@ -571,7 +570,7 @@ int gfs2_rs_alloc(struct gfs2_inode *ip)
else
ip->i_res = res;
up_write(&ip->i_rw_mutex);
- return error;
+ return 0;
}
static void dump_rs(struct seq_file *seq, const struct gfs2_blkreserv *rs)
@@ -1263,7 +1262,9 @@ int gfs2_fitrim(struct file *filp, void __user *argp)
int ret = 0;
u64 amt;
u64 trimmed = 0;
+ u64 start, end, minlen;
unsigned int x;
+ unsigned bs_shift = sdp->sd_sb.sb_bsize_shift;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
@@ -1271,19 +1272,25 @@ int gfs2_fitrim(struct file *filp, void __user *argp)
if (!blk_queue_discard(q))
return -EOPNOTSUPP;
- if (argp == NULL) {
- r.start = 0;
- r.len = ULLONG_MAX;
- r.minlen = 0;
- } else if (copy_from_user(&r, argp, sizeof(r)))
+ if (copy_from_user(&r, argp, sizeof(r)))
return -EFAULT;
ret = gfs2_rindex_update(sdp);
if (ret)
return ret;
- rgd = gfs2_blk2rgrpd(sdp, r.start, 0);
- rgd_end = gfs2_blk2rgrpd(sdp, r.start + r.len, 0);
+ start = r.start >> bs_shift;
+ end = start + (r.len >> bs_shift);
+ minlen = max_t(u64, r.minlen,
+ q->limits.discard_granularity) >> bs_shift;
+
+ rgd = gfs2_blk2rgrpd(sdp, start, 0);
+ rgd_end = gfs2_blk2rgrpd(sdp, end - 1, 0);
+
+ if (end <= start ||
+ minlen > sdp->sd_max_rg_data ||
+ start > rgd_end->rd_data0 + rgd_end->rd_data)
+ return -EINVAL;
while (1) {
@@ -1295,7 +1302,9 @@ int gfs2_fitrim(struct file *filp, void __user *argp)
/* Trim each bitmap in the rgrp */
for (x = 0; x < rgd->rd_length; x++) {
struct gfs2_bitmap *bi = rgd->rd_bits + x;
- ret = gfs2_rgrp_send_discards(sdp, rgd->rd_data0, NULL, bi, r.minlen, &amt);
+ ret = gfs2_rgrp_send_discards(sdp,
+ rgd->rd_data0, NULL, bi, minlen,
+ &amt);
if (ret) {
gfs2_glock_dq_uninit(&gh);
goto out;
@@ -1324,7 +1333,7 @@ int gfs2_fitrim(struct file *filp, void __user *argp)
out:
r.len = trimmed << 9;
- if (argp && copy_to_user(argp, &r, sizeof(r)))
+ if (copy_to_user(argp, &r, sizeof(r)))
return -EFAULT;
return ret;
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index bc737261f234..d6488674d916 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -810,7 +810,8 @@ static void gfs2_dirty_inode(struct inode *inode, int flags)
return;
}
need_unlock = 1;
- }
+ } else if (WARN_ON_ONCE(ip->i_gl->gl_state != LM_ST_EXCLUSIVE))
+ return;
if (current->journal_info == NULL) {
ret = gfs2_trans_begin(sdp, RES_DINODE, 0);
diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c
index adbd27875ef9..413627072f36 100644
--- a/fs/gfs2/trans.c
+++ b/fs/gfs2/trans.c
@@ -155,14 +155,22 @@ void gfs2_trans_add_bh(struct gfs2_glock *gl, struct buffer_head *bh, int meta)
struct gfs2_sbd *sdp = gl->gl_sbd;
struct gfs2_bufdata *bd;
+ lock_buffer(bh);
+ gfs2_log_lock(sdp);
bd = bh->b_private;
if (bd)
gfs2_assert(sdp, bd->bd_gl == gl);
else {
+ gfs2_log_unlock(sdp);
+ unlock_buffer(bh);
gfs2_attach_bufdata(gl, bh, meta);
bd = bh->b_private;
+ lock_buffer(bh);
+ gfs2_log_lock(sdp);
}
lops_add(sdp, bd);
+ gfs2_log_unlock(sdp);
+ unlock_buffer(bh);
}
void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index c5bc355d8243..4a55f35a6ced 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -151,8 +151,8 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
{
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
- unsigned long start_addr;
struct hstate *h = hstate_file(file);
+ struct vm_unmapped_area_info info;
if (len & ~huge_page_mask(h))
return -EINVAL;
@@ -173,39 +173,13 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
return addr;
}
- if (len > mm->cached_hole_size)
- start_addr = mm->free_area_cache;
- else {
- start_addr = TASK_UNMAPPED_BASE;
- mm->cached_hole_size = 0;
- }
-
-full_search:
- addr = ALIGN(start_addr, huge_page_size(h));
-
- for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
- /* At this point: (!vma || addr < vma->vm_end). */
- if (TASK_SIZE - len < addr) {
- /*
- * Start a new search - just in case we missed
- * some holes.
- */
- if (start_addr != TASK_UNMAPPED_BASE) {
- start_addr = TASK_UNMAPPED_BASE;
- mm->cached_hole_size = 0;
- goto full_search;
- }
- return -ENOMEM;
- }
-
- if (!vma || addr + len <= vma->vm_start) {
- mm->free_area_cache = addr + len;
- return addr;
- }
- if (addr + mm->cached_hole_size < vma->vm_start)
- mm->cached_hole_size = vma->vm_start - addr;
- addr = ALIGN(vma->vm_end, huge_page_size(h));
- }
+ info.flags = 0;
+ info.length = len;
+ info.low_limit = TASK_UNMAPPED_BASE;
+ info.high_limit = TASK_SIZE;
+ info.align_mask = PAGE_MASK & ~huge_page_mask(h);
+ info.align_offset = 0;
+ return vm_unmapped_area(&info);
}
#endif
@@ -608,11 +582,11 @@ static int hugetlbfs_migrate_page(struct address_space *mapping,
int rc;
rc = migrate_huge_page_move_mapping(mapping, newpage, page);
- if (rc)
+ if (rc != MIGRATEPAGE_SUCCESS)
return rc;
migrate_page_copy(newpage, page);
- return 0;
+ return MIGRATEPAGE_SUCCESS;
}
static int hugetlbfs_statfs(struct dentry *dentry, struct kstatfs *buf)
@@ -923,7 +897,7 @@ static struct file_system_type hugetlbfs_fs_type = {
.kill_sb = kill_litter_super,
};
-static struct vfsmount *hugetlbfs_vfsmount;
+static struct vfsmount *hugetlbfs_vfsmount[HUGE_MAX_HSTATE];
static int can_do_hugetlb_shm(void)
{
@@ -932,9 +906,22 @@ static int can_do_hugetlb_shm(void)
return capable(CAP_IPC_LOCK) || in_group_p(shm_group);
}
+static int get_hstate_idx(int page_size_log)
+{
+ struct hstate *h;
+
+ if (!page_size_log)
+ return default_hstate_idx;
+ h = size_to_hstate(1 << page_size_log);
+ if (!h)
+ return -1;
+ return h - hstates;
+}
+
struct file *hugetlb_file_setup(const char *name, unsigned long addr,
size_t size, vm_flags_t acctflag,
- struct user_struct **user, int creat_flags)
+ struct user_struct **user,
+ int creat_flags, int page_size_log)
{
int error = -ENOMEM;
struct file *file;
@@ -944,9 +931,14 @@ struct file *hugetlb_file_setup(const char *name, unsigned long addr,
struct qstr quick_string;
struct hstate *hstate;
unsigned long num_pages;
+ int hstate_idx;
+
+ hstate_idx = get_hstate_idx(page_size_log);
+ if (hstate_idx < 0)
+ return ERR_PTR(-ENODEV);
*user = NULL;
- if (!hugetlbfs_vfsmount)
+ if (!hugetlbfs_vfsmount[hstate_idx])
return ERR_PTR(-ENOENT);
if (creat_flags == HUGETLB_SHMFS_INODE && !can_do_hugetlb_shm()) {
@@ -963,7 +955,7 @@ struct file *hugetlb_file_setup(const char *name, unsigned long addr,
}
}
- root = hugetlbfs_vfsmount->mnt_root;
+ root = hugetlbfs_vfsmount[hstate_idx]->mnt_root;
quick_string.name = name;
quick_string.len = strlen(quick_string.name);
quick_string.hash = 0;
@@ -971,7 +963,7 @@ struct file *hugetlb_file_setup(const char *name, unsigned long addr,
if (!path.dentry)
goto out_shm_unlock;
- path.mnt = mntget(hugetlbfs_vfsmount);
+ path.mnt = mntget(hugetlbfs_vfsmount[hstate_idx]);
error = -ENOSPC;
inode = hugetlbfs_get_inode(root->d_sb, NULL, S_IFREG | S_IRWXUGO, 0);
if (!inode)
@@ -1011,8 +1003,9 @@ out_shm_unlock:
static int __init init_hugetlbfs_fs(void)
{
+ struct hstate *h;
int error;
- struct vfsmount *vfsmount;
+ int i;
error = bdi_init(&hugetlbfs_backing_dev_info);
if (error)
@@ -1029,14 +1022,26 @@ static int __init init_hugetlbfs_fs(void)
if (error)
goto out;
- vfsmount = kern_mount(&hugetlbfs_fs_type);
+ i = 0;
+ for_each_hstate(h) {
+ char buf[50];
+ unsigned ps_kb = 1U << (h->order + PAGE_SHIFT - 10);
- if (!IS_ERR(vfsmount)) {
- hugetlbfs_vfsmount = vfsmount;
- return 0;
- }
+ snprintf(buf, sizeof(buf), "pagesize=%uK", ps_kb);
+ hugetlbfs_vfsmount[i] = kern_mount_data(&hugetlbfs_fs_type,
+ buf);
- error = PTR_ERR(vfsmount);
+ if (IS_ERR(hugetlbfs_vfsmount[i])) {
+ pr_err("hugetlb: Cannot mount internal hugetlbfs for "
+ "page size %uK", ps_kb);
+ error = PTR_ERR(hugetlbfs_vfsmount[i]);
+ hugetlbfs_vfsmount[i] = NULL;
+ }
+ i++;
+ }
+ /* Non default hstates are optional */
+ if (!IS_ERR_OR_NULL(hugetlbfs_vfsmount[default_hstate_idx]))
+ return 0;
out:
kmem_cache_destroy(hugetlbfs_inode_cachep);
@@ -1047,13 +1052,19 @@ static int __init init_hugetlbfs_fs(void)
static void __exit exit_hugetlbfs_fs(void)
{
+ struct hstate *h;
+ int i;
+
+
/*
* Make sure all delayed rcu free inodes are flushed before we
* destroy cache.
*/
rcu_barrier();
kmem_cache_destroy(hugetlbfs_inode_cachep);
- kern_unmount(hugetlbfs_vfsmount);
+ i = 0;
+ for_each_hstate(h)
+ kern_unmount(hugetlbfs_vfsmount[i++]);
unregister_filesystem(&hugetlbfs_fs_type);
bdi_destroy(&hugetlbfs_backing_dev_info);
}
diff --git a/fs/inode.c b/fs/inode.c
index b03c71957246..14084b72b259 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -165,7 +165,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
mapping->host = inode;
mapping->flags = 0;
mapping_set_gfp_mask(mapping, GFP_HIGHUSER_MOVABLE);
- mapping->assoc_mapping = NULL;
+ mapping->private_data = NULL;
mapping->backing_dev_info = &default_backing_dev_info;
mapping->writeback_index = 0;
@@ -408,6 +408,19 @@ static void inode_lru_list_add(struct inode *inode)
spin_unlock(&inode->i_sb->s_inode_lru_lock);
}
+/*
+ * Add inode to LRU if needed (inode is unused and clean).
+ *
+ * Needs inode->i_lock held.
+ */
+void inode_add_lru(struct inode *inode)
+{
+ if (!(inode->i_state & (I_DIRTY | I_SYNC | I_FREEING | I_WILL_FREE)) &&
+ !atomic_read(&inode->i_count) && inode->i_sb->s_flags & MS_ACTIVE)
+ inode_lru_list_add(inode);
+}
+
+
static void inode_lru_list_del(struct inode *inode)
{
spin_lock(&inode->i_sb->s_inode_lru_lock);
@@ -1390,8 +1403,7 @@ static void iput_final(struct inode *inode)
if (!drop && (sb->s_flags & MS_ACTIVE)) {
inode->i_state |= I_REFERENCED;
- if (!(inode->i_state & (I_DIRTY|I_SYNC)))
- inode_lru_list_add(inode);
+ inode_add_lru(inode);
spin_unlock(&inode->i_lock);
return;
}
diff --git a/fs/internal.h b/fs/internal.h
index 916b7cbf3e3e..2f6af7f645eb 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -110,6 +110,7 @@ extern int open_check_o_direct(struct file *f);
* inode.c
*/
extern spinlock_t inode_sb_list_lock;
+extern void inode_add_lru(struct inode *inode);
/*
* fs-writeback.c
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index 78b7f84241d4..7f5120bf0ec2 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -1961,7 +1961,9 @@ retry:
spin_unlock(&journal->j_list_lock);
jbd_unlock_bh_state(bh);
spin_unlock(&journal->j_state_lock);
+ unlock_buffer(bh);
log_wait_commit(journal, tid);
+ lock_buffer(bh);
goto retry;
}
/*
diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c
index 60ef3fb707ff..1506673c087e 100644
--- a/fs/jffs2/file.c
+++ b/fs/jffs2/file.c
@@ -138,33 +138,39 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
struct page *pg;
struct inode *inode = mapping->host;
struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode);
+ struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb);
+ struct jffs2_raw_inode ri;
+ uint32_t alloc_len = 0;
pgoff_t index = pos >> PAGE_CACHE_SHIFT;
uint32_t pageofs = index << PAGE_CACHE_SHIFT;
int ret = 0;
+ jffs2_dbg(1, "%s()\n", __func__);
+
+ if (pageofs > inode->i_size) {
+ ret = jffs2_reserve_space(c, sizeof(ri), &alloc_len,
+ ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE);
+ if (ret)
+ return ret;
+ }
+
+ mutex_lock(&f->sem);
pg = grab_cache_page_write_begin(mapping, index, flags);
- if (!pg)
+ if (!pg) {
+ if (alloc_len)
+ jffs2_complete_reservation(c);
+ mutex_unlock(&f->sem);
return -ENOMEM;
+ }
*pagep = pg;
- jffs2_dbg(1, "%s()\n", __func__);
-
- if (pageofs > inode->i_size) {
+ if (alloc_len) {
/* Make new hole frag from old EOF to new page */
- struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb);
- struct jffs2_raw_inode ri;
struct jffs2_full_dnode *fn;
- uint32_t alloc_len;
jffs2_dbg(1, "Writing new hole frag 0x%x-0x%x between current EOF and new page\n",
(unsigned int)inode->i_size, pageofs);
- ret = jffs2_reserve_space(c, sizeof(ri), &alloc_len,
- ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE);
- if (ret)
- goto out_page;
-
- mutex_lock(&f->sem);
memset(&ri, 0, sizeof(ri));
ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
@@ -191,7 +197,6 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
if (IS_ERR(fn)) {
ret = PTR_ERR(fn);
jffs2_complete_reservation(c);
- mutex_unlock(&f->sem);
goto out_page;
}
ret = jffs2_add_full_dnode_to_inode(c, f, fn);
@@ -206,12 +211,10 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
jffs2_mark_node_obsolete(c, fn->raw);
jffs2_free_full_dnode(fn);
jffs2_complete_reservation(c);
- mutex_unlock(&f->sem);
goto out_page;
}
jffs2_complete_reservation(c);
inode->i_size = pageofs;
- mutex_unlock(&f->sem);
}
/*
@@ -220,18 +223,18 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
* case of a short-copy.
*/
if (!PageUptodate(pg)) {
- mutex_lock(&f->sem);
ret = jffs2_do_readpage_nolock(inode, pg);
- mutex_unlock(&f->sem);
if (ret)
goto out_page;
}
+ mutex_unlock(&f->sem);
jffs2_dbg(1, "end write_begin(). pg->flags %lx\n", pg->flags);
return ret;
out_page:
unlock_page(pg);
page_cache_release(pg);
+ mutex_unlock(&f->sem);
return ret;
}
diff --git a/fs/jfs/jfs_discard.c b/fs/jfs/jfs_discard.c
index 9947563e4175..dfcd50304559 100644
--- a/fs/jfs/jfs_discard.c
+++ b/fs/jfs/jfs_discard.c
@@ -83,7 +83,7 @@ int jfs_ioc_trim(struct inode *ip, struct fstrim_range *range)
struct bmap *bmp = JFS_SBI(ip->i_sb)->bmap;
struct super_block *sb = ipbmap->i_sb;
int agno, agno_end;
- s64 start, end, minlen;
+ u64 start, end, minlen;
u64 trimmed = 0;
/**
@@ -93,15 +93,19 @@ int jfs_ioc_trim(struct inode *ip, struct fstrim_range *range)
* minlen: minimum extent length in Bytes
*/
start = range->start >> sb->s_blocksize_bits;
- if (start < 0)
- start = 0;
end = start + (range->len >> sb->s_blocksize_bits) - 1;
- if (end >= bmp->db_mapsize)
- end = bmp->db_mapsize - 1;
minlen = range->minlen >> sb->s_blocksize_bits;
- if (minlen <= 0)
+ if (minlen == 0)
minlen = 1;
+ if (minlen > bmp->db_agsize ||
+ start >= bmp->db_mapsize ||
+ range->len < sb->s_blocksize)
+ return -EINVAL;
+
+ if (end >= bmp->db_mapsize)
+ end = bmp->db_mapsize - 1;
+
/**
* we trim all ag's within the range
*/
diff --git a/fs/lockd/clntxdr.c b/fs/lockd/clntxdr.c
index d269ada7670e..982d2676e1f8 100644
--- a/fs/lockd/clntxdr.c
+++ b/fs/lockd/clntxdr.c
@@ -223,7 +223,7 @@ static void encode_nlm_stat(struct xdr_stream *xdr,
{
__be32 *p;
- BUG_ON(be32_to_cpu(stat) > NLM_LCK_DENIED_GRACE_PERIOD);
+ WARN_ON_ONCE(be32_to_cpu(stat) > NLM_LCK_DENIED_GRACE_PERIOD);
p = xdr_reserve_space(xdr, 4);
*p = stat;
}
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index e4fb3ba5a58a..3d7e09bcc0e9 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -85,29 +85,38 @@ static struct rpc_clnt *nsm_create(struct net *net)
return rpc_create(&args);
}
+static struct rpc_clnt *nsm_client_set(struct lockd_net *ln,
+ struct rpc_clnt *clnt)
+{
+ spin_lock(&ln->nsm_clnt_lock);
+ if (ln->nsm_users == 0) {
+ if (clnt == NULL)
+ goto out;
+ ln->nsm_clnt = clnt;
+ }
+ clnt = ln->nsm_clnt;
+ ln->nsm_users++;
+out:
+ spin_unlock(&ln->nsm_clnt_lock);
+ return clnt;
+}
+
static struct rpc_clnt *nsm_client_get(struct net *net)
{
- static DEFINE_MUTEX(nsm_create_mutex);
- struct rpc_clnt *clnt;
+ struct rpc_clnt *clnt, *new;
struct lockd_net *ln = net_generic(net, lockd_net_id);
- spin_lock(&ln->nsm_clnt_lock);
- if (ln->nsm_users) {
- ln->nsm_users++;
- clnt = ln->nsm_clnt;
- spin_unlock(&ln->nsm_clnt_lock);
+ clnt = nsm_client_set(ln, NULL);
+ if (clnt != NULL)
goto out;
- }
- spin_unlock(&ln->nsm_clnt_lock);
- mutex_lock(&nsm_create_mutex);
- clnt = nsm_create(net);
- if (!IS_ERR(clnt)) {
- ln->nsm_clnt = clnt;
- smp_wmb();
- ln->nsm_users = 1;
- }
- mutex_unlock(&nsm_create_mutex);
+ clnt = new = nsm_create(net);
+ if (IS_ERR(clnt))
+ goto out;
+
+ clnt = nsm_client_set(ln, new);
+ if (clnt != new)
+ rpc_shutdown_client(new);
out:
return clnt;
}
@@ -115,18 +124,16 @@ out:
static void nsm_client_put(struct net *net)
{
struct lockd_net *ln = net_generic(net, lockd_net_id);
- struct rpc_clnt *clnt = ln->nsm_clnt;
- int shutdown = 0;
+ struct rpc_clnt *clnt = NULL;
spin_lock(&ln->nsm_clnt_lock);
- if (ln->nsm_users) {
- if (--ln->nsm_users)
- ln->nsm_clnt = NULL;
- shutdown = !ln->nsm_users;
+ ln->nsm_users--;
+ if (ln->nsm_users == 0) {
+ clnt = ln->nsm_clnt;
+ ln->nsm_clnt = NULL;
}
spin_unlock(&ln->nsm_clnt_lock);
-
- if (shutdown)
+ if (clnt != NULL)
rpc_shutdown_client(clnt);
}
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index 3009a365e082..21171f0c6477 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -68,7 +68,8 @@ nlmsvc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp,
/* Obtain file pointer. Not used by FREE_ALL call. */
if (filp != NULL) {
- if ((error = nlm_lookup_file(rqstp, &file, &lock->fh)) != 0)
+ error = cast_status(nlm_lookup_file(rqstp, &file, &lock->fh));
+ if (error != 0)
goto no_locks;
*filp = file;
diff --git a/fs/namei.c b/fs/namei.c
index d1895f308156..5f4cdf3ad913 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -705,8 +705,8 @@ static inline void put_link(struct nameidata *nd, struct path *link, void *cooki
path_put(link);
}
-int sysctl_protected_symlinks __read_mostly = 1;
-int sysctl_protected_hardlinks __read_mostly = 1;
+int sysctl_protected_symlinks __read_mostly = 0;
+int sysctl_protected_hardlinks __read_mostly = 0;
/**
* may_follow_link - Check symlink following for unsafe situations
@@ -2131,6 +2131,11 @@ struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
if (!len)
return ERR_PTR(-EACCES);
+ if (unlikely(name[0] == '.')) {
+ if (len < 2 || (len == 2 && name[1] == '.'))
+ return ERR_PTR(-EACCES);
+ }
+
while (len--) {
c = *(const unsigned char *)name++;
if (c == '/' || c == '\0')
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 9a521fb39869..5088b57b078a 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -241,7 +241,7 @@ static int nfs_callback_start_svc(int minorversion, struct rpc_xprt *xprt,
svc_exit_thread(cb_info->rqst);
cb_info->rqst = NULL;
cb_info->task = NULL;
- return PTR_ERR(cb_info->task);
+ return ret;
}
dprintk("nfs_callback_up: service started\n");
return 0;
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index ce8cb926526b..b9e66b7e0c14 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -450,7 +450,8 @@ void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry)
nfs_refresh_inode(dentry->d_inode, entry->fattr);
goto out;
} else {
- d_drop(dentry);
+ if (d_invalidate(dentry) != 0)
+ goto out;
dput(dentry);
}
}
@@ -1100,6 +1101,8 @@ out_set_verifier:
out_zap_parent:
nfs_zap_caches(dir);
out_bad:
+ nfs_free_fattr(fattr);
+ nfs_free_fhandle(fhandle);
nfs_mark_for_revalidate(dir);
if (inode && S_ISDIR(inode->i_mode)) {
/* Purge readdir caches. */
@@ -1112,8 +1115,6 @@ out_zap_parent:
shrink_dcache_parent(dentry);
}
d_drop(dentry);
- nfs_free_fattr(fattr);
- nfs_free_fhandle(fhandle);
dput(parent);
dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) is invalid\n",
__func__, dentry->d_parent->d_name.name,
diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c
index 31c26c4dcc23..ca4b11ec87a2 100644
--- a/fs/nfs/dns_resolve.c
+++ b/fs/nfs/dns_resolve.c
@@ -217,7 +217,7 @@ static int nfs_dns_parse(struct cache_detail *cd, char *buf, int buflen)
{
char buf1[NFS_DNS_HOSTNAME_MAXLEN+1];
struct nfs_dns_ent key, *item;
- unsigned long ttl;
+ unsigned int ttl;
ssize_t len;
int ret = -EINVAL;
@@ -240,7 +240,8 @@ static int nfs_dns_parse(struct cache_detail *cd, char *buf, int buflen)
key.namelen = len;
memset(&key.h, 0, sizeof(key.h));
- ttl = get_expiry(&buf);
+ if (get_uint(&buf, &ttl) < 0)
+ goto out;
if (ttl == 0)
goto out;
key.h.expiry_time = ttl + seconds_since_boot();
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 5c7325c5c5e6..6fa01aea2488 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -685,7 +685,10 @@ static void __put_nfs_open_context(struct nfs_open_context *ctx, int is_sync)
if (ctx->cred != NULL)
put_rpccred(ctx->cred);
dput(ctx->dentry);
- nfs_sb_deactive(sb);
+ if (is_sync)
+ nfs_sb_deactive(sb);
+ else
+ nfs_sb_deactive_async(sb);
kfree(ctx->mdsthreshold);
kfree(ctx);
}
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 59b133c5d652..05521cadac2e 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -351,10 +351,12 @@ extern int __init register_nfs_fs(void);
extern void __exit unregister_nfs_fs(void);
extern void nfs_sb_active(struct super_block *sb);
extern void nfs_sb_deactive(struct super_block *sb);
+extern void nfs_sb_deactive_async(struct super_block *sb);
/* namespace.c */
+#define NFS_PATH_CANONICAL 1
extern char *nfs_path(char **p, struct dentry *dentry,
- char *buffer, ssize_t buflen);
+ char *buffer, ssize_t buflen, unsigned flags);
extern struct vfsmount *nfs_d_automount(struct path *path);
struct vfsmount *nfs_submount(struct nfs_server *, struct dentry *,
struct nfs_fh *, struct nfs_fattr *);
@@ -498,7 +500,7 @@ static inline char *nfs_devname(struct dentry *dentry,
char *buffer, ssize_t buflen)
{
char *dummy;
- return nfs_path(&dummy, dentry, buffer, buflen);
+ return nfs_path(&dummy, dentry, buffer, buflen, NFS_PATH_CANONICAL);
}
/*
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index 8e65c7f1f87c..015f71f8f62c 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -181,7 +181,7 @@ int nfs_mount(struct nfs_mount_request *info)
else
msg.rpc_proc = &mnt_clnt->cl_procinfo[MOUNTPROC_MNT];
- status = rpc_call_sync(mnt_clnt, &msg, 0);
+ status = rpc_call_sync(mnt_clnt, &msg, RPC_TASK_SOFT|RPC_TASK_TIMEOUT);
rpc_shutdown_client(mnt_clnt);
if (status < 0)
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index 655925373b91..dd057bc6b65b 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -33,6 +33,7 @@ int nfs_mountpoint_expiry_timeout = 500 * HZ;
* @dentry - pointer to dentry
* @buffer - result buffer
* @buflen - length of buffer
+ * @flags - options (see below)
*
* Helper function for constructing the server pathname
* by arbitrary hashed dentry.
@@ -40,8 +41,14 @@ int nfs_mountpoint_expiry_timeout = 500 * HZ;
* This is mainly for use in figuring out the path on the
* server side when automounting on top of an existing partition
* and in generating /proc/mounts and friends.
+ *
+ * Supported flags:
+ * NFS_PATH_CANONICAL: ensure there is exactly one slash after
+ * the original device (export) name
+ * (if unset, the original name is returned verbatim)
*/
-char *nfs_path(char **p, struct dentry *dentry, char *buffer, ssize_t buflen)
+char *nfs_path(char **p, struct dentry *dentry, char *buffer, ssize_t buflen,
+ unsigned flags)
{
char *end;
int namelen;
@@ -74,7 +81,7 @@ rename_retry:
rcu_read_unlock();
goto rename_retry;
}
- if (*end != '/') {
+ if ((flags & NFS_PATH_CANONICAL) && *end != '/') {
if (--buflen < 0) {
spin_unlock(&dentry->d_lock);
rcu_read_unlock();
@@ -91,9 +98,11 @@ rename_retry:
return end;
}
namelen = strlen(base);
- /* Strip off excess slashes in base string */
- while (namelen > 0 && base[namelen - 1] == '/')
- namelen--;
+ if (flags & NFS_PATH_CANONICAL) {
+ /* Strip off excess slashes in base string */
+ while (namelen > 0 && base[namelen - 1] == '/')
+ namelen--;
+ }
buflen -= namelen;
if (buflen < 0) {
spin_unlock(&dentry->d_lock);
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index 52d847212066..2e45fd9c02a3 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -122,12 +122,21 @@ static void filelayout_reset_read(struct nfs_read_data *data)
}
}
+static void filelayout_fenceme(struct inode *inode, struct pnfs_layout_hdr *lo)
+{
+ if (!test_and_clear_bit(NFS_LAYOUT_RETURN, &lo->plh_flags))
+ return;
+ clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(inode)->flags);
+ pnfs_return_layout(inode);
+}
+
static int filelayout_async_handle_error(struct rpc_task *task,
struct nfs4_state *state,
struct nfs_client *clp,
struct pnfs_layout_segment *lseg)
{
- struct inode *inode = lseg->pls_layout->plh_inode;
+ struct pnfs_layout_hdr *lo = lseg->pls_layout;
+ struct inode *inode = lo->plh_inode;
struct nfs_server *mds_server = NFS_SERVER(inode);
struct nfs4_deviceid_node *devid = FILELAYOUT_DEVID_NODE(lseg);
struct nfs_client *mds_client = mds_server->nfs_client;
@@ -204,10 +213,8 @@ static int filelayout_async_handle_error(struct rpc_task *task,
dprintk("%s DS connection error %d\n", __func__,
task->tk_status);
nfs4_mark_deviceid_unavailable(devid);
- clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(inode)->flags);
- _pnfs_return_layout(inode);
+ set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags);
rpc_wake_up(&tbl->slot_tbl_waitq);
- nfs4_ds_disconnect(clp);
/* fall through */
default:
reset:
@@ -331,7 +338,9 @@ static void filelayout_read_count_stats(struct rpc_task *task, void *data)
static void filelayout_read_release(void *data)
{
struct nfs_read_data *rdata = data;
+ struct pnfs_layout_hdr *lo = rdata->header->lseg->pls_layout;
+ filelayout_fenceme(lo->plh_inode, lo);
nfs_put_client(rdata->ds_clp);
rdata->header->mds_ops->rpc_release(data);
}
@@ -429,7 +438,9 @@ static void filelayout_write_count_stats(struct rpc_task *task, void *data)
static void filelayout_write_release(void *data)
{
struct nfs_write_data *wdata = data;
+ struct pnfs_layout_hdr *lo = wdata->header->lseg->pls_layout;
+ filelayout_fenceme(lo->plh_inode, lo);
nfs_put_client(wdata->ds_clp);
wdata->header->mds_ops->rpc_release(data);
}
@@ -739,7 +750,7 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo,
goto out_err;
if (fl->num_fh > 0) {
- fl->fh_array = kzalloc(fl->num_fh * sizeof(struct nfs_fh *),
+ fl->fh_array = kcalloc(fl->num_fh, sizeof(fl->fh_array[0]),
gfp_flags);
if (!fl->fh_array)
goto out_err;
diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h
index dca47d786710..8c07241fe52b 100644
--- a/fs/nfs/nfs4filelayout.h
+++ b/fs/nfs/nfs4filelayout.h
@@ -149,6 +149,5 @@ extern void nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr);
extern void nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr);
struct nfs4_file_layout_dsaddr *
filelayout_get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id, gfp_t gfp_flags);
-void nfs4_ds_disconnect(struct nfs_client *clp);
#endif /* FS_NFS_NFS4FILELAYOUT_H */
diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c
index 3336d5eaf879..a8eaa9b7bb0f 100644
--- a/fs/nfs/nfs4filelayoutdev.c
+++ b/fs/nfs/nfs4filelayoutdev.c
@@ -149,28 +149,6 @@ _data_server_lookup_locked(const struct list_head *dsaddrs)
}
/*
- * Lookup DS by nfs_client pointer. Zero data server client pointer
- */
-void nfs4_ds_disconnect(struct nfs_client *clp)
-{
- struct nfs4_pnfs_ds *ds;
- struct nfs_client *found = NULL;
-
- dprintk("%s clp %p\n", __func__, clp);
- spin_lock(&nfs4_ds_cache_lock);
- list_for_each_entry(ds, &nfs4_data_server_cache, ds_node)
- if (ds->ds_clp && ds->ds_clp == clp) {
- found = ds->ds_clp;
- ds->ds_clp = NULL;
- }
- spin_unlock(&nfs4_ds_cache_lock);
- if (found) {
- set_bit(NFS_CS_STOP_RENEW, &clp->cl_res_state);
- nfs_put_client(clp);
- }
-}
-
-/*
* Create an rpc connection to the nfs4_pnfs_ds data server
* Currently only supports IPv4 and IPv6 addresses
*/
diff --git a/fs/nfs/nfs4getroot.c b/fs/nfs/nfs4getroot.c
index 6a83780e0ce6..549462e5b9b0 100644
--- a/fs/nfs/nfs4getroot.c
+++ b/fs/nfs/nfs4getroot.c
@@ -5,6 +5,7 @@
#include <linux/nfs_fs.h>
#include "nfs4_fs.h"
+#include "internal.h"
#define NFSDBG_FACILITY NFSDBG_CLIENT
diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c
index 79fbb61ce202..1e09eb78543b 100644
--- a/fs/nfs/nfs4namespace.c
+++ b/fs/nfs/nfs4namespace.c
@@ -81,7 +81,8 @@ static char *nfs_path_component(const char *nfspath, const char *end)
static char *nfs4_path(struct dentry *dentry, char *buffer, ssize_t buflen)
{
char *limit;
- char *path = nfs_path(&limit, dentry, buffer, buflen);
+ char *path = nfs_path(&limit, dentry, buffer, buflen,
+ NFS_PATH_CANONICAL);
if (!IS_ERR(path)) {
char *path_component = nfs_path_component(path, limit);
if (path_component)
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 68b21d81b7ac..5eec4429970c 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -339,8 +339,7 @@ static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struc
dprintk("%s ERROR: %d Reset session\n", __func__,
errorcode);
nfs4_schedule_session_recovery(clp->cl_session, errorcode);
- exception->retry = 1;
- break;
+ goto wait_on_recovery;
#endif /* defined(CONFIG_NFS_V4_1) */
case -NFS4ERR_FILE_OPEN:
if (exception->timeout > HZ) {
@@ -1572,9 +1571,11 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata)
data->timestamp = jiffies;
if (nfs4_setup_sequence(data->o_arg.server,
&data->o_arg.seq_args,
- &data->o_res.seq_res, task))
- return;
- rpc_call_start(task);
+ &data->o_res.seq_res,
+ task) != 0)
+ nfs_release_seqid(data->o_arg.seqid);
+ else
+ rpc_call_start(task);
return;
unlock_no_action:
rcu_read_unlock();
@@ -1748,7 +1749,7 @@ static int nfs4_opendata_access(struct rpc_cred *cred,
/* even though OPEN succeeded, access is denied. Close the file */
nfs4_close_state(state, fmode);
- return -NFS4ERR_ACCESS;
+ return -EACCES;
}
/*
@@ -2196,7 +2197,7 @@ static void nfs4_free_closedata(void *data)
nfs4_put_open_state(calldata->state);
nfs_free_seqid(calldata->arg.seqid);
nfs4_put_state_owner(sp);
- nfs_sb_deactive(sb);
+ nfs_sb_deactive_async(sb);
kfree(calldata);
}
@@ -2296,9 +2297,10 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
if (nfs4_setup_sequence(NFS_SERVER(inode),
&calldata->arg.seq_args,
&calldata->res.seq_res,
- task))
- goto out;
- rpc_call_start(task);
+ task) != 0)
+ nfs_release_seqid(calldata->arg.seqid);
+ else
+ rpc_call_start(task);
out:
dprintk("%s: done!\n", __func__);
}
@@ -4529,6 +4531,7 @@ static void nfs4_locku_done(struct rpc_task *task, void *data)
if (nfs4_async_handle_error(task, calldata->server, NULL) == -EAGAIN)
rpc_restart_call_prepare(task);
}
+ nfs_release_seqid(calldata->arg.seqid);
}
static void nfs4_locku_prepare(struct rpc_task *task, void *data)
@@ -4545,9 +4548,11 @@ static void nfs4_locku_prepare(struct rpc_task *task, void *data)
calldata->timestamp = jiffies;
if (nfs4_setup_sequence(calldata->server,
&calldata->arg.seq_args,
- &calldata->res.seq_res, task))
- return;
- rpc_call_start(task);
+ &calldata->res.seq_res,
+ task) != 0)
+ nfs_release_seqid(calldata->arg.seqid);
+ else
+ rpc_call_start(task);
}
static const struct rpc_call_ops nfs4_locku_ops = {
@@ -4692,7 +4697,7 @@ static void nfs4_lock_prepare(struct rpc_task *task, void *calldata)
/* Do we need to do an open_to_lock_owner? */
if (!(data->arg.lock_seqid->sequence->flags & NFS_SEQID_CONFIRMED)) {
if (nfs_wait_on_sequence(data->arg.open_seqid, task) != 0)
- return;
+ goto out_release_lock_seqid;
data->arg.open_stateid = &state->stateid;
data->arg.new_lock_owner = 1;
data->res.open_seqid = data->arg.open_seqid;
@@ -4701,10 +4706,15 @@ static void nfs4_lock_prepare(struct rpc_task *task, void *calldata)
data->timestamp = jiffies;
if (nfs4_setup_sequence(data->server,
&data->arg.seq_args,
- &data->res.seq_res, task))
+ &data->res.seq_res,
+ task) == 0) {
+ rpc_call_start(task);
return;
- rpc_call_start(task);
- dprintk("%s: done!, ret = %d\n", __func__, data->rpc_status);
+ }
+ nfs_release_seqid(data->arg.open_seqid);
+out_release_lock_seqid:
+ nfs_release_seqid(data->arg.lock_seqid);
+ dprintk("%s: done!, ret = %d\n", __func__, task->tk_status);
}
static void nfs4_recover_lock_prepare(struct rpc_task *task, void *calldata)
@@ -5667,7 +5677,7 @@ static void nfs4_add_and_init_slots(struct nfs4_slot_table *tbl,
tbl->slots = new;
tbl->max_slots = max_slots;
}
- tbl->highest_used_slotid = -1; /* no slot is currently used */
+ tbl->highest_used_slotid = NFS4_NO_SLOT;
for (i = 0; i < tbl->max_slots; i++)
tbl->slots[i].seq_nr = ivalue;
spin_unlock(&tbl->slot_tbl_lock);
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
index be731e6b7b9c..c6f990656f89 100644
--- a/fs/nfs/objlayout/objio_osd.c
+++ b/fs/nfs/objlayout/objio_osd.c
@@ -369,7 +369,7 @@ void objio_free_result(struct objlayout_io_res *oir)
kfree(objios);
}
-enum pnfs_osd_errno osd_pri_2_pnfs_err(enum osd_err_priority oep)
+static enum pnfs_osd_errno osd_pri_2_pnfs_err(enum osd_err_priority oep)
{
switch (oep) {
case OSD_ERR_PRI_NO_ERROR:
@@ -574,7 +574,7 @@ static bool objio_pg_test(struct nfs_pageio_descriptor *pgio,
(unsigned long)pgio->pg_layout_private;
}
-void objio_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
+static void objio_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
{
pnfs_generic_pg_init_read(pgio, req);
if (unlikely(pgio->pg_lseg == NULL))
@@ -604,7 +604,7 @@ static bool aligned_on_raid_stripe(u64 offset, struct ore_layout *layout,
return false;
}
-void objio_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
+static void objio_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
{
unsigned long stripe_end = 0;
u64 wb_size;
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index fe624c91bd00..2878f97bd78d 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -925,8 +925,8 @@ pnfs_find_alloc_layout(struct inode *ino,
if (likely(nfsi->layout == NULL)) { /* Won the race? */
nfsi->layout = new;
return new;
- }
- pnfs_free_layout_hdr(new);
+ } else if (new != NULL)
+ pnfs_free_layout_hdr(new);
out_existing:
pnfs_get_layout_hdr(nfsi->layout);
return nfsi->layout;
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 2d722dba1111..dbf7bba52da0 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -62,6 +62,7 @@ enum {
NFS_LAYOUT_RW_FAILED, /* get rw layout failed stop trying */
NFS_LAYOUT_BULK_RECALL, /* bulk recall affecting layout */
NFS_LAYOUT_ROC, /* some lseg had roc bit set */
+ NFS_LAYOUT_RETURN, /* Return this layout ASAP */
};
enum layoutdriver_policy_flags {
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index e831bce49766..652d3f7176a9 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -54,6 +54,7 @@
#include <linux/parser.h>
#include <linux/nsproxy.h>
#include <linux/rcupdate.h>
+#include <linux/kthread.h>
#include <asm/uaccess.h>
@@ -415,6 +416,54 @@ void nfs_sb_deactive(struct super_block *sb)
}
EXPORT_SYMBOL_GPL(nfs_sb_deactive);
+static int nfs_deactivate_super_async_work(void *ptr)
+{
+ struct super_block *sb = ptr;
+
+ deactivate_super(sb);
+ module_put_and_exit(0);
+ return 0;
+}
+
+/*
+ * same effect as deactivate_super, but will do final unmount in kthread
+ * context
+ */
+static void nfs_deactivate_super_async(struct super_block *sb)
+{
+ struct task_struct *task;
+ char buf[INET6_ADDRSTRLEN + 1];
+ struct nfs_server *server = NFS_SB(sb);
+ struct nfs_client *clp = server->nfs_client;
+
+ if (!atomic_add_unless(&sb->s_active, -1, 1)) {
+ rcu_read_lock();
+ snprintf(buf, sizeof(buf),
+ rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR));
+ rcu_read_unlock();
+
+ __module_get(THIS_MODULE);
+ task = kthread_run(nfs_deactivate_super_async_work, sb,
+ "%s-deactivate-super", buf);
+ if (IS_ERR(task)) {
+ pr_err("%s: kthread_run: %ld\n",
+ __func__, PTR_ERR(task));
+ /* make synchronous call and hope for the best */
+ deactivate_super(sb);
+ module_put(THIS_MODULE);
+ }
+ }
+}
+
+void nfs_sb_deactive_async(struct super_block *sb)
+{
+ struct nfs_server *server = NFS_SB(sb);
+
+ if (atomic_dec_and_test(&server->active))
+ nfs_deactivate_super_async(sb);
+}
+EXPORT_SYMBOL_GPL(nfs_sb_deactive_async);
+
/*
* Deliver file system statistics to userspace
*/
@@ -771,7 +820,7 @@ int nfs_show_devname(struct seq_file *m, struct dentry *root)
int err = 0;
if (!page)
return -ENOMEM;
- devname = nfs_path(&dummy, root, page, PAGE_SIZE);
+ devname = nfs_path(&dummy, root, page, PAGE_SIZE, 0);
if (IS_ERR(devname))
err = PTR_ERR(devname);
else
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index 13cea637eff8..3f79c77153b8 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -95,7 +95,7 @@ static void nfs_async_unlink_release(void *calldata)
nfs_dec_sillycount(data->dir);
nfs_free_unlinkdata(data);
- nfs_sb_deactive(sb);
+ nfs_sb_deactive_async(sb);
}
static void nfs_unlink_prepare(struct rpc_task *task, void *calldata)
diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c
index 3e7b2a0dc0c8..07f76db04ec7 100644
--- a/fs/nilfs2/page.c
+++ b/fs/nilfs2/page.c
@@ -431,7 +431,7 @@ void nilfs_mapping_init(struct address_space *mapping, struct inode *inode,
mapping->host = inode;
mapping->flags = 0;
mapping_set_gfp_mask(mapping, GFP_NOFS);
- mapping->assoc_mapping = NULL;
+ mapping->private_data = NULL;
mapping->backing_dev_info = bdi;
mapping->a_ops = &empty_aops;
}
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index f35794b97e8e..a50636025364 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -21,6 +21,7 @@ static bool should_merge(struct fsnotify_event *old, struct fsnotify_event *new)
if ((old->path.mnt == new->path.mnt) &&
(old->path.dentry == new->path.dentry))
return true;
+ break;
case (FSNOTIFY_EVENT_NONE):
return true;
default:
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 721d692fa8d4..6fcaeb8c902e 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -258,7 +258,8 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group,
if (ret)
goto out_close_fd;
- fd_install(fd, f);
+ if (fd != FAN_NOFD)
+ fd_install(fd, f);
return fanotify_event_metadata.event_len;
out_close_fd:
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 5a4ee77cec51..dda089804942 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2513,18 +2513,15 @@ static ssize_t ocfs2_file_splice_write(struct pipe_inode_info *pipe,
ret = sd.num_spliced;
if (ret > 0) {
- unsigned long nr_pages;
int err;
- nr_pages = (ret + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
-
err = generic_write_sync(out, *ppos, ret);
if (err)
ret = err;
else
*ppos += ret;
- balance_dirty_pages_ratelimited_nr(mapping, nr_pages);
+ balance_dirty_pages_ratelimited(mapping);
}
return ret;
diff --git a/fs/proc/array.c b/fs/proc/array.c
index c1c207c36cae..d3696708fc1a 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -438,7 +438,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
min_flt += sig->min_flt;
maj_flt += sig->maj_flt;
- thread_group_times(task, &utime, &stime);
+ thread_group_cputime_adjusted(task, &utime, &stime);
gtime += sig->gtime;
}
@@ -454,7 +454,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
if (!whole) {
min_flt = task->min_flt;
maj_flt = task->maj_flt;
- task_times(task, &utime, &stime);
+ task_cputime_adjusted(task, &utime, &stime);
gtime = task->gtime;
}
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 144a96732dd7..aa63d25157b8 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -873,12 +873,119 @@ static const struct file_operations proc_environ_operations = {
.release = mem_release,
};
+static ssize_t oom_adj_read(struct file *file, char __user *buf, size_t count,
+ loff_t *ppos)
+{
+ struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
+ char buffer[PROC_NUMBUF];
+ int oom_adj = OOM_ADJUST_MIN;
+ size_t len;
+ unsigned long flags;
+
+ if (!task)
+ return -ESRCH;
+ if (lock_task_sighand(task, &flags)) {
+ if (task->signal->oom_score_adj == OOM_SCORE_ADJ_MAX)
+ oom_adj = OOM_ADJUST_MAX;
+ else
+ oom_adj = (task->signal->oom_score_adj * -OOM_DISABLE) /
+ OOM_SCORE_ADJ_MAX;
+ unlock_task_sighand(task, &flags);
+ }
+ put_task_struct(task);
+ len = snprintf(buffer, sizeof(buffer), "%d\n", oom_adj);
+ return simple_read_from_buffer(buf, count, ppos, buffer, len);
+}
+
+static ssize_t oom_adj_write(struct file *file, const char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct task_struct *task;
+ char buffer[PROC_NUMBUF];
+ int oom_adj;
+ unsigned long flags;
+ int err;
+
+ memset(buffer, 0, sizeof(buffer));
+ if (count > sizeof(buffer) - 1)
+ count = sizeof(buffer) - 1;
+ if (copy_from_user(buffer, buf, count)) {
+ err = -EFAULT;
+ goto out;
+ }
+
+ err = kstrtoint(strstrip(buffer), 0, &oom_adj);
+ if (err)
+ goto out;
+ if ((oom_adj < OOM_ADJUST_MIN || oom_adj > OOM_ADJUST_MAX) &&
+ oom_adj != OOM_DISABLE) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ task = get_proc_task(file->f_path.dentry->d_inode);
+ if (!task) {
+ err = -ESRCH;
+ goto out;
+ }
+
+ task_lock(task);
+ if (!task->mm) {
+ err = -EINVAL;
+ goto err_task_lock;
+ }
+
+ if (!lock_task_sighand(task, &flags)) {
+ err = -ESRCH;
+ goto err_task_lock;
+ }
+
+ /*
+ * Scale /proc/pid/oom_score_adj appropriately ensuring that a maximum
+ * value is always attainable.
+ */
+ if (oom_adj == OOM_ADJUST_MAX)
+ oom_adj = OOM_SCORE_ADJ_MAX;
+ else
+ oom_adj = (oom_adj * OOM_SCORE_ADJ_MAX) / -OOM_DISABLE;
+
+ if (oom_adj < task->signal->oom_score_adj &&
+ !capable(CAP_SYS_RESOURCE)) {
+ err = -EACCES;
+ goto err_sighand;
+ }
+
+ /*
+ * /proc/pid/oom_adj is provided for legacy purposes, ask users to use
+ * /proc/pid/oom_score_adj instead.
+ */
+ printk_once(KERN_WARNING "%s (%d): /proc/%d/oom_adj is deprecated, please use /proc/%d/oom_score_adj instead.\n",
+ current->comm, task_pid_nr(current), task_pid_nr(task),
+ task_pid_nr(task));
+
+ task->signal->oom_score_adj = oom_adj;
+ trace_oom_score_adj_update(task);
+err_sighand:
+ unlock_task_sighand(task, &flags);
+err_task_lock:
+ task_unlock(task);
+ put_task_struct(task);
+out:
+ return err < 0 ? err : count;
+}
+
+static const struct file_operations proc_oom_adj_operations = {
+ .read = oom_adj_read,
+ .write = oom_adj_write,
+ .llseek = generic_file_llseek,
+};
+
static ssize_t oom_score_adj_read(struct file *file, char __user *buf,
size_t count, loff_t *ppos)
{
struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
char buffer[PROC_NUMBUF];
- int oom_score_adj = OOM_SCORE_ADJ_MIN;
+ short oom_score_adj = OOM_SCORE_ADJ_MIN;
unsigned long flags;
size_t len;
@@ -889,7 +996,7 @@ static ssize_t oom_score_adj_read(struct file *file, char __user *buf,
unlock_task_sighand(task, &flags);
}
put_task_struct(task);
- len = snprintf(buffer, sizeof(buffer), "%d\n", oom_score_adj);
+ len = snprintf(buffer, sizeof(buffer), "%hd\n", oom_score_adj);
return simple_read_from_buffer(buf, count, ppos, buffer, len);
}
@@ -936,15 +1043,15 @@ static ssize_t oom_score_adj_write(struct file *file, const char __user *buf,
goto err_task_lock;
}
- if (oom_score_adj < task->signal->oom_score_adj_min &&
+ if ((short)oom_score_adj < task->signal->oom_score_adj_min &&
!capable(CAP_SYS_RESOURCE)) {
err = -EACCES;
goto err_sighand;
}
- task->signal->oom_score_adj = oom_score_adj;
+ task->signal->oom_score_adj = (short)oom_score_adj;
if (has_capability_noaudit(current, CAP_SYS_RESOURCE))
- task->signal->oom_score_adj_min = oom_score_adj;
+ task->signal->oom_score_adj_min = (short)oom_score_adj;
trace_oom_score_adj_update(task);
err_sighand:
@@ -1770,8 +1877,9 @@ static struct dentry *proc_map_files_lookup(struct inode *dir,
if (!vma)
goto out_no_vma;
- result = proc_map_files_instantiate(dir, dentry, task,
- (void *)(unsigned long)vma->vm_file->f_mode);
+ if (vma->vm_file)
+ result = proc_map_files_instantiate(dir, dentry, task,
+ (void *)(unsigned long)vma->vm_file->f_mode);
out_no_vma:
up_read(&mm->mmap_sem);
@@ -2598,6 +2706,7 @@ static const struct pid_entry tgid_base_stuff[] = {
REG("cgroup", S_IRUGO, proc_cgroup_operations),
#endif
INF("oom_score", S_IRUGO, proc_oom_score),
+ REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations),
REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations),
#ifdef CONFIG_AUDITSYSCALL
REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations),
@@ -2964,6 +3073,7 @@ static const struct pid_entry tid_base_stuff[] = {
REG("cgroup", S_IRUGO, proc_cgroup_operations),
#endif
INF("oom_score", S_IRUGO, proc_oom_score),
+ REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations),
REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations),
#ifdef CONFIG_AUDITSYSCALL
REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations),
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index cceaab07ad54..43973b084abf 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -12,6 +12,7 @@
#include <linux/sched.h>
#include <linux/proc_fs.h>
struct ctl_table_header;
+struct mempolicy;
extern struct proc_dir_entry proc_root;
#ifdef CONFIG_PROC_SYSCTL
@@ -74,6 +75,9 @@ struct proc_maps_private {
#ifdef CONFIG_MMU
struct vm_area_struct *tail_vma;
#endif
+#ifdef CONFIG_NUMA
+ struct mempolicy *task_mempolicy;
+#endif
};
void proc_init_inodecache(void);
diff --git a/fs/proc/stat.c b/fs/proc/stat.c
index 64c3b3172367..e296572c73ed 100644
--- a/fs/proc/stat.c
+++ b/fs/proc/stat.c
@@ -45,10 +45,13 @@ static cputime64_t get_iowait_time(int cpu)
static u64 get_idle_time(int cpu)
{
- u64 idle, idle_time = get_cpu_idle_time_us(cpu, NULL);
+ u64 idle, idle_time = -1ULL;
+
+ if (cpu_online(cpu))
+ idle_time = get_cpu_idle_time_us(cpu, NULL);
if (idle_time == -1ULL)
- /* !NO_HZ so we can rely on cpustat.idle */
+ /* !NO_HZ or cpu offline so we can rely on cpustat.idle */
idle = kcpustat_cpu(cpu).cpustat[CPUTIME_IDLE];
else
idle = usecs_to_cputime64(idle_time);
@@ -58,10 +61,13 @@ static u64 get_idle_time(int cpu)
static u64 get_iowait_time(int cpu)
{
- u64 iowait, iowait_time = get_cpu_iowait_time_us(cpu, NULL);
+ u64 iowait, iowait_time = -1ULL;
+
+ if (cpu_online(cpu))
+ iowait_time = get_cpu_iowait_time_us(cpu, NULL);
if (iowait_time == -1ULL)
- /* !NO_HZ so we can rely on cpustat.iowait */
+ /* !NO_HZ or cpu offline so we can rely on cpustat.iowait */
iowait = kcpustat_cpu(cpu).cpustat[CPUTIME_IOWAIT];
else
iowait = usecs_to_cputime64(iowait_time);
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 79827ce03e3b..90c63f9392a5 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -90,10 +90,55 @@ static void pad_len_spaces(struct seq_file *m, int len)
seq_printf(m, "%*c", len, ' ');
}
+#ifdef CONFIG_NUMA
+/*
+ * These functions are for numa_maps but called in generic **maps seq_file
+ * ->start(), ->stop() ops.
+ *
+ * numa_maps scans all vmas under mmap_sem and checks their mempolicy.
+ * Each mempolicy object is controlled by reference counting. The problem here
+ * is how to avoid accessing dead mempolicy object.
+ *
+ * Because we're holding mmap_sem while reading seq_file, it's safe to access
+ * each vma's mempolicy, no vma objects will never drop refs to mempolicy.
+ *
+ * A task's mempolicy (task->mempolicy) has different behavior. task->mempolicy
+ * is set and replaced under mmap_sem but unrefed and cleared under task_lock().
+ * So, without task_lock(), we cannot trust get_vma_policy() because we cannot
+ * gurantee the task never exits under us. But taking task_lock() around
+ * get_vma_plicy() causes lock order problem.
+ *
+ * To access task->mempolicy without lock, we hold a reference count of an
+ * object pointed by task->mempolicy and remember it. This will guarantee
+ * that task->mempolicy points to an alive object or NULL in numa_maps accesses.
+ */
+static void hold_task_mempolicy(struct proc_maps_private *priv)
+{
+ struct task_struct *task = priv->task;
+
+ task_lock(task);
+ priv->task_mempolicy = task->mempolicy;
+ mpol_get(priv->task_mempolicy);
+ task_unlock(task);
+}
+static void release_task_mempolicy(struct proc_maps_private *priv)
+{
+ mpol_put(priv->task_mempolicy);
+}
+#else
+static void hold_task_mempolicy(struct proc_maps_private *priv)
+{
+}
+static void release_task_mempolicy(struct proc_maps_private *priv)
+{
+}
+#endif
+
static void vma_stop(struct proc_maps_private *priv, struct vm_area_struct *vma)
{
if (vma && vma != priv->tail_vma) {
struct mm_struct *mm = vma->vm_mm;
+ release_task_mempolicy(priv);
up_read(&mm->mmap_sem);
mmput(mm);
}
@@ -132,7 +177,7 @@ static void *m_start(struct seq_file *m, loff_t *pos)
tail_vma = get_gate_vma(priv->task->mm);
priv->tail_vma = tail_vma;
-
+ hold_task_mempolicy(priv);
/* Start with last addr hint */
vma = find_vma(mm, last_addr);
if (last_addr && vma) {
@@ -159,6 +204,7 @@ out:
if (vma)
return vma;
+ release_task_mempolicy(priv);
/* End of vmas has been reached */
m->version = (tail_vma != NULL)? 0: -1UL;
up_read(&mm->mmap_sem);
@@ -1158,6 +1204,7 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid)
struct vm_area_struct *vma = v;
struct numa_maps *md = &numa_priv->md;
struct file *file = vma->vm_file;
+ struct task_struct *task = proc_priv->task;
struct mm_struct *mm = vma->vm_mm;
struct mm_walk walk = {};
struct mempolicy *pol;
@@ -1177,7 +1224,7 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid)
walk.private = md;
walk.mm = mm;
- pol = get_vma_policy(proc_priv->task, vma, vma->vm_start);
+ pol = get_vma_policy(task, vma, vma->vm_start);
mpol_to_str(buffer, sizeof(buffer), pol, 0);
mpol_cond_put(pol);
@@ -1189,7 +1236,7 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid)
} else if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) {
seq_printf(m, " heap");
} else {
- pid_t tid = vm_is_stack(proc_priv->task, vma, is_pid);
+ pid_t tid = vm_is_stack(task, vma, is_pid);
if (tid != 0) {
/*
* Thread stack in /proc/PID/task/TID/maps or
diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c
index 4ab572e6d277..ed1d8c7212da 100644
--- a/fs/pstore/inode.c
+++ b/fs/pstore/inode.c
@@ -49,6 +49,7 @@ struct pstore_private {
struct pstore_info *psi;
enum pstore_type_id type;
u64 id;
+ int count;
ssize_t size;
char data[];
};
@@ -175,7 +176,8 @@ static int pstore_unlink(struct inode *dir, struct dentry *dentry)
struct pstore_private *p = dentry->d_inode->i_private;
if (p->psi->erase)
- p->psi->erase(p->type, p->id, p->psi);
+ p->psi->erase(p->type, p->id, p->count,
+ dentry->d_inode->i_ctime, p->psi);
return simple_unlink(dir, dentry);
}
@@ -270,7 +272,7 @@ int pstore_is_mounted(void)
* Load it up with "size" bytes of data from "buf".
* Set the mtime & ctime to the date that this record was originally stored.
*/
-int pstore_mkfile(enum pstore_type_id type, char *psname, u64 id,
+int pstore_mkfile(enum pstore_type_id type, char *psname, u64 id, int count,
char *data, size_t size, struct timespec time,
struct pstore_info *psi)
{
@@ -306,6 +308,7 @@ int pstore_mkfile(enum pstore_type_id type, char *psname, u64 id,
goto fail_alloc;
private->type = type;
private->id = id;
+ private->count = count;
private->psi = psi;
switch (type) {
diff --git a/fs/pstore/internal.h b/fs/pstore/internal.h
index 4847f588b7d5..937d820f273c 100644
--- a/fs/pstore/internal.h
+++ b/fs/pstore/internal.h
@@ -50,7 +50,7 @@ extern struct pstore_info *psinfo;
extern void pstore_set_kmsg_bytes(int);
extern void pstore_get_records(int);
extern int pstore_mkfile(enum pstore_type_id, char *psname, u64 id,
- char *data, size_t size,
+ int count, char *data, size_t size,
struct timespec time, struct pstore_info *psi);
extern int pstore_is_mounted(void);
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index a40da07e93d6..5ea2e77ff023 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -136,7 +136,7 @@ static void pstore_dump(struct kmsg_dumper *dumper,
break;
ret = psinfo->write(PSTORE_TYPE_DMESG, reason, &id, part,
- hsize + len, psinfo);
+ oopscount, hsize + len, psinfo);
if (ret == 0 && reason == KMSG_DUMP_OOPS && pstore_is_mounted())
pstore_new_entry = 1;
@@ -161,6 +161,7 @@ static void pstore_console_write(struct console *con, const char *s, unsigned c)
while (s < e) {
unsigned long flags;
+ u64 id;
if (c > psinfo->bufsize)
c = psinfo->bufsize;
@@ -172,7 +173,7 @@ static void pstore_console_write(struct console *con, const char *s, unsigned c)
spin_lock_irqsave(&psinfo->buf_lock, flags);
}
memcpy(psinfo->buf, s, c);
- psinfo->write(PSTORE_TYPE_CONSOLE, 0, NULL, 0, c, psinfo);
+ psinfo->write(PSTORE_TYPE_CONSOLE, 0, &id, 0, 0, c, psinfo);
spin_unlock_irqrestore(&psinfo->buf_lock, flags);
s += c;
c = e - s;
@@ -196,7 +197,7 @@ static void pstore_register_console(void) {}
static int pstore_write_compat(enum pstore_type_id type,
enum kmsg_dump_reason reason,
- u64 *id, unsigned int part,
+ u64 *id, unsigned int part, int count,
size_t size, struct pstore_info *psi)
{
return psi->write_buf(type, reason, id, part, psinfo->buf, size, psi);
@@ -266,6 +267,7 @@ void pstore_get_records(int quiet)
char *buf = NULL;
ssize_t size;
u64 id;
+ int count;
enum pstore_type_id type;
struct timespec time;
int failed = 0, rc;
@@ -277,9 +279,9 @@ void pstore_get_records(int quiet)
if (psi->open && psi->open(psi))
goto out;
- while ((size = psi->read(&id, &type, &time, &buf, psi)) > 0) {
- rc = pstore_mkfile(type, psi->name, id, buf, (size_t)size,
- time, psi);
+ while ((size = psi->read(&id, &type, &count, &time, &buf, psi)) > 0) {
+ rc = pstore_mkfile(type, psi->name, id, count, buf,
+ (size_t)size, time, psi);
kfree(buf);
buf = NULL;
if (rc && (rc != -EEXIST || !quiet))
diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
index 1a4f6da58eab..2bfa36e0ffe8 100644
--- a/fs/pstore/ram.c
+++ b/fs/pstore/ram.c
@@ -132,9 +132,8 @@ ramoops_get_next_prz(struct persistent_ram_zone *przs[], uint *c, uint max,
}
static ssize_t ramoops_pstore_read(u64 *id, enum pstore_type_id *type,
- struct timespec *time,
- char **buf,
- struct pstore_info *psi)
+ int *count, struct timespec *time,
+ char **buf, struct pstore_info *psi)
{
ssize_t size;
struct ramoops_context *cxt = psi->data;
@@ -236,8 +235,8 @@ static int notrace ramoops_pstore_write_buf(enum pstore_type_id type,
return 0;
}
-static int ramoops_pstore_erase(enum pstore_type_id type, u64 id,
- struct pstore_info *psi)
+static int ramoops_pstore_erase(enum pstore_type_id type, u64 id, int count,
+ struct timespec time, struct pstore_info *psi)
{
struct ramoops_context *cxt = psi->data;
struct persistent_ram_zone *prz;
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 557a9c20a215..05ae3c97f7a5 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -1160,6 +1160,8 @@ static int need_print_warning(struct dquot_warn *warn)
return uid_eq(current_fsuid(), warn->w_dq_id.uid);
case GRPQUOTA:
return in_group_p(warn->w_dq_id.gid);
+ case PRJQUOTA: /* Never taken... Just make gcc happy */
+ return 0;
}
return 0;
}
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index f27f01a98aa2..d83736fbc26c 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -1782,8 +1782,9 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
BUG_ON(!th->t_trans_id);
- dquot_initialize(inode);
+ reiserfs_write_unlock(inode->i_sb);
err = dquot_alloc_inode(inode);
+ reiserfs_write_lock(inode->i_sb);
if (err)
goto out_end_trans;
if (!dir->i_nlink) {
@@ -1979,8 +1980,10 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
out_end_trans:
journal_end(th, th->t_super, th->t_blocks_allocated);
+ reiserfs_write_unlock(inode->i_sb);
/* Drop can be outside and it needs more credits so it's better to have it outside */
dquot_drop(inode);
+ reiserfs_write_lock(inode->i_sb);
inode->i_flags |= S_NOQUOTA;
make_bad_inode(inode);
@@ -3103,10 +3106,9 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)
/* must be turned off for recursive notify_change calls */
ia_valid = attr->ia_valid &= ~(ATTR_KILL_SUID|ATTR_KILL_SGID);
- depth = reiserfs_write_lock_once(inode->i_sb);
if (is_quota_modification(inode, attr))
dquot_initialize(inode);
-
+ depth = reiserfs_write_lock_once(inode->i_sb);
if (attr->ia_valid & ATTR_SIZE) {
/* version 2 items will be caught by the s_maxbytes check
** done for us in vmtruncate
@@ -3170,7 +3172,9 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)
error = journal_begin(&th, inode->i_sb, jbegin_count);
if (error)
goto out;
+ reiserfs_write_unlock_once(inode->i_sb, depth);
error = dquot_transfer(inode, attr);
+ depth = reiserfs_write_lock_once(inode->i_sb);
if (error) {
journal_end(&th, inode->i_sb, jbegin_count);
goto out;
diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c
index f8afa4b162b8..2f40a4c70a4d 100644
--- a/fs/reiserfs/stree.c
+++ b/fs/reiserfs/stree.c
@@ -1968,7 +1968,9 @@ int reiserfs_paste_into_item(struct reiserfs_transaction_handle *th, struct tree
key2type(&(key->on_disk_key)));
#endif
+ reiserfs_write_unlock(inode->i_sb);
retval = dquot_alloc_space_nodirty(inode, pasted_size);
+ reiserfs_write_lock(inode->i_sb);
if (retval) {
pathrelse(search_path);
return retval;
@@ -2061,9 +2063,11 @@ int reiserfs_insert_item(struct reiserfs_transaction_handle *th,
"reiserquota insert_item(): allocating %u id=%u type=%c",
quota_bytes, inode->i_uid, head2type(ih));
#endif
+ reiserfs_write_unlock(inode->i_sb);
/* We can't dirty inode here. It would be immediately written but
* appropriate stat item isn't inserted yet... */
retval = dquot_alloc_space_nodirty(inode, quota_bytes);
+ reiserfs_write_lock(inode->i_sb);
if (retval) {
pathrelse(path);
return retval;
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 1078ae179993..418bdc3a57da 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -298,7 +298,9 @@ static int finish_unfinished(struct super_block *s)
retval = remove_save_link_only(s, &save_link_key, 0);
continue;
}
+ reiserfs_write_unlock(s);
dquot_initialize(inode);
+ reiserfs_write_lock(s);
if (truncate && S_ISDIR(inode->i_mode)) {
/* We got a truncate request for a dir which is impossible.
@@ -1335,7 +1337,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
kfree(qf_names[i]);
#endif
err = -EINVAL;
- goto out_err;
+ goto out_unlock;
}
#ifdef CONFIG_QUOTA
handle_quota_files(s, qf_names, &qfmt);
@@ -1379,7 +1381,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
if (blocks) {
err = reiserfs_resize(s, blocks);
if (err != 0)
- goto out_err;
+ goto out_unlock;
}
if (*mount_flags & MS_RDONLY) {
@@ -1389,9 +1391,15 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
/* it is read-only already */
goto out_ok;
+ /*
+ * Drop write lock. Quota will retake it when needed and lock
+ * ordering requires calling dquot_suspend() without it.
+ */
+ reiserfs_write_unlock(s);
err = dquot_suspend(s, -1);
if (err < 0)
goto out_err;
+ reiserfs_write_lock(s);
/* try to remount file system with read-only permissions */
if (sb_umount_state(rs) == REISERFS_VALID_FS
@@ -1401,7 +1409,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
err = journal_begin(&th, s, 10);
if (err)
- goto out_err;
+ goto out_unlock;
/* Mounting a rw partition read-only. */
reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1);
@@ -1416,7 +1424,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
if (reiserfs_is_journal_aborted(journal)) {
err = journal->j_errno;
- goto out_err;
+ goto out_unlock;
}
handle_data_mode(s, mount_options);
@@ -1425,7 +1433,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
s->s_flags &= ~MS_RDONLY; /* now it is safe to call journal_begin */
err = journal_begin(&th, s, 10);
if (err)
- goto out_err;
+ goto out_unlock;
/* Mount a partition which is read-only, read-write */
reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1);
@@ -1442,10 +1450,16 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
SB_JOURNAL(s)->j_must_wait = 1;
err = journal_end(&th, s, 10);
if (err)
- goto out_err;
+ goto out_unlock;
if (!(*mount_flags & MS_RDONLY)) {
+ /*
+ * Drop write lock. Quota will retake it when needed and lock
+ * ordering requires calling dquot_resume() without it.
+ */
+ reiserfs_write_unlock(s);
dquot_resume(s, -1);
+ reiserfs_write_lock(s);
finish_unfinished(s);
reiserfs_xattr_init(s, *mount_flags);
}
@@ -1455,9 +1469,10 @@ out_ok:
reiserfs_write_unlock(s);
return 0;
+out_unlock:
+ reiserfs_write_unlock(s);
out_err:
kfree(new_opts);
- reiserfs_write_unlock(s);
return err;
}
@@ -2095,13 +2110,15 @@ static int reiserfs_write_dquot(struct dquot *dquot)
REISERFS_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
if (ret)
goto out;
+ reiserfs_write_unlock(dquot->dq_sb);
ret = dquot_commit(dquot);
+ reiserfs_write_lock(dquot->dq_sb);
err =
journal_end(&th, dquot->dq_sb,
REISERFS_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
if (!ret && err)
ret = err;
- out:
+out:
reiserfs_write_unlock(dquot->dq_sb);
return ret;
}
@@ -2117,13 +2134,15 @@ static int reiserfs_acquire_dquot(struct dquot *dquot)
REISERFS_QUOTA_INIT_BLOCKS(dquot->dq_sb));
if (ret)
goto out;
+ reiserfs_write_unlock(dquot->dq_sb);
ret = dquot_acquire(dquot);
+ reiserfs_write_lock(dquot->dq_sb);
err =
journal_end(&th, dquot->dq_sb,
REISERFS_QUOTA_INIT_BLOCKS(dquot->dq_sb));
if (!ret && err)
ret = err;
- out:
+out:
reiserfs_write_unlock(dquot->dq_sb);
return ret;
}
@@ -2137,19 +2156,21 @@ static int reiserfs_release_dquot(struct dquot *dquot)
ret =
journal_begin(&th, dquot->dq_sb,
REISERFS_QUOTA_DEL_BLOCKS(dquot->dq_sb));
+ reiserfs_write_unlock(dquot->dq_sb);
if (ret) {
/* Release dquot anyway to avoid endless cycle in dqput() */
dquot_release(dquot);
goto out;
}
ret = dquot_release(dquot);
+ reiserfs_write_lock(dquot->dq_sb);
err =
journal_end(&th, dquot->dq_sb,
REISERFS_QUOTA_DEL_BLOCKS(dquot->dq_sb));
if (!ret && err)
ret = err;
- out:
reiserfs_write_unlock(dquot->dq_sb);
+out:
return ret;
}
@@ -2174,11 +2195,13 @@ static int reiserfs_write_info(struct super_block *sb, int type)
ret = journal_begin(&th, sb, 2);
if (ret)
goto out;
+ reiserfs_write_unlock(sb);
ret = dquot_commit_info(sb, type);
+ reiserfs_write_lock(sb);
err = journal_end(&th, sb, 2);
if (!ret && err)
ret = err;
- out:
+out:
reiserfs_write_unlock(sb);
return ret;
}
@@ -2203,8 +2226,11 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id,
struct reiserfs_transaction_handle th;
int opt = type == USRQUOTA ? REISERFS_USRQUOTA : REISERFS_GRPQUOTA;
- if (!(REISERFS_SB(sb)->s_mount_opt & (1 << opt)))
- return -EINVAL;
+ reiserfs_write_lock(sb);
+ if (!(REISERFS_SB(sb)->s_mount_opt & (1 << opt))) {
+ err = -EINVAL;
+ goto out;
+ }
/* Quotafile not on the same filesystem? */
if (path->dentry->d_sb != sb) {
@@ -2246,8 +2272,10 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id,
if (err)
goto out;
}
- err = dquot_quota_on(sb, type, format_id, path);
+ reiserfs_write_unlock(sb);
+ return dquot_quota_on(sb, type, format_id, path);
out:
+ reiserfs_write_unlock(sb);
return err;
}
@@ -2320,7 +2348,9 @@ static ssize_t reiserfs_quota_write(struct super_block *sb, int type,
tocopy = sb->s_blocksize - offset < towrite ?
sb->s_blocksize - offset : towrite;
tmp_bh.b_state = 0;
+ reiserfs_write_lock(sb);
err = reiserfs_get_block(inode, blk, &tmp_bh, GET_BLOCK_CREATE);
+ reiserfs_write_unlock(sb);
if (err)
goto out;
if (offset || tocopy != sb->s_blocksize)
@@ -2336,10 +2366,12 @@ static ssize_t reiserfs_quota_write(struct super_block *sb, int type,
flush_dcache_page(bh->b_page);
set_buffer_uptodate(bh);
unlock_buffer(bh);
+ reiserfs_write_lock(sb);
reiserfs_prepare_for_journal(sb, bh, 1);
journal_mark_dirty(current->journal_info, sb, bh);
if (!journal_quota)
reiserfs_add_ordered_list(inode, bh);
+ reiserfs_write_unlock(sb);
brelse(bh);
offset = 0;
towrite -= tocopy;
diff --git a/fs/splice.c b/fs/splice.c
index 13e5b4776e7a..8890604e3fcd 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -1024,17 +1024,14 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
ret = sd.num_spliced;
if (ret > 0) {
- unsigned long nr_pages;
int err;
- nr_pages = (ret + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
-
err = generic_write_sync(out, *ppos, ret);
if (err)
ret = err;
else
*ppos += ret;
- balance_dirty_pages_ratelimited_nr(mapping, nr_pages);
+ balance_dirty_pages_ratelimited(mapping);
}
sb_end_write(inode->i_sb);
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 6b0bb00d4d2b..2fbdff6be25c 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -485,20 +485,18 @@ int __sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
/**
* sysfs_pathname - return full path to sysfs dirent
* @sd: sysfs_dirent whose path we want
- * @path: caller allocated buffer
+ * @path: caller allocated buffer of size PATH_MAX
*
* Gives the name "/" to the sysfs_root entry; any path returned
* is relative to wherever sysfs is mounted.
- *
- * XXX: does no error checking on @path size
*/
static char *sysfs_pathname(struct sysfs_dirent *sd, char *path)
{
if (sd->s_parent) {
sysfs_pathname(sd->s_parent, path);
- strcat(path, "/");
+ strlcat(path, "/", PATH_MAX);
}
- strcat(path, sd->s_name);
+ strlcat(path, sd->s_name, PATH_MAX);
return path;
}
@@ -531,9 +529,11 @@ int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
char *path = kzalloc(PATH_MAX, GFP_KERNEL);
WARN(1, KERN_WARNING
"sysfs: cannot create duplicate filename '%s'\n",
- (path == NULL) ? sd->s_name :
- strcat(strcat(sysfs_pathname(acxt->parent_sd, path), "/"),
- sd->s_name));
+ (path == NULL) ? sd->s_name
+ : (sysfs_pathname(acxt->parent_sd, path),
+ strlcat(path, "/", PATH_MAX),
+ strlcat(path, sd->s_name, PATH_MAX),
+ path));
kfree(path);
}
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index 00012e31829d..602f56db0442 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -485,8 +485,8 @@ const struct file_operations sysfs_file_operations = {
.poll = sysfs_poll,
};
-int sysfs_attr_ns(struct kobject *kobj, const struct attribute *attr,
- const void **pns)
+static int sysfs_attr_ns(struct kobject *kobj, const struct attribute *attr,
+ const void **pns)
{
struct sysfs_dirent *dir_sd = kobj->sd;
const struct sysfs_ops *ops;
diff --git a/fs/ubifs/find.c b/fs/ubifs/find.c
index 28ec13af28d9..2dcf3d473fec 100644
--- a/fs/ubifs/find.c
+++ b/fs/ubifs/find.c
@@ -681,8 +681,16 @@ int ubifs_find_free_leb_for_idx(struct ubifs_info *c)
if (!lprops) {
lprops = ubifs_fast_find_freeable(c);
if (!lprops) {
- ubifs_assert(c->freeable_cnt == 0);
- if (c->lst.empty_lebs - c->lst.taken_empty_lebs > 0) {
+ /*
+ * The first condition means the following: go scan the
+ * LPT if there are uncategorized lprops, which means
+ * there may be freeable LEBs there (UBIFS does not
+ * store the information about freeable LEBs in the
+ * master node).
+ */
+ if (c->in_a_category_cnt != c->main_lebs ||
+ c->lst.empty_lebs - c->lst.taken_empty_lebs > 0) {
+ ubifs_assert(c->freeable_cnt == 0);
lprops = scan_for_leb_for_idx(c);
if (IS_ERR(lprops)) {
err = PTR_ERR(lprops);
diff --git a/fs/ubifs/lprops.c b/fs/ubifs/lprops.c
index e5a2a35a46dc..46190a7c42a6 100644
--- a/fs/ubifs/lprops.c
+++ b/fs/ubifs/lprops.c
@@ -300,8 +300,11 @@ void ubifs_add_to_cat(struct ubifs_info *c, struct ubifs_lprops *lprops,
default:
ubifs_assert(0);
}
+
lprops->flags &= ~LPROPS_CAT_MASK;
lprops->flags |= cat;
+ c->in_a_category_cnt += 1;
+ ubifs_assert(c->in_a_category_cnt <= c->main_lebs);
}
/**
@@ -334,6 +337,9 @@ static void ubifs_remove_from_cat(struct ubifs_info *c,
default:
ubifs_assert(0);
}
+
+ c->in_a_category_cnt -= 1;
+ ubifs_assert(c->in_a_category_cnt >= 0);
}
/**
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index 5486346d0a3f..d133c276fe05 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -1183,6 +1183,8 @@ struct ubifs_debug_info;
* @freeable_list: list of freeable non-index LEBs (free + dirty == @leb_size)
* @frdi_idx_list: list of freeable index LEBs (free + dirty == @leb_size)
* @freeable_cnt: number of freeable LEBs in @freeable_list
+ * @in_a_category_cnt: count of lprops which are in a certain category, which
+ * basically meants that they were loaded from the flash
*
* @ltab_lnum: LEB number of LPT's own lprops table
* @ltab_offs: offset of LPT's own lprops table
@@ -1412,6 +1414,7 @@ struct ubifs_info {
struct list_head freeable_list;
struct list_head frdi_idx_list;
int freeable_cnt;
+ int in_a_category_cnt;
int ltab_lnum;
int ltab_offs;
diff --git a/fs/xattr.c b/fs/xattr.c
index e164dddb8e96..e21c119f4f99 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -846,7 +846,7 @@ static int __simple_xattr_set(struct simple_xattrs *xattrs, const char *name,
const void *value, size_t size, int flags)
{
struct simple_xattr *xattr;
- struct simple_xattr *uninitialized_var(new_xattr);
+ struct simple_xattr *new_xattr = NULL;
int err = 0;
/* value == NULL means remove */