diff options
Diffstat (limited to 'fs')
110 files changed, 1268 insertions, 757 deletions
@@ -75,6 +75,7 @@ static struct kmem_cache *bio_find_or_create_slab(unsigned int extra_size) unsigned int sz = sizeof(struct bio) + extra_size; struct kmem_cache *slab = NULL; struct bio_slab *bslab, *new_bio_slabs; + unsigned int new_bio_slab_max; unsigned int i, entry = -1; mutex_lock(&bio_slab_lock); @@ -97,12 +98,13 @@ static struct kmem_cache *bio_find_or_create_slab(unsigned int extra_size) goto out_unlock; if (bio_slab_nr == bio_slab_max && entry == -1) { - bio_slab_max <<= 1; + new_bio_slab_max = bio_slab_max << 1; new_bio_slabs = krealloc(bio_slabs, - bio_slab_max * sizeof(struct bio_slab), + new_bio_slab_max * sizeof(struct bio_slab), GFP_KERNEL); if (!new_bio_slabs) goto out_unlock; + bio_slab_max = new_bio_slab_max; bio_slabs = new_bio_slabs; } if (entry == -1) diff --git a/fs/block_dev.c b/fs/block_dev.c index b3c1d3dae77d..ab3a456f6650 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -70,19 +70,6 @@ static void bdev_inode_switch_bdi(struct inode *inode, spin_unlock(&dst->wb.list_lock); } -sector_t blkdev_max_block(struct block_device *bdev) -{ - sector_t retval = ~((sector_t)0); - loff_t sz = i_size_read(bdev->bd_inode); - - if (sz) { - unsigned int size = block_size(bdev); - unsigned int sizebits = blksize_bits(size); - retval = (sz >> sizebits); - } - return retval; -} - /* Kill _all_ buffers and pagecache , dirty or not.. */ void kill_bdev(struct block_device *bdev) { @@ -116,8 +103,6 @@ EXPORT_SYMBOL(invalidate_bdev); int set_blocksize(struct block_device *bdev, int size) { - struct address_space *mapping; - /* Size must be a power of two, and between 512 and PAGE_SIZE */ if (size > PAGE_SIZE || size < 512 || !is_power_of_2(size)) return -EINVAL; @@ -126,19 +111,6 @@ int set_blocksize(struct block_device *bdev, int size) if (size < bdev_logical_block_size(bdev)) return -EINVAL; - /* Prevent starting I/O or mapping the device */ - percpu_down_write(&bdev->bd_block_size_semaphore); - - /* Check that the block device is not memory mapped */ - mapping = bdev->bd_inode->i_mapping; - mutex_lock(&mapping->i_mmap_mutex); - if (mapping_mapped(mapping)) { - mutex_unlock(&mapping->i_mmap_mutex); - percpu_up_write(&bdev->bd_block_size_semaphore); - return -EBUSY; - } - mutex_unlock(&mapping->i_mmap_mutex); - /* Don't change the size if it is same as current */ if (bdev->bd_block_size != size) { sync_blockdev(bdev); @@ -146,9 +118,6 @@ int set_blocksize(struct block_device *bdev, int size) bdev->bd_inode->i_blkbits = blksize_bits(size); kill_bdev(bdev); } - - percpu_up_write(&bdev->bd_block_size_semaphore); - return 0; } @@ -181,52 +150,12 @@ static int blkdev_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh, int create) { - if (iblock >= blkdev_max_block(I_BDEV(inode))) { - if (create) - return -EIO; - - /* - * for reads, we're just trying to fill a partial page. - * return a hole, they will have to call get_block again - * before they can fill it, and they will get -EIO at that - * time - */ - return 0; - } bh->b_bdev = I_BDEV(inode); bh->b_blocknr = iblock; set_buffer_mapped(bh); return 0; } -static int -blkdev_get_blocks(struct inode *inode, sector_t iblock, - struct buffer_head *bh, int create) -{ - sector_t end_block = blkdev_max_block(I_BDEV(inode)); - unsigned long max_blocks = bh->b_size >> inode->i_blkbits; - - if ((iblock + max_blocks) > end_block) { - max_blocks = end_block - iblock; - if ((long)max_blocks <= 0) { - if (create) - return -EIO; /* write fully beyond EOF */ - /* - * It is a read which is fully beyond EOF. We return - * a !buffer_mapped buffer - */ - max_blocks = 0; - } - } - - bh->b_bdev = I_BDEV(inode); - bh->b_blocknr = iblock; - bh->b_size = max_blocks << inode->i_blkbits; - if (max_blocks) - set_buffer_mapped(bh); - return 0; -} - static ssize_t blkdev_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t offset, unsigned long nr_segs) @@ -235,7 +164,7 @@ blkdev_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, struct inode *inode = file->f_mapping->host; return __blockdev_direct_IO(rw, iocb, inode, I_BDEV(inode), iov, offset, - nr_segs, blkdev_get_blocks, NULL, NULL, 0); + nr_segs, blkdev_get_block, NULL, NULL, 0); } int __sync_blockdev(struct block_device *bdev, int wait) @@ -459,12 +388,6 @@ static struct inode *bdev_alloc_inode(struct super_block *sb) struct bdev_inode *ei = kmem_cache_alloc(bdev_cachep, GFP_KERNEL); if (!ei) return NULL; - - if (unlikely(percpu_init_rwsem(&ei->bdev.bd_block_size_semaphore))) { - kmem_cache_free(bdev_cachep, ei); - return NULL; - } - return &ei->vfs_inode; } @@ -473,8 +396,6 @@ static void bdev_i_callback(struct rcu_head *head) struct inode *inode = container_of(head, struct inode, i_rcu); struct bdev_inode *bdi = BDEV_I(inode); - percpu_free_rwsem(&bdi->bdev.bd_block_size_semaphore); - kmem_cache_free(bdev_cachep, bdi); } @@ -1593,22 +1514,6 @@ static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg) return blkdev_ioctl(bdev, mode, cmd, arg); } -ssize_t blkdev_aio_read(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) -{ - ssize_t ret; - struct block_device *bdev = I_BDEV(iocb->ki_filp->f_mapping->host); - - percpu_down_read(&bdev->bd_block_size_semaphore); - - ret = generic_file_aio_read(iocb, iov, nr_segs, pos); - - percpu_up_read(&bdev->bd_block_size_semaphore); - - return ret; -} -EXPORT_SYMBOL_GPL(blkdev_aio_read); - /* * Write data to the block device. Only intended for the block device itself * and the raw driver which basically is a fake block device. @@ -1620,16 +1525,12 @@ ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos) { struct file *file = iocb->ki_filp; - struct block_device *bdev = I_BDEV(file->f_mapping->host); struct blk_plug plug; ssize_t ret; BUG_ON(iocb->ki_pos != pos); blk_start_plug(&plug); - - percpu_down_read(&bdev->bd_block_size_semaphore); - ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos); if (ret > 0 || ret == -EIOCBQUEUED) { ssize_t err; @@ -1638,27 +1539,25 @@ ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov, if (err < 0 && ret > 0) ret = err; } - - percpu_up_read(&bdev->bd_block_size_semaphore); - blk_finish_plug(&plug); - return ret; } EXPORT_SYMBOL_GPL(blkdev_aio_write); -static int blkdev_mmap(struct file *file, struct vm_area_struct *vma) +static ssize_t blkdev_aio_read(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos) { - int ret; - struct block_device *bdev = I_BDEV(file->f_mapping->host); - - percpu_down_read(&bdev->bd_block_size_semaphore); - - ret = generic_file_mmap(file, vma); + struct file *file = iocb->ki_filp; + struct inode *bd_inode = file->f_mapping->host; + loff_t size = i_size_read(bd_inode); - percpu_up_read(&bdev->bd_block_size_semaphore); + if (pos >= size) + return 0; - return ret; + size -= pos; + if (size < INT_MAX) + nr_segs = iov_shorten((struct iovec *)iov, nr_segs, size); + return generic_file_aio_read(iocb, iov, nr_segs, pos); } /* @@ -1691,9 +1590,9 @@ const struct file_operations def_blk_fops = { .llseek = block_llseek, .read = do_sync_read, .write = do_sync_write, - .aio_read = blkdev_aio_read, + .aio_read = blkdev_aio_read, .aio_write = blkdev_aio_write, - .mmap = blkdev_mmap, + .mmap = generic_file_mmap, .fsync = blkdev_fsync, .unlocked_ioctl = block_ioctl, #ifdef CONFIG_COMPAT diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index f3187938e081..208d8aa5b07e 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -283,9 +283,7 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info, goto out; } - rcu_read_lock(); - root_level = btrfs_header_level(root->node); - rcu_read_unlock(); + root_level = btrfs_old_root_level(root, time_seq); if (root_level + 1 == level) goto out; @@ -1177,16 +1175,15 @@ int btrfs_find_one_extref(struct btrfs_root *root, u64 inode_objectid, return ret; } -static char *ref_to_path(struct btrfs_root *fs_root, - struct btrfs_path *path, - u32 name_len, unsigned long name_off, - struct extent_buffer *eb_in, u64 parent, - char *dest, u32 size) +char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path, + u32 name_len, unsigned long name_off, + struct extent_buffer *eb_in, u64 parent, + char *dest, u32 size) { int slot; u64 next_inum; int ret; - s64 bytes_left = size - 1; + s64 bytes_left = ((s64)size) - 1; struct extent_buffer *eb = eb_in; struct btrfs_key found_key; int leave_spinning = path->leave_spinning; @@ -1266,10 +1263,10 @@ char *btrfs_iref_to_path(struct btrfs_root *fs_root, struct extent_buffer *eb_in, u64 parent, char *dest, u32 size) { - return ref_to_path(fs_root, path, - btrfs_inode_ref_name_len(eb_in, iref), - (unsigned long)(iref + 1), - eb_in, parent, dest, size); + return btrfs_ref_to_path(fs_root, path, + btrfs_inode_ref_name_len(eb_in, iref), + (unsigned long)(iref + 1), + eb_in, parent, dest, size); } /* @@ -1715,9 +1712,8 @@ static int inode_to_path(u64 inum, u32 name_len, unsigned long name_off, ipath->fspath->bytes_left - s_ptr : 0; fspath_min = (char *)ipath->fspath->val + (i + 1) * s_ptr; - fspath = ref_to_path(ipath->fs_root, ipath->btrfs_path, name_len, - name_off, eb, inum, fspath_min, - bytes_left); + fspath = btrfs_ref_to_path(ipath->fs_root, ipath->btrfs_path, name_len, + name_off, eb, inum, fspath_min, bytes_left); if (IS_ERR(fspath)) return PTR_ERR(fspath); diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h index e75533043a5f..d61feca79455 100644 --- a/fs/btrfs/backref.h +++ b/fs/btrfs/backref.h @@ -62,6 +62,10 @@ int btrfs_find_all_roots(struct btrfs_trans_handle *trans, char *btrfs_iref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path, struct btrfs_inode_ref *iref, struct extent_buffer *eb, u64 parent, char *dest, u32 size); +char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path, + u32 name_len, unsigned long name_off, + struct extent_buffer *eb_in, u64 parent, + char *dest, u32 size); struct btrfs_data_container *init_data_container(u32 total_bytes); struct inode_fs_paths *init_ipath(s32 total_bytes, struct btrfs_root *fs_root, diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index b33436211000..cdfb4c49a806 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -596,6 +596,11 @@ tree_mod_log_insert_move(struct btrfs_fs_info *fs_info, if (tree_mod_dont_log(fs_info, eb)) return 0; + /* + * When we override something during the move, we log these removals. + * This can only happen when we move towards the beginning of the + * buffer, i.e. dst_slot < src_slot. + */ for (i = 0; i + dst_slot < src_slot && i < nr_items; i++) { ret = tree_mod_log_insert_key_locked(fs_info, eb, i + dst_slot, MOD_LOG_KEY_REMOVE_WHILE_MOVING); @@ -647,8 +652,6 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info, if (tree_mod_dont_log(fs_info, NULL)) return 0; - __tree_mod_log_free_eb(fs_info, old_root); - ret = tree_mod_alloc(fs_info, flags, &tm); if (ret < 0) goto out; @@ -926,12 +929,7 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans, ret = btrfs_dec_ref(trans, root, buf, 1, 1); BUG_ON(ret); /* -ENOMEM */ } - /* - * don't log freeing in case we're freeing the root node, this - * is done by tree_mod_log_set_root_pointer later - */ - if (buf != root->node && btrfs_header_level(buf) != 0) - tree_mod_log_free_eb(root->fs_info, buf); + tree_mod_log_free_eb(root->fs_info, buf); clean_tree_block(trans, root, buf); *last_ref = 1; } @@ -1225,6 +1223,8 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, free_extent_buffer(eb); __tree_mod_log_rewind(eb_rewin, time_seq, tm); + WARN_ON(btrfs_header_nritems(eb_rewin) > + BTRFS_NODEPTRS_PER_BLOCK(fs_info->fs_root)); return eb_rewin; } @@ -1241,9 +1241,11 @@ get_old_root(struct btrfs_root *root, u64 time_seq) { struct tree_mod_elem *tm; struct extent_buffer *eb; + struct extent_buffer *old; struct tree_mod_root *old_root = NULL; u64 old_generation = 0; u64 logical; + u32 blocksize; eb = btrfs_read_lock_root_node(root); tm = __tree_mod_log_oldest_root(root->fs_info, root, time_seq); @@ -1259,14 +1261,32 @@ get_old_root(struct btrfs_root *root, u64 time_seq) } tm = tree_mod_log_search(root->fs_info, logical, time_seq); - if (old_root) + if (old_root && tm && tm->op != MOD_LOG_KEY_REMOVE_WHILE_FREEING) { + btrfs_tree_read_unlock(root->node); + free_extent_buffer(root->node); + blocksize = btrfs_level_size(root, old_root->level); + old = read_tree_block(root, logical, blocksize, 0); + if (!old) { + pr_warn("btrfs: failed to read tree block %llu from get_old_root\n", + logical); + WARN_ON(1); + } else { + eb = btrfs_clone_extent_buffer(old); + free_extent_buffer(old); + } + } else if (old_root) { + btrfs_tree_read_unlock(root->node); + free_extent_buffer(root->node); eb = alloc_dummy_extent_buffer(logical, root->nodesize); - else + } else { eb = btrfs_clone_extent_buffer(root->node); - btrfs_tree_read_unlock(root->node); - free_extent_buffer(root->node); + btrfs_tree_read_unlock(root->node); + free_extent_buffer(root->node); + } + if (!eb) return NULL; + extent_buffer_get(eb); btrfs_tree_read_lock(eb); if (old_root) { btrfs_set_header_bytenr(eb, eb->start); @@ -1279,11 +1299,28 @@ get_old_root(struct btrfs_root *root, u64 time_seq) __tree_mod_log_rewind(eb, time_seq, tm); else WARN_ON(btrfs_header_level(eb) != 0); - extent_buffer_get(eb); + WARN_ON(btrfs_header_nritems(eb) > BTRFS_NODEPTRS_PER_BLOCK(root)); return eb; } +int btrfs_old_root_level(struct btrfs_root *root, u64 time_seq) +{ + struct tree_mod_elem *tm; + int level; + + tm = __tree_mod_log_oldest_root(root->fs_info, root, time_seq); + if (tm && tm->op == MOD_LOG_ROOT_REPLACE) { + level = tm->old_root.level; + } else { + rcu_read_lock(); + level = btrfs_header_level(root->node); + rcu_read_unlock(); + } + + return level; +} + static inline int should_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf) @@ -1725,6 +1762,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, goto enospc; } + tree_mod_log_free_eb(root->fs_info, root->node); tree_mod_log_set_root_pointer(root, child); rcu_assign_pointer(root->node, child); @@ -2970,8 +3008,10 @@ static int push_node_left(struct btrfs_trans_handle *trans, push_items * sizeof(struct btrfs_key_ptr)); if (push_items < src_nritems) { - tree_mod_log_eb_move(root->fs_info, src, 0, push_items, - src_nritems - push_items); + /* + * don't call tree_mod_log_eb_move here, key removal was already + * fully logged by tree_mod_log_eb_copy above. + */ memmove_extent_buffer(src, btrfs_node_key_ptr_offset(0), btrfs_node_key_ptr_offset(push_items), (src_nritems - push_items) * diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 926c9ffc66d9..c72ead869507 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3120,6 +3120,7 @@ static inline u64 btrfs_inc_tree_mod_seq(struct btrfs_fs_info *fs_info) { return atomic_inc_return(&fs_info->tree_mod_seq); } +int btrfs_old_root_level(struct btrfs_root *root, u64 time_seq); /* root-item.c */ int btrfs_find_root_ref(struct btrfs_root *tree_root, @@ -3338,6 +3339,8 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, int btrfs_update_inode(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode); +int btrfs_update_inode_fallback(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct inode *inode); int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode); int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode); int btrfs_orphan_cleanup(struct btrfs_root *root); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 7cda51995c1e..22a0439e5a86 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3416,8 +3416,8 @@ void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr) num_dirty = root->fs_info->dirty_metadata_bytes; if (num_dirty > thresh) { - balance_dirty_pages_ratelimited_nr( - root->fs_info->btree_inode->i_mapping, 1); + balance_dirty_pages_ratelimited( + root->fs_info->btree_inode->i_mapping); } return; } @@ -3437,8 +3437,8 @@ void __btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr) num_dirty = root->fs_info->dirty_metadata_bytes; if (num_dirty > thresh) { - balance_dirty_pages_ratelimited_nr( - root->fs_info->btree_inode->i_mapping, 1); + balance_dirty_pages_ratelimited( + root->fs_info->btree_inode->i_mapping); } return; } diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 8036d3a84853..472873a94d96 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -4110,8 +4110,8 @@ struct extent_buffer *alloc_dummy_extent_buffer(u64 start, unsigned long len) return eb; err: - for (i--; i >= 0; i--) - __free_page(eb->pages[i]); + for (; i > 0; i--) + __free_page(eb->pages[i - 1]); __free_extent_buffer(eb); return NULL; } diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 9ab1bed88116..a8ee75cb96ee 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1346,8 +1346,7 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, cond_resched(); - balance_dirty_pages_ratelimited_nr(inode->i_mapping, - dirty_pages); + balance_dirty_pages_ratelimited(inode->i_mapping); if (dirty_pages < (root->leafsize >> PAGE_CACHE_SHIFT) + 1) btrfs_btree_balance_dirty(root, 1); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 85a1e5053fe6..95542a1b3dfc 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -94,8 +94,6 @@ static noinline int cow_file_range(struct inode *inode, struct page *locked_page, u64 start, u64 end, int *page_started, unsigned long *nr_written, int unlock); -static noinline int btrfs_update_inode_fallback(struct btrfs_trans_handle *trans, - struct btrfs_root *root, struct inode *inode); static int btrfs_init_inode_security(struct btrfs_trans_handle *trans, struct inode *inode, struct inode *dir, @@ -2746,8 +2744,9 @@ noinline int btrfs_update_inode(struct btrfs_trans_handle *trans, return btrfs_update_inode_item(trans, root, inode); } -static noinline int btrfs_update_inode_fallback(struct btrfs_trans_handle *trans, - struct btrfs_root *root, struct inode *inode) +noinline int btrfs_update_inode_fallback(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct inode *inode) { int ret; diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 61168805f175..5b3429ab8ec1 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -343,7 +343,8 @@ static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg) return -EOPNOTSUPP; if (copy_from_user(&range, arg, sizeof(range))) return -EFAULT; - if (range.start > total_bytes) + if (range.start > total_bytes || + range.len < fs_info->sb->s_blocksize) return -EINVAL; range.len = min(range.len, total_bytes - range.start); @@ -570,7 +571,8 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, ret = btrfs_commit_transaction(trans, root->fs_info->extent_root); } - BUG_ON(ret); + if (ret) + goto fail; ret = pending_snapshot->error; if (ret) @@ -1223,7 +1225,7 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, } defrag_count += ret; - balance_dirty_pages_ratelimited_nr(inode->i_mapping, ret); + balance_dirty_pages_ratelimited(inode->i_mapping); mutex_unlock(&inode->i_mutex); if (newer_than) { diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 5039686df6ae..fe9d02c45f8e 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -790,8 +790,10 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans, } path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; + if (!path) { + ret = -ENOMEM; + goto out_free_root; + } key.objectid = 0; key.type = BTRFS_QGROUP_STATUS_KEY; @@ -800,7 +802,7 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans, ret = btrfs_insert_empty_item(trans, quota_root, path, &key, sizeof(*ptr)); if (ret) - goto out; + goto out_free_path; leaf = path->nodes[0]; ptr = btrfs_item_ptr(leaf, path->slots[0], @@ -818,8 +820,15 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans, fs_info->quota_root = quota_root; fs_info->pending_quota_state = 1; spin_unlock(&fs_info->qgroup_lock); -out: +out_free_path: btrfs_free_path(path); +out_free_root: + if (ret) { + free_extent_buffer(quota_root->node); + free_extent_buffer(quota_root->commit_root); + kfree(quota_root); + } +out: return ret; } diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index c7beb543a4a8..e78b297b0b00 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -745,31 +745,36 @@ typedef int (*iterate_inode_ref_t)(int num, u64 dir, int index, void *ctx); /* - * Helper function to iterate the entries in ONE btrfs_inode_ref. + * Helper function to iterate the entries in ONE btrfs_inode_ref or + * btrfs_inode_extref. * The iterate callback may return a non zero value to stop iteration. This can * be a negative value for error codes or 1 to simply stop it. * - * path must point to the INODE_REF when called. + * path must point to the INODE_REF or INODE_EXTREF when called. */ static int iterate_inode_ref(struct send_ctx *sctx, struct btrfs_root *root, struct btrfs_path *path, struct btrfs_key *found_key, int resolve, iterate_inode_ref_t iterate, void *ctx) { - struct extent_buffer *eb; + struct extent_buffer *eb = path->nodes[0]; struct btrfs_item *item; struct btrfs_inode_ref *iref; + struct btrfs_inode_extref *extref; struct btrfs_path *tmp_path; struct fs_path *p; - u32 cur; - u32 len; + u32 cur = 0; u32 total; - int slot; + int slot = path->slots[0]; u32 name_len; char *start; int ret = 0; - int num; + int num = 0; int index; + u64 dir; + unsigned long name_off; + unsigned long elem_size; + unsigned long ptr; p = fs_path_alloc_reversed(sctx); if (!p) @@ -781,24 +786,40 @@ static int iterate_inode_ref(struct send_ctx *sctx, return -ENOMEM; } - eb = path->nodes[0]; - slot = path->slots[0]; - item = btrfs_item_nr(eb, slot); - iref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref); - cur = 0; - len = 0; - total = btrfs_item_size(eb, item); - num = 0; + if (found_key->type == BTRFS_INODE_REF_KEY) { + ptr = (unsigned long)btrfs_item_ptr(eb, slot, + struct btrfs_inode_ref); + item = btrfs_item_nr(eb, slot); + total = btrfs_item_size(eb, item); + elem_size = sizeof(*iref); + } else { + ptr = btrfs_item_ptr_offset(eb, slot); + total = btrfs_item_size_nr(eb, slot); + elem_size = sizeof(*extref); + } + while (cur < total) { fs_path_reset(p); - name_len = btrfs_inode_ref_name_len(eb, iref); - index = btrfs_inode_ref_index(eb, iref); + if (found_key->type == BTRFS_INODE_REF_KEY) { + iref = (struct btrfs_inode_ref *)(ptr + cur); + name_len = btrfs_inode_ref_name_len(eb, iref); + name_off = (unsigned long)(iref + 1); + index = btrfs_inode_ref_index(eb, iref); + dir = found_key->offset; + } else { + extref = (struct btrfs_inode_extref *)(ptr + cur); + name_len = btrfs_inode_extref_name_len(eb, extref); + name_off = (unsigned long)&extref->name; + index = btrfs_inode_extref_index(eb, extref); + dir = btrfs_inode_extref_parent(eb, extref); + } + if (resolve) { - start = btrfs_iref_to_path(root, tmp_path, iref, eb, - found_key->offset, p->buf, - p->buf_len); + start = btrfs_ref_to_path(root, tmp_path, name_len, + name_off, eb, dir, + p->buf, p->buf_len); if (IS_ERR(start)) { ret = PTR_ERR(start); goto out; @@ -809,9 +830,10 @@ static int iterate_inode_ref(struct send_ctx *sctx, p->buf_len + p->buf - start); if (ret < 0) goto out; - start = btrfs_iref_to_path(root, tmp_path, iref, - eb, found_key->offset, p->buf, - p->buf_len); + start = btrfs_ref_to_path(root, tmp_path, + name_len, name_off, + eb, dir, + p->buf, p->buf_len); if (IS_ERR(start)) { ret = PTR_ERR(start); goto out; @@ -820,21 +842,16 @@ static int iterate_inode_ref(struct send_ctx *sctx, } p->start = start; } else { - ret = fs_path_add_from_extent_buffer(p, eb, - (unsigned long)(iref + 1), name_len); + ret = fs_path_add_from_extent_buffer(p, eb, name_off, + name_len); if (ret < 0) goto out; } - - len = sizeof(*iref) + name_len; - iref = (struct btrfs_inode_ref *)((char *)iref + len); - cur += len; - - ret = iterate(num, found_key->offset, index, p, ctx); + cur += elem_size + name_len; + ret = iterate(num, dir, index, p, ctx); if (ret) goto out; - num++; } @@ -998,7 +1015,8 @@ static int get_inode_path(struct send_ctx *sctx, struct btrfs_root *root, } btrfs_item_key_to_cpu(p->nodes[0], &found_key, p->slots[0]); if (found_key.objectid != ino || - found_key.type != BTRFS_INODE_REF_KEY) { + (found_key.type != BTRFS_INODE_REF_KEY && + found_key.type != BTRFS_INODE_EXTREF_KEY)) { ret = -ENOENT; goto out; } @@ -1551,8 +1569,8 @@ static int get_first_ref(struct send_ctx *sctx, struct btrfs_key key; struct btrfs_key found_key; struct btrfs_path *path; - struct btrfs_inode_ref *iref; int len; + u64 parent_dir; path = alloc_path_for_send(); if (!path) @@ -1568,27 +1586,41 @@ static int get_first_ref(struct send_ctx *sctx, if (!ret) btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]); - if (ret || found_key.objectid != key.objectid || - found_key.type != key.type) { + if (ret || found_key.objectid != ino || + (found_key.type != BTRFS_INODE_REF_KEY && + found_key.type != BTRFS_INODE_EXTREF_KEY)) { ret = -ENOENT; goto out; } - iref = btrfs_item_ptr(path->nodes[0], path->slots[0], - struct btrfs_inode_ref); - len = btrfs_inode_ref_name_len(path->nodes[0], iref); - ret = fs_path_add_from_extent_buffer(name, path->nodes[0], - (unsigned long)(iref + 1), len); + if (key.type == BTRFS_INODE_REF_KEY) { + struct btrfs_inode_ref *iref; + iref = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_inode_ref); + len = btrfs_inode_ref_name_len(path->nodes[0], iref); + ret = fs_path_add_from_extent_buffer(name, path->nodes[0], + (unsigned long)(iref + 1), + len); + parent_dir = found_key.offset; + } else { + struct btrfs_inode_extref *extref; + extref = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_inode_extref); + len = btrfs_inode_extref_name_len(path->nodes[0], extref); + ret = fs_path_add_from_extent_buffer(name, path->nodes[0], + (unsigned long)&extref->name, len); + parent_dir = btrfs_inode_extref_parent(path->nodes[0], extref); + } if (ret < 0) goto out; btrfs_release_path(path); - ret = get_inode_info(root, found_key.offset, NULL, dir_gen, NULL, NULL, + ret = get_inode_info(root, parent_dir, NULL, dir_gen, NULL, NULL, NULL, NULL); if (ret < 0) goto out; - *dir = found_key.offset; + *dir = parent_dir; out: btrfs_free_path(path); @@ -2430,7 +2462,8 @@ verbose_printk("btrfs: send_create_inode %llu\n", ino); TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH_LINK, p); } else if (S_ISCHR(mode) || S_ISBLK(mode) || S_ISFIFO(mode) || S_ISSOCK(mode)) { - TLV_PUT_U64(sctx, BTRFS_SEND_A_RDEV, rdev); + TLV_PUT_U64(sctx, BTRFS_SEND_A_RDEV, new_encode_dev(rdev)); + TLV_PUT_U64(sctx, BTRFS_SEND_A_MODE, mode); } ret = send_cmd(sctx); @@ -3226,7 +3259,8 @@ static int process_all_refs(struct send_ctx *sctx, btrfs_item_key_to_cpu(eb, &found_key, slot); if (found_key.objectid != key.objectid || - found_key.type != key.type) + (found_key.type != BTRFS_INODE_REF_KEY && + found_key.type != BTRFS_INODE_EXTREF_KEY)) break; ret = iterate_inode_ref(sctx, root, path, &found_key, 0, cb, @@ -3987,7 +4021,7 @@ static int process_recorded_refs_if_needed(struct send_ctx *sctx, int at_end) if (sctx->cur_ino == 0) goto out; if (!at_end && sctx->cur_ino == sctx->cmp_key->objectid && - sctx->cmp_key->type <= BTRFS_INODE_REF_KEY) + sctx->cmp_key->type <= BTRFS_INODE_EXTREF_KEY) goto out; if (list_empty(&sctx->new_refs) && list_empty(&sctx->deleted_refs)) goto out; @@ -4033,22 +4067,21 @@ static int finish_inode_if_needed(struct send_ctx *sctx, int at_end) if (ret < 0) goto out; - if (!S_ISLNK(sctx->cur_inode_mode)) { - if (!sctx->parent_root || sctx->cur_inode_new) { + if (!sctx->parent_root || sctx->cur_inode_new) { + need_chown = 1; + if (!S_ISLNK(sctx->cur_inode_mode)) need_chmod = 1; - need_chown = 1; - } else { - ret = get_inode_info(sctx->parent_root, sctx->cur_ino, - NULL, NULL, &right_mode, &right_uid, - &right_gid, NULL); - if (ret < 0) - goto out; + } else { + ret = get_inode_info(sctx->parent_root, sctx->cur_ino, + NULL, NULL, &right_mode, &right_uid, + &right_gid, NULL); + if (ret < 0) + goto out; - if (left_uid != right_uid || left_gid != right_gid) - need_chown = 1; - if (left_mode != right_mode) - need_chmod = 1; - } + if (left_uid != right_uid || left_gid != right_gid) + need_chown = 1; + if (!S_ISLNK(sctx->cur_inode_mode) && left_mode != right_mode) + need_chmod = 1; } if (S_ISREG(sctx->cur_inode_mode)) { @@ -4335,7 +4368,8 @@ static int changed_cb(struct btrfs_root *left_root, if (key->type == BTRFS_INODE_ITEM_KEY) ret = changed_inode(sctx, result); - else if (key->type == BTRFS_INODE_REF_KEY) + else if (key->type == BTRFS_INODE_REF_KEY || + key->type == BTRFS_INODE_EXTREF_KEY) ret = changed_ref(sctx, result); else if (key->type == BTRFS_XATTR_ITEM_KEY) ret = changed_xattr(sctx, result); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 77db875b5116..04bbfb1052eb 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1200,7 +1200,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, btrfs_i_size_write(parent_inode, parent_inode->i_size + dentry->d_name.len * 2); parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME; - ret = btrfs_update_inode(trans, parent_root, parent_inode); + ret = btrfs_update_inode_fallback(trans, parent_root, parent_inode); if (ret) btrfs_abort_transaction(trans, root, ret); fail: diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 029b903a4ae3..0f5ebb72a5ea 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1819,6 +1819,13 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) "Failed to relocate sys chunks after " "device initialization. This can be fixed " "using the \"btrfs balance\" command."); + trans = btrfs_attach_transaction(root); + if (IS_ERR(trans)) { + if (PTR_ERR(trans) == -ENOENT) + return 0; + return PTR_ERR(trans); + } + ret = btrfs_commit_transaction(trans, root); } return ret; diff --git a/fs/buffer.c b/fs/buffer.c index b5f044283edb..6e9ed48064fc 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -555,7 +555,7 @@ void emergency_thaw_all(void) */ int sync_mapping_buffers(struct address_space *mapping) { - struct address_space *buffer_mapping = mapping->assoc_mapping; + struct address_space *buffer_mapping = mapping->private_data; if (buffer_mapping == NULL || list_empty(&mapping->private_list)) return 0; @@ -588,10 +588,10 @@ void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode) struct address_space *buffer_mapping = bh->b_page->mapping; mark_buffer_dirty(bh); - if (!mapping->assoc_mapping) { - mapping->assoc_mapping = buffer_mapping; + if (!mapping->private_data) { + mapping->private_data = buffer_mapping; } else { - BUG_ON(mapping->assoc_mapping != buffer_mapping); + BUG_ON(mapping->private_data != buffer_mapping); } if (!bh->b_assoc_map) { spin_lock(&buffer_mapping->private_lock); @@ -788,7 +788,7 @@ void invalidate_inode_buffers(struct inode *inode) if (inode_has_buffers(inode)) { struct address_space *mapping = &inode->i_data; struct list_head *list = &mapping->private_list; - struct address_space *buffer_mapping = mapping->assoc_mapping; + struct address_space *buffer_mapping = mapping->private_data; spin_lock(&buffer_mapping->private_lock); while (!list_empty(list)) @@ -811,7 +811,7 @@ int remove_inode_buffers(struct inode *inode) if (inode_has_buffers(inode)) { struct address_space *mapping = &inode->i_data; struct list_head *list = &mapping->private_list; - struct address_space *buffer_mapping = mapping->assoc_mapping; + struct address_space *buffer_mapping = mapping->private_data; spin_lock(&buffer_mapping->private_lock); while (!list_empty(list)) { @@ -911,6 +911,18 @@ link_dev_buffers(struct page *page, struct buffer_head *head) attach_page_buffers(page, head); } +static sector_t blkdev_max_block(struct block_device *bdev, unsigned int size) +{ + sector_t retval = ~((sector_t)0); + loff_t sz = i_size_read(bdev->bd_inode); + + if (sz) { + unsigned int sizebits = blksize_bits(size); + retval = (sz >> sizebits); + } + return retval; +} + /* * Initialise the state of a blockdev page's buffers. */ @@ -921,7 +933,7 @@ init_page_buffers(struct page *page, struct block_device *bdev, struct buffer_head *head = page_buffers(page); struct buffer_head *bh = head; int uptodate = PageUptodate(page); - sector_t end_block = blkdev_max_block(I_BDEV(bdev->bd_inode)); + sector_t end_block = blkdev_max_block(I_BDEV(bdev->bd_inode), size); do { if (!buffer_mapped(bh)) { @@ -1553,6 +1565,28 @@ void unmap_underlying_metadata(struct block_device *bdev, sector_t block) EXPORT_SYMBOL(unmap_underlying_metadata); /* + * Size is a power-of-two in the range 512..PAGE_SIZE, + * and the case we care about most is PAGE_SIZE. + * + * So this *could* possibly be written with those + * constraints in mind (relevant mostly if some + * architecture has a slow bit-scan instruction) + */ +static inline int block_size_bits(unsigned int blocksize) +{ + return ilog2(blocksize); +} + +static struct buffer_head *create_page_buffers(struct page *page, struct inode *inode, unsigned int b_state) +{ + BUG_ON(!PageLocked(page)); + + if (!page_has_buffers(page)) + create_empty_buffers(page, 1 << ACCESS_ONCE(inode->i_blkbits), b_state); + return page_buffers(page); +} + +/* * NOTE! All mapped/uptodate combinations are valid: * * Mapped Uptodate Meaning @@ -1589,19 +1623,13 @@ static int __block_write_full_page(struct inode *inode, struct page *page, sector_t block; sector_t last_block; struct buffer_head *bh, *head; - const unsigned blocksize = 1 << inode->i_blkbits; + unsigned int blocksize, bbits; int nr_underway = 0; int write_op = (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE); - BUG_ON(!PageLocked(page)); - - last_block = (i_size_read(inode) - 1) >> inode->i_blkbits; - - if (!page_has_buffers(page)) { - create_empty_buffers(page, blocksize, + head = create_page_buffers(page, inode, (1 << BH_Dirty)|(1 << BH_Uptodate)); - } /* * Be very careful. We have no exclusion from __set_page_dirty_buffers @@ -1613,9 +1641,12 @@ static int __block_write_full_page(struct inode *inode, struct page *page, * handle that here by just cleaning them. */ - block = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits); - head = page_buffers(page); bh = head; + blocksize = bh->b_size; + bbits = block_size_bits(blocksize); + + block = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits); + last_block = (i_size_read(inode) - 1) >> bbits; /* * Get all the dirty buffers mapped to disk addresses and @@ -1806,12 +1837,10 @@ int __block_write_begin(struct page *page, loff_t pos, unsigned len, BUG_ON(to > PAGE_CACHE_SIZE); BUG_ON(from > to); - blocksize = 1 << inode->i_blkbits; - if (!page_has_buffers(page)) - create_empty_buffers(page, blocksize, 0); - head = page_buffers(page); + head = create_page_buffers(page, inode, 0); + blocksize = head->b_size; + bbits = block_size_bits(blocksize); - bbits = inode->i_blkbits; block = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits); for(bh = head, block_start = 0; bh != head || !block_start; @@ -1881,11 +1910,11 @@ static int __block_commit_write(struct inode *inode, struct page *page, unsigned blocksize; struct buffer_head *bh, *head; - blocksize = 1 << inode->i_blkbits; + bh = head = page_buffers(page); + blocksize = bh->b_size; - for(bh = head = page_buffers(page), block_start = 0; - bh != head || !block_start; - block_start=block_end, bh = bh->b_this_page) { + block_start = 0; + do { block_end = block_start + blocksize; if (block_end <= from || block_start >= to) { if (!buffer_uptodate(bh)) @@ -1895,7 +1924,10 @@ static int __block_commit_write(struct inode *inode, struct page *page, mark_buffer_dirty(bh); } clear_buffer_new(bh); - } + + block_start = block_end; + bh = bh->b_this_page; + } while (bh != head); /* * If this is a partial write which happened to make all buffers @@ -2020,7 +2052,6 @@ EXPORT_SYMBOL(generic_write_end); int block_is_partially_uptodate(struct page *page, read_descriptor_t *desc, unsigned long from) { - struct inode *inode = page->mapping->host; unsigned block_start, block_end, blocksize; unsigned to; struct buffer_head *bh, *head; @@ -2029,13 +2060,13 @@ int block_is_partially_uptodate(struct page *page, read_descriptor_t *desc, if (!page_has_buffers(page)) return 0; - blocksize = 1 << inode->i_blkbits; + head = page_buffers(page); + blocksize = head->b_size; to = min_t(unsigned, PAGE_CACHE_SIZE - from, desc->count); to = from + to; if (from < blocksize && to > PAGE_CACHE_SIZE - blocksize) return 0; - head = page_buffers(page); bh = head; block_start = 0; do { @@ -2068,18 +2099,16 @@ int block_read_full_page(struct page *page, get_block_t *get_block) struct inode *inode = page->mapping->host; sector_t iblock, lblock; struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE]; - unsigned int blocksize; + unsigned int blocksize, bbits; int nr, i; int fully_mapped = 1; - BUG_ON(!PageLocked(page)); - blocksize = 1 << inode->i_blkbits; - if (!page_has_buffers(page)) - create_empty_buffers(page, blocksize, 0); - head = page_buffers(page); + head = create_page_buffers(page, inode, 0); + blocksize = head->b_size; + bbits = block_size_bits(blocksize); - iblock = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits); - lblock = (i_size_read(inode)+blocksize-1) >> inode->i_blkbits; + iblock = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits); + lblock = (i_size_read(inode)+blocksize-1) >> bbits; bh = head; nr = 0; i = 0; @@ -2864,6 +2893,55 @@ static void end_bio_bh_io_sync(struct bio *bio, int err) bio_put(bio); } +/* + * This allows us to do IO even on the odd last sectors + * of a device, even if the bh block size is some multiple + * of the physical sector size. + * + * We'll just truncate the bio to the size of the device, + * and clear the end of the buffer head manually. + * + * Truly out-of-range accesses will turn into actual IO + * errors, this only handles the "we need to be able to + * do IO at the final sector" case. + */ +static void guard_bh_eod(int rw, struct bio *bio, struct buffer_head *bh) +{ + sector_t maxsector; + unsigned bytes; + + maxsector = i_size_read(bio->bi_bdev->bd_inode) >> 9; + if (!maxsector) + return; + + /* + * If the *whole* IO is past the end of the device, + * let it through, and the IO layer will turn it into + * an EIO. + */ + if (unlikely(bio->bi_sector >= maxsector)) + return; + + maxsector -= bio->bi_sector; + bytes = bio->bi_size; + if (likely((bytes >> 9) <= maxsector)) + return; + + /* Uhhuh. We've got a bh that straddles the device size! */ + bytes = maxsector << 9; + + /* Truncate the bio.. */ + bio->bi_size = bytes; + bio->bi_io_vec[0].bv_len = bytes; + + /* ..and clear the end of the buffer for reads */ + if ((rw & RW_MASK) == READ) { + void *kaddr = kmap_atomic(bh->b_page); + memset(kaddr + bh_offset(bh) + bytes, 0, bh->b_size - bytes); + kunmap_atomic(kaddr); + } +} + int submit_bh(int rw, struct buffer_head * bh) { struct bio *bio; @@ -2900,6 +2978,9 @@ int submit_bh(int rw, struct buffer_head * bh) bio->bi_end_io = end_bio_bh_io_sync; bio->bi_private = bh; + /* Take care of bh's that straddle the end of the device */ + guard_bh_eod(rw, bio, bh); + bio_get(bio); submit_bio(rw, bio); diff --git a/fs/ceph/export.c b/fs/ceph/export.c index 02ce90972d81..9349bb37a2fe 100644 --- a/fs/ceph/export.c +++ b/fs/ceph/export.c @@ -90,6 +90,8 @@ static int ceph_encode_fh(struct inode *inode, u32 *rawfh, int *max_len, *max_len = handle_length; type = 255; } + if (dentry) + dput(dentry); return type; } diff --git a/fs/char_dev.c b/fs/char_dev.c index 3f152b92a94a..afc2bb691780 100644 --- a/fs/char_dev.c +++ b/fs/char_dev.c @@ -471,9 +471,19 @@ static int exact_lock(dev_t dev, void *data) */ int cdev_add(struct cdev *p, dev_t dev, unsigned count) { + int error; + p->dev = dev; p->count = count; - return kobj_map(cdev_map, dev, count, NULL, exact_match, exact_lock, p); + + error = kobj_map(cdev_map, dev, count, NULL, + exact_match, exact_lock, p); + if (error) + return error; + + kobject_get(p->kobj.parent); + + return 0; } static void cdev_unmap(dev_t dev, unsigned count) @@ -498,14 +508,20 @@ void cdev_del(struct cdev *p) static void cdev_default_release(struct kobject *kobj) { struct cdev *p = container_of(kobj, struct cdev, kobj); + struct kobject *parent = kobj->parent; + cdev_purge(p); + kobject_put(parent); } static void cdev_dynamic_release(struct kobject *kobj) { struct cdev *p = container_of(kobj, struct cdev, kobj); + struct kobject *parent = kobj->parent; + cdev_purge(p); kfree(p); + kobject_put(parent); } static struct kobj_type ktype_cdev_default = { diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c index fc783e264420..0fb15bbbe43c 100644 --- a/fs/cifs/cifsacl.c +++ b/fs/cifs/cifsacl.c @@ -225,6 +225,13 @@ sid_to_str(struct cifs_sid *sidptr, char *sidstr) } static void +cifs_copy_sid(struct cifs_sid *dst, const struct cifs_sid *src) +{ + memcpy(dst, src, sizeof(*dst)); + dst->num_subauth = min_t(u8, src->num_subauth, NUM_SUBAUTHS); +} + +static void id_rb_insert(struct rb_root *root, struct cifs_sid *sidptr, struct cifs_sid_id **psidid, char *typestr) { @@ -248,7 +255,7 @@ id_rb_insert(struct rb_root *root, struct cifs_sid *sidptr, } } - memcpy(&(*psidid)->sid, sidptr, sizeof(struct cifs_sid)); + cifs_copy_sid(&(*psidid)->sid, sidptr); (*psidid)->time = jiffies - (SID_MAP_RETRY + 1); (*psidid)->refcount = 0; @@ -354,7 +361,7 @@ id_to_sid(unsigned long cid, uint sidtype, struct cifs_sid *ssid) * any fields of the node after a reference is put . */ if (test_bit(SID_ID_MAPPED, &psidid->state)) { - memcpy(ssid, &psidid->sid, sizeof(struct cifs_sid)); + cifs_copy_sid(ssid, &psidid->sid); psidid->time = jiffies; /* update ts for accessing */ goto id_sid_out; } @@ -370,14 +377,14 @@ id_to_sid(unsigned long cid, uint sidtype, struct cifs_sid *ssid) if (IS_ERR(sidkey)) { rc = -EINVAL; cFYI(1, "%s: Can't map and id to a SID", __func__); + } else if (sidkey->datalen < sizeof(struct cifs_sid)) { + rc = -EIO; + cFYI(1, "%s: Downcall contained malformed key " + "(datalen=%hu)", __func__, sidkey->datalen); } else { lsid = (struct cifs_sid *)sidkey->payload.data; - memcpy(&psidid->sid, lsid, - sidkey->datalen < sizeof(struct cifs_sid) ? - sidkey->datalen : sizeof(struct cifs_sid)); - memcpy(ssid, &psidid->sid, - sidkey->datalen < sizeof(struct cifs_sid) ? - sidkey->datalen : sizeof(struct cifs_sid)); + cifs_copy_sid(&psidid->sid, lsid); + cifs_copy_sid(ssid, &psidid->sid); set_bit(SID_ID_MAPPED, &psidid->state); key_put(sidkey); kfree(psidid->sidstr); @@ -396,7 +403,7 @@ id_to_sid(unsigned long cid, uint sidtype, struct cifs_sid *ssid) return rc; } if (test_bit(SID_ID_MAPPED, &psidid->state)) - memcpy(ssid, &psidid->sid, sizeof(struct cifs_sid)); + cifs_copy_sid(ssid, &psidid->sid); else rc = -EINVAL; } @@ -675,8 +682,6 @@ int compare_sids(const struct cifs_sid *ctsid, const struct cifs_sid *cwsid) static void copy_sec_desc(const struct cifs_ntsd *pntsd, struct cifs_ntsd *pnntsd, __u32 sidsoffset) { - int i; - struct cifs_sid *owner_sid_ptr, *group_sid_ptr; struct cifs_sid *nowner_sid_ptr, *ngroup_sid_ptr; @@ -692,26 +697,14 @@ static void copy_sec_desc(const struct cifs_ntsd *pntsd, owner_sid_ptr = (struct cifs_sid *)((char *)pntsd + le32_to_cpu(pntsd->osidoffset)); nowner_sid_ptr = (struct cifs_sid *)((char *)pnntsd + sidsoffset); - - nowner_sid_ptr->revision = owner_sid_ptr->revision; - nowner_sid_ptr->num_subauth = owner_sid_ptr->num_subauth; - for (i = 0; i < 6; i++) - nowner_sid_ptr->authority[i] = owner_sid_ptr->authority[i]; - for (i = 0; i < 5; i++) - nowner_sid_ptr->sub_auth[i] = owner_sid_ptr->sub_auth[i]; + cifs_copy_sid(nowner_sid_ptr, owner_sid_ptr); /* copy group sid */ group_sid_ptr = (struct cifs_sid *)((char *)pntsd + le32_to_cpu(pntsd->gsidoffset)); ngroup_sid_ptr = (struct cifs_sid *)((char *)pnntsd + sidsoffset + sizeof(struct cifs_sid)); - - ngroup_sid_ptr->revision = group_sid_ptr->revision; - ngroup_sid_ptr->num_subauth = group_sid_ptr->num_subauth; - for (i = 0; i < 6; i++) - ngroup_sid_ptr->authority[i] = group_sid_ptr->authority[i]; - for (i = 0; i < 5; i++) - ngroup_sid_ptr->sub_auth[i] = group_sid_ptr->sub_auth[i]; + cifs_copy_sid(ngroup_sid_ptr, group_sid_ptr); return; } @@ -1120,8 +1113,7 @@ static int build_sec_desc(struct cifs_ntsd *pntsd, struct cifs_ntsd *pnntsd, kfree(nowner_sid_ptr); return rc; } - memcpy(owner_sid_ptr, nowner_sid_ptr, - sizeof(struct cifs_sid)); + cifs_copy_sid(owner_sid_ptr, nowner_sid_ptr); kfree(nowner_sid_ptr); *aclflag = CIFS_ACL_OWNER; } @@ -1139,8 +1131,7 @@ static int build_sec_desc(struct cifs_ntsd *pntsd, struct cifs_ntsd *pnntsd, kfree(ngroup_sid_ptr); return rc; } - memcpy(group_sid_ptr, ngroup_sid_ptr, - sizeof(struct cifs_sid)); + cifs_copy_sid(group_sid_ptr, ngroup_sid_ptr); kfree(ngroup_sid_ptr); *aclflag = CIFS_ACL_GROUP; } diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 7c0a81283645..d3671f2acb29 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -398,7 +398,16 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry, * in network traffic in the other paths. */ if (!(oflags & O_CREAT)) { - struct dentry *res = cifs_lookup(inode, direntry, 0); + struct dentry *res; + + /* + * Check for hashed negative dentry. We have already revalidated + * the dentry and it is fine. No need to perform another lookup. + */ + if (!d_unhashed(direntry)) + return -ENOENT; + + res = cifs_lookup(inode, direntry, 0); if (IS_ERR(res)) return PTR_ERR(res); diff --git a/fs/cifs/file.c b/fs/cifs/file.c index edb25b4bbb95..70b6f4c3a0c1 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -1794,7 +1794,6 @@ static int cifs_writepages(struct address_space *mapping, struct TCP_Server_Info *server; struct page *page; int rc = 0; - loff_t isize = i_size_read(mapping->host); /* * If wsize is smaller than the page cache size, default to writing @@ -1899,7 +1898,7 @@ retry: */ set_page_writeback(page); - if (page_offset(page) >= isize) { + if (page_offset(page) >= i_size_read(mapping->host)) { done = true; unlock_page(page); end_page_writeback(page); @@ -1932,7 +1931,8 @@ retry: wdata->offset = page_offset(wdata->pages[0]); wdata->pagesz = PAGE_CACHE_SIZE; wdata->tailsz = - min(isize - page_offset(wdata->pages[nr_pages - 1]), + min(i_size_read(mapping->host) - + page_offset(wdata->pages[nr_pages - 1]), (loff_t)PAGE_CACHE_SIZE); wdata->bytes = ((nr_pages - 1) * PAGE_CACHE_SIZE) + wdata->tailsz; diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index f9b5d3d6cf33..1c576e871366 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -86,14 +86,17 @@ cifs_readdir_lookup(struct dentry *parent, struct qstr *name, dentry = d_lookup(parent, name); if (dentry) { + int err; inode = dentry->d_inode; /* update inode in place if i_ino didn't change */ if (inode && CIFS_I(inode)->uniqueid == fattr->cf_uniqueid) { cifs_fattr_to_inode(inode, fattr); return dentry; } - d_drop(dentry); + err = d_invalidate(dentry); dput(dentry); + if (err) + return NULL; } dentry = d_alloc(parent, name); diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index 56cc4be87807..34cea2798333 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -766,7 +766,6 @@ smb_set_file_info(struct inode *inode, const char *full_path, struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); struct tcon_link *tlink = NULL; struct cifs_tcon *tcon; - FILE_BASIC_INFO info_buf; /* if the file is already open for write, just use that fileid */ open_file = find_writable_file(cinode, true); @@ -817,7 +816,7 @@ smb_set_file_info(struct inode *inode, const char *full_path, netpid = current->tgid; set_via_filehandle: - rc = CIFSSMBSetFileInfo(xid, tcon, &info_buf, netfid, netpid); + rc = CIFSSMBSetFileInfo(xid, tcon, buf, netfid, netpid); if (!rc) cinode->cifsAttrs = le32_to_cpu(buf->Attributes); diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index f5054025f9da..e2f57a007029 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -210,6 +210,8 @@ static int do_video_set_spu_palette(unsigned int fd, unsigned int cmd, err = get_user(palp, &up->palette); err |= get_user(length, &up->length); + if (err) + return -EFAULT; up_native = compat_alloc_user_space(sizeof(struct video_spu_palette)); err = put_user(compat_ptr(palp), &up_native->palette); @@ -842,6 +844,9 @@ COMPATIBLE_IOCTL(TIOCGDEV) COMPATIBLE_IOCTL(TIOCCBRK) COMPATIBLE_IOCTL(TIOCGSID) COMPATIBLE_IOCTL(TIOCGICOUNT) +COMPATIBLE_IOCTL(TIOCGPKT) +COMPATIBLE_IOCTL(TIOCGPTLCK) +COMPATIBLE_IOCTL(TIOCGEXCL) /* Little t */ COMPATIBLE_IOCTL(TIOCGETD) COMPATIBLE_IOCTL(TIOCSETD) diff --git a/fs/coredump.c b/fs/coredump.c index fd37facac8dc..ce47379bfa61 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -450,11 +450,12 @@ static int umh_pipe_setup(struct subprocess_info *info, struct cred *new) cp->file = files[1]; - replace_fd(0, files[0], 0); + err = replace_fd(0, files[0], 0); + fput(files[0]); /* and disallow core files too */ current->signal->rlim[RLIMIT_CORE] = (struct rlimit){1, 1}; - return 0; + return err; } void do_coredump(siginfo_t *siginfo, struct pt_regs *regs) diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index b607d92cdf24..153bb1e42e63 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -59,7 +59,6 @@ static struct inode *debugfs_get_inode(struct super_block *sb, umode_t mode, dev case S_IFDIR: inode->i_op = &simple_dir_inode_operations; inode->i_fop = &simple_dir_operations; - inode->i_private = NULL; /* directory inodes start off with i_nlink == 2 * (for "." entry) */ diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index 14afbabe6546..472e6befc54d 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c @@ -545,37 +545,38 @@ void devpts_kill_index(struct inode *ptmx_inode, int idx) mutex_unlock(&allocated_ptys_lock); } -int devpts_pty_new(struct inode *ptmx_inode, struct tty_struct *tty) +/** + * devpts_pty_new -- create a new inode in /dev/pts/ + * @ptmx_inode: inode of the master + * @device: major+minor of the node to be created + * @index: used as a name of the node + * @priv: what's given back by devpts_get_priv + * + * The created inode is returned. Remove it from /dev/pts/ by devpts_pty_kill. + */ +struct inode *devpts_pty_new(struct inode *ptmx_inode, dev_t device, int index, + void *priv) { - /* tty layer puts index from devpts_new_index() in here */ - int number = tty->index; - struct tty_driver *driver = tty->driver; - dev_t device = MKDEV(driver->major, driver->minor_start+number); struct dentry *dentry; struct super_block *sb = pts_sb_from_inode(ptmx_inode); - struct inode *inode = new_inode(sb); + struct inode *inode; struct dentry *root = sb->s_root; struct pts_fs_info *fsi = DEVPTS_SB(sb); struct pts_mount_opts *opts = &fsi->mount_opts; - int ret = 0; char s[12]; - /* We're supposed to be given the slave end of a pty */ - BUG_ON(driver->type != TTY_DRIVER_TYPE_PTY); - BUG_ON(driver->subtype != PTY_TYPE_SLAVE); - + inode = new_inode(sb); if (!inode) - return -ENOMEM; + return ERR_PTR(-ENOMEM); - inode->i_ino = number + 3; + inode->i_ino = index + 3; inode->i_uid = opts->setuid ? opts->uid : current_fsuid(); inode->i_gid = opts->setgid ? opts->gid : current_fsgid(); inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; init_special_inode(inode, S_IFCHR|opts->mode, device); - inode->i_private = tty; - tty->driver_data = inode; + inode->i_private = priv; - sprintf(s, "%d", number); + sprintf(s, "%d", index); mutex_lock(&root->d_inode->i_mutex); @@ -585,18 +586,24 @@ int devpts_pty_new(struct inode *ptmx_inode, struct tty_struct *tty) fsnotify_create(root->d_inode, dentry); } else { iput(inode); - ret = -ENOMEM; + inode = ERR_PTR(-ENOMEM); } mutex_unlock(&root->d_inode->i_mutex); - return ret; + return inode; } -struct tty_struct *devpts_get_tty(struct inode *pts_inode, int number) +/** + * devpts_get_priv -- get private data for a slave + * @pts_inode: inode of the slave + * + * Returns whatever was passed as priv in devpts_pty_new for a given inode. + */ +void *devpts_get_priv(struct inode *pts_inode) { struct dentry *dentry; - struct tty_struct *tty; + void *priv = NULL; BUG_ON(pts_inode->i_rdev == MKDEV(TTYAUX_MAJOR, PTMX_MINOR)); @@ -605,18 +612,22 @@ struct tty_struct *devpts_get_tty(struct inode *pts_inode, int number) if (!dentry) return NULL; - tty = NULL; if (pts_inode->i_sb->s_magic == DEVPTS_SUPER_MAGIC) - tty = (struct tty_struct *)pts_inode->i_private; + priv = pts_inode->i_private; dput(dentry); - return tty; + return priv; } -void devpts_pty_kill(struct tty_struct *tty) +/** + * devpts_pty_kill -- remove inode form /dev/pts/ + * @inode: inode of the slave to be removed + * + * This is an inverse operation of devpts_pty_new. + */ +void devpts_pty_kill(struct inode *inode) { - struct inode *inode = tty->driver_data; struct super_block *sb = pts_sb_from_inode(inode); struct dentry *root = sb->s_root; struct dentry *dentry; diff --git a/fs/direct-io.c b/fs/direct-io.c index f86c720dba0e..cf5b44b10c67 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -540,6 +540,7 @@ static int get_more_blocks(struct dio *dio, struct dio_submit *sdio, sector_t fs_endblk; /* Into file, in filesystem-sized blocks */ unsigned long fs_count; /* Number of filesystem-sized blocks */ int create; + unsigned int i_blkbits = sdio->blkbits + sdio->blkfactor; /* * If there was a memory error and we've overwritten all the @@ -554,7 +555,7 @@ static int get_more_blocks(struct dio *dio, struct dio_submit *sdio, fs_count = fs_endblk - fs_startblk + 1; map_bh->b_state = 0; - map_bh->b_size = fs_count << dio->inode->i_blkbits; + map_bh->b_size = fs_count << i_blkbits; /* * For writes inside i_size on a DIO_SKIP_HOLES filesystem we @@ -1053,7 +1054,8 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, int seg; size_t size; unsigned long addr; - unsigned blkbits = inode->i_blkbits; + unsigned i_blkbits = ACCESS_ONCE(inode->i_blkbits); + unsigned blkbits = i_blkbits; unsigned blocksize_mask = (1 << blkbits) - 1; ssize_t retval = -EINVAL; loff_t end = offset; @@ -1149,7 +1151,7 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, dio->inode = inode; dio->rw = rw; sdio.blkbits = blkbits; - sdio.blkfactor = inode->i_blkbits - blkbits; + sdio.blkfactor = i_blkbits - blkbits; sdio.block_in_file = offset >> blkbits; sdio.get_block = get_block; diff --git a/fs/dlm/Kconfig b/fs/dlm/Kconfig index 1897eb1b4b6a..e4242c3f8486 100644 --- a/fs/dlm/Kconfig +++ b/fs/dlm/Kconfig @@ -1,6 +1,6 @@ menuconfig DLM tristate "Distributed Lock Manager (DLM)" - depends on EXPERIMENTAL && INET + depends on INET depends on SYSFS && CONFIGFS_FS && (IPV6 || IPV6=n) select IP_SCTP help diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h index 871c1abf6029..77c0f70f8fe8 100644 --- a/fs/dlm/dlm_internal.h +++ b/fs/dlm/dlm_internal.h @@ -337,6 +337,7 @@ enum rsb_flags { RSB_NEW_MASTER2, RSB_RECOVER_CONVERT, RSB_RECOVER_GRANT, + RSB_RECOVER_LVB_INVAL, }; static inline void rsb_set_flag(struct dlm_rsb *r, enum rsb_flags flag) diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c index b56950758188..a579f30f237d 100644 --- a/fs/dlm/lock.c +++ b/fs/dlm/lock.c @@ -5393,6 +5393,13 @@ static void purge_dead_list(struct dlm_ls *ls, struct dlm_rsb *r, if ((lkb->lkb_nodeid == nodeid_gone) || dlm_is_removed(ls, lkb->lkb_nodeid)) { + /* tell recover_lvb to invalidate the lvb + because a node holding EX/PW failed */ + if ((lkb->lkb_exflags & DLM_LKF_VALBLK) && + (lkb->lkb_grmode >= DLM_LOCK_PW)) { + rsb_set_flag(r, RSB_RECOVER_LVB_INVAL); + } + del_lkb(r, lkb); /* this put should free the lkb */ @@ -6025,15 +6032,18 @@ static int orphan_proc_lock(struct dlm_ls *ls, struct dlm_lkb *lkb) return error; } -/* The force flag allows the unlock to go ahead even if the lkb isn't granted. - Regardless of what rsb queue the lock is on, it's removed and freed. */ +/* The FORCEUNLOCK flag allows the unlock to go ahead even if the lkb isn't + granted. Regardless of what rsb queue the lock is on, it's removed and + freed. The IVVALBLK flag causes the lvb on the resource to be invalidated + if our lock is PW/EX (it's ignored if our granted mode is smaller.) */ static int unlock_proc_lock(struct dlm_ls *ls, struct dlm_lkb *lkb) { struct dlm_args args; int error; - set_unlock_args(DLM_LKF_FORCEUNLOCK, lkb->lkb_ua, &args); + set_unlock_args(DLM_LKF_FORCEUNLOCK | DLM_LKF_IVVALBLK, + lkb->lkb_ua, &args); error = unlock_lock(ls, lkb, &args); if (error == -DLM_EUNLOCK) diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index 331ea4f94efd..dd87a31bcc21 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c @@ -1385,7 +1385,6 @@ void *dlm_lowcomms_get_buffer(int nodeid, int len, gfp_t allocation, char **ppc) struct connection *con; struct writequeue_entry *e; int offset = 0; - int users = 0; con = nodeid2con(nodeid, allocation); if (!con) @@ -1399,7 +1398,7 @@ void *dlm_lowcomms_get_buffer(int nodeid, int len, gfp_t allocation, char **ppc) } else { offset = e->end; e->end += len; - users = e->users++; + e->users++; } spin_unlock(&con->writequeue_lock); @@ -1414,7 +1413,7 @@ void *dlm_lowcomms_get_buffer(int nodeid, int len, gfp_t allocation, char **ppc) spin_lock(&con->writequeue_lock); offset = e->end; e->end += len; - users = e->users++; + e->users++; list_add_tail(&e->list, &con->writequeue); spin_unlock(&con->writequeue_lock); goto got_one; diff --git a/fs/dlm/recover.c b/fs/dlm/recover.c index 4a7a76e42fc3..aedea28a86a1 100644 --- a/fs/dlm/recover.c +++ b/fs/dlm/recover.c @@ -717,8 +717,14 @@ void dlm_recovered_lock(struct dlm_rsb *r) * the VALNOTVALID flag if necessary, and determining the correct lvb contents * based on the lvb's of the locks held on the rsb. * - * RSB_VALNOTVALID is set if there are only NL/CR locks on the rsb. If it - * was already set prior to recovery, it's not cleared, regardless of locks. + * RSB_VALNOTVALID is set in two cases: + * + * 1. we are master, but not new, and we purged an EX/PW lock held by a + * failed node (in dlm_recover_purge which set RSB_RECOVER_LVB_INVAL) + * + * 2. we are a new master, and there are only NL/CR locks left. + * (We could probably improve this by only invaliding in this way when + * the previous master left uncleanly. VMS docs mention that.) * * The LVB contents are only considered for changing when this is a new master * of the rsb (NEW_MASTER2). Then, the rsb's lvb is taken from any lkb with @@ -734,6 +740,19 @@ static void recover_lvb(struct dlm_rsb *r) int big_lock_exists = 0; int lvblen = r->res_ls->ls_lvblen; + if (!rsb_flag(r, RSB_NEW_MASTER2) && + rsb_flag(r, RSB_RECOVER_LVB_INVAL)) { + /* case 1 above */ + rsb_set_flag(r, RSB_VALNOTVALID); + return; + } + + if (!rsb_flag(r, RSB_NEW_MASTER2)) + return; + + /* we are the new master, so figure out if VALNOTVALID should + be set, and set the rsb lvb from the best lkb available. */ + list_for_each_entry(lkb, &r->res_grantqueue, lkb_statequeue) { if (!(lkb->lkb_exflags & DLM_LKF_VALBLK)) continue; @@ -772,13 +791,10 @@ static void recover_lvb(struct dlm_rsb *r) if (!lock_lvb_exists) goto out; + /* lvb is invalidated if only NL/CR locks remain */ if (!big_lock_exists) rsb_set_flag(r, RSB_VALNOTVALID); - /* don't mess with the lvb unless we're the new master */ - if (!rsb_flag(r, RSB_NEW_MASTER2)) - goto out; - if (!r->res_lvbptr) { r->res_lvbptr = dlm_allocate_lvb(r->res_ls); if (!r->res_lvbptr) @@ -852,12 +868,19 @@ void dlm_recover_rsbs(struct dlm_ls *ls) if (is_master(r)) { if (rsb_flag(r, RSB_RECOVER_CONVERT)) recover_conversion(r); + + /* recover lvb before granting locks so the updated + lvb/VALNOTVALID is presented in the completion */ + recover_lvb(r); + if (rsb_flag(r, RSB_NEW_MASTER2)) recover_grant(r); - recover_lvb(r); count++; + } else { + rsb_clear_flag(r, RSB_VALNOTVALID); } rsb_clear_flag(r, RSB_RECOVER_CONVERT); + rsb_clear_flag(r, RSB_RECOVER_LVB_INVAL); rsb_clear_flag(r, RSB_NEW_MASTER2); unlock_rsb(r); } diff --git a/fs/eventpoll.c b/fs/eventpoll.c index da72250ddc1c..cd96649bfe62 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -346,7 +346,7 @@ static inline struct epitem *ep_item_from_epqueue(poll_table *p) /* Tells if the epoll_ctl(2) operation needs an event copy from userspace */ static inline int ep_op_has_event(int op) { - return op == EPOLL_CTL_ADD || op == EPOLL_CTL_MOD; + return op != EPOLL_CTL_DEL; } /* Initialize the poll safe wake up structure */ @@ -676,34 +676,6 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi) return 0; } -/* - * Disables a "struct epitem" in the eventpoll set. Returns -EBUSY if the item - * had no event flags set, indicating that another thread may be currently - * handling that item's events (in the case that EPOLLONESHOT was being - * used). Otherwise a zero result indicates that the item has been disabled - * from receiving events. A disabled item may be re-enabled via - * EPOLL_CTL_MOD. Must be called with "mtx" held. - */ -static int ep_disable(struct eventpoll *ep, struct epitem *epi) -{ - int result = 0; - unsigned long flags; - - spin_lock_irqsave(&ep->lock, flags); - if (epi->event.events & ~EP_PRIVATE_BITS) { - if (ep_is_linked(&epi->rdllink)) - list_del_init(&epi->rdllink); - /* Ensure ep_poll_callback will not add epi back onto ready - list: */ - epi->event.events &= EP_PRIVATE_BITS; - } - else - result = -EBUSY; - spin_unlock_irqrestore(&ep->lock, flags); - - return result; -} - static void ep_free(struct eventpoll *ep) { struct rb_node *rbp; @@ -1048,6 +1020,8 @@ static void ep_rbtree_insert(struct eventpoll *ep, struct epitem *epi) rb_insert_color(&epi->rbn, &ep->rbr); } + + #define PATH_ARR_SIZE 5 /* * These are the number paths of length 1 to 5, that we are allowing to emanate @@ -1813,12 +1787,6 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd, } else error = -ENOENT; break; - case EPOLL_CTL_DISABLE: - if (epi) - error = ep_disable(ep, epi); - else - error = -ENOENT; - break; } mutex_unlock(&ep->mtx); diff --git a/fs/exec.c b/fs/exec.c index 8b9011b67041..0039055b1fc6 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1083,7 +1083,8 @@ int flush_old_exec(struct linux_binprm * bprm) bprm->mm = NULL; /* We're using it now */ set_fs(USER_DS); - current->flags &= ~(PF_RANDOMIZE | PF_FORKNOEXEC | PF_KTHREAD); + current->flags &= + ~(PF_RANDOMIZE | PF_FORKNOEXEC | PF_KTHREAD | PF_NOFREEZE); flush_thread(); current->personality &= ~bprm->per_clear; diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 6c205d0c565b..fa04d023177e 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -469,7 +469,7 @@ static int parse_options(char *options, struct super_block *sb) uid = make_kuid(current_user_ns(), option); if (!uid_valid(uid)) { ext2_msg(sb, KERN_ERR, "Invalid uid value %d", option); - return -1; + return 0; } sbi->s_resuid = uid; @@ -480,7 +480,7 @@ static int parse_options(char *options, struct super_block *sb) gid = make_kgid(current_user_ns(), option); if (!gid_valid(gid)) { ext2_msg(sb, KERN_ERR, "Invalid gid value %d", option); - return -1; + return 0; } sbi->s_resgid = gid; break; diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c index 7320a66e958f..22548f56197b 100644 --- a/fs/ext3/balloc.c +++ b/fs/ext3/balloc.c @@ -2101,8 +2101,9 @@ int ext3_trim_fs(struct super_block *sb, struct fstrim_range *range) end = start + (range->len >> sb->s_blocksize_bits) - 1; minlen = range->minlen >> sb->s_blocksize_bits; - if (unlikely(minlen > EXT3_BLOCKS_PER_GROUP(sb)) || - unlikely(start >= max_blks)) + if (minlen > EXT3_BLOCKS_PER_GROUP(sb) || + start >= max_blks || + range->len < sb->s_blocksize) return -EINVAL; if (end >= max_blks) end = max_blks - 1; diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index 8f4fddac01a6..890b8947c546 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c @@ -46,8 +46,7 @@ static struct buffer_head *ext3_append(handle_t *handle, *block = inode->i_size >> inode->i_sb->s_blocksize_bits; - bh = ext3_bread(handle, inode, *block, 1, err); - if (bh) { + if ((bh = ext3_dir_bread(handle, inode, *block, 1, err))) { inode->i_size += inode->i_sb->s_blocksize; EXT3_I(inode)->i_disksize = inode->i_size; *err = ext3_journal_get_write_access(handle, bh); @@ -339,8 +338,10 @@ dx_probe(struct qstr *entry, struct inode *dir, u32 hash; frame->bh = NULL; - if (!(bh = ext3_bread (NULL,dir, 0, 0, err))) + if (!(bh = ext3_dir_bread(NULL, dir, 0, 0, err))) { + *err = ERR_BAD_DX_DIR; goto fail; + } root = (struct dx_root *) bh->b_data; if (root->info.hash_version != DX_HASH_TEA && root->info.hash_version != DX_HASH_HALF_MD4 && @@ -436,8 +437,10 @@ dx_probe(struct qstr *entry, struct inode *dir, frame->entries = entries; frame->at = at; if (!indirect--) return frame; - if (!(bh = ext3_bread (NULL,dir, dx_get_block(at), 0, err))) + if (!(bh = ext3_dir_bread(NULL, dir, dx_get_block(at), 0, err))) { + *err = ERR_BAD_DX_DIR; goto fail2; + } at = entries = ((struct dx_node *) bh->b_data)->entries; if (dx_get_limit(entries) != dx_node_limit (dir)) { ext3_warning(dir->i_sb, __func__, @@ -535,8 +538,8 @@ static int ext3_htree_next_block(struct inode *dir, __u32 hash, * block so no check is necessary */ while (num_frames--) { - if (!(bh = ext3_bread(NULL, dir, dx_get_block(p->at), - 0, &err))) + if (!(bh = ext3_dir_bread(NULL, dir, dx_get_block(p->at), + 0, &err))) return err; /* Failure */ p++; brelse (p->bh); @@ -559,10 +562,11 @@ static int htree_dirblock_to_tree(struct file *dir_file, { struct buffer_head *bh; struct ext3_dir_entry_2 *de, *top; - int err, count = 0; + int err = 0, count = 0; dxtrace(printk("In htree dirblock_to_tree: block %d\n", block)); - if (!(bh = ext3_bread (NULL, dir, block, 0, &err))) + + if (!(bh = ext3_dir_bread(NULL, dir, block, 0, &err))) return err; de = (struct ext3_dir_entry_2 *) bh->b_data; @@ -976,7 +980,7 @@ static struct buffer_head * ext3_dx_find_entry(struct inode *dir, return NULL; do { block = dx_get_block(frame->at); - if (!(bh = ext3_bread (NULL,dir, block, 0, err))) + if (!(bh = ext3_dir_bread (NULL, dir, block, 0, err))) goto errout; retval = search_dirblock(bh, dir, entry, @@ -1458,9 +1462,9 @@ static int ext3_add_entry (handle_t *handle, struct dentry *dentry, } blocks = dir->i_size >> sb->s_blocksize_bits; for (block = 0; block < blocks; block++) { - bh = ext3_bread(handle, dir, block, 0, &retval); - if(!bh) + if (!(bh = ext3_dir_bread(handle, dir, block, 0, &retval))) return retval; + retval = add_dirent_to_buf(handle, dentry, inode, NULL, bh); if (retval != -ENOSPC) return retval; @@ -1500,7 +1504,7 @@ static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry, entries = frame->entries; at = frame->at; - if (!(bh = ext3_bread(handle,dir, dx_get_block(frame->at), 0, &err))) + if (!(bh = ext3_dir_bread(handle, dir, dx_get_block(frame->at), 0, &err))) goto cleanup; BUFFER_TRACE(bh, "get_write_access"); @@ -1790,8 +1794,7 @@ retry: inode->i_op = &ext3_dir_inode_operations; inode->i_fop = &ext3_dir_operations; inode->i_size = EXT3_I(inode)->i_disksize = inode->i_sb->s_blocksize; - dir_block = ext3_bread (handle, inode, 0, 1, &err); - if (!dir_block) + if (!(dir_block = ext3_dir_bread(handle, inode, 0, 1, &err))) goto out_clear_inode; BUFFER_TRACE(dir_block, "get_write_access"); @@ -1859,7 +1862,7 @@ static int empty_dir (struct inode * inode) sb = inode->i_sb; if (inode->i_size < EXT3_DIR_REC_LEN(1) + EXT3_DIR_REC_LEN(2) || - !(bh = ext3_bread (NULL, inode, 0, 0, &err))) { + !(bh = ext3_dir_bread(NULL, inode, 0, 0, &err))) { if (err) ext3_error(inode->i_sb, __func__, "error %d reading directory #%lu offset 0", @@ -1890,9 +1893,8 @@ static int empty_dir (struct inode * inode) (void *) de >= (void *) (bh->b_data+sb->s_blocksize)) { err = 0; brelse (bh); - bh = ext3_bread (NULL, inode, - offset >> EXT3_BLOCK_SIZE_BITS(sb), 0, &err); - if (!bh) { + if (!(bh = ext3_dir_bread (NULL, inode, + offset >> EXT3_BLOCK_SIZE_BITS(sb), 0, &err))) { if (err) ext3_error(sb, __func__, "error %d reading directory" @@ -2388,7 +2390,7 @@ static int ext3_rename (struct inode * old_dir, struct dentry *old_dentry, goto end_rename; } retval = -EIO; - dir_bh = ext3_bread (handle, old_inode, 0, 0, &retval); + dir_bh = ext3_dir_bread(handle, old_inode, 0, 0, &retval); if (!dir_bh) goto end_rename; if (le32_to_cpu(PARENT_INO(dir_bh->b_data)) != old_dir->i_ino) diff --git a/fs/ext3/namei.h b/fs/ext3/namei.h index f2ce2b0065c9..46304d8c9f0a 100644 --- a/fs/ext3/namei.h +++ b/fs/ext3/namei.h @@ -6,3 +6,22 @@ */ extern struct dentry *ext3_get_parent(struct dentry *child); + +static inline struct buffer_head *ext3_dir_bread(handle_t *handle, + struct inode *inode, + int block, int create, + int *err) +{ + struct buffer_head *bh; + + bh = ext3_bread(handle, inode, block, create, err); + + if (!bh && !(*err)) { + *err = -EIO; + ext3_error(inode->i_sb, __func__, + "Directory hole detected on inode %lu\n", + inode->i_ino); + return NULL; + } + return bh; +} diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 29e79713c7eb..5366393528df 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -1001,7 +1001,7 @@ static int parse_options (char *options, struct super_block *sb, uid = make_kuid(current_user_ns(), option); if (!uid_valid(uid)) { ext3_msg(sb, KERN_ERR, "Invalid uid value %d", option); - return -1; + return 0; } sbi->s_resuid = uid; @@ -1012,7 +1012,7 @@ static int parse_options (char *options, struct super_block *sb, gid = make_kgid(current_user_ns(), option); if (!gid_valid(gid)) { ext3_msg(sb, KERN_ERR, "Invalid gid value %d", option); - return -1; + return 0; } sbi->s_resgid = gid; break; diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 1b5089067d01..cf1821784a16 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -174,8 +174,7 @@ void ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, ext4_free_inodes_set(sb, gdp, 0); ext4_itable_unused_set(sb, gdp, 0); memset(bh->b_data, 0xff, sb->s_blocksize); - ext4_block_bitmap_csum_set(sb, block_group, gdp, bh, - EXT4_BLOCKS_PER_GROUP(sb) / 8); + ext4_block_bitmap_csum_set(sb, block_group, gdp, bh); return; } memset(bh->b_data, 0, sb->s_blocksize); @@ -212,8 +211,7 @@ void ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, */ ext4_mark_bitmap_end(num_clusters_in_group(sb, block_group), sb->s_blocksize * 8, bh->b_data); - ext4_block_bitmap_csum_set(sb, block_group, gdp, bh, - EXT4_BLOCKS_PER_GROUP(sb) / 8); + ext4_block_bitmap_csum_set(sb, block_group, gdp, bh); ext4_group_desc_csum_set(sb, block_group, gdp); } @@ -350,7 +348,7 @@ void ext4_validate_block_bitmap(struct super_block *sb, return; } if (unlikely(!ext4_block_bitmap_csum_verify(sb, block_group, - desc, bh, EXT4_BLOCKS_PER_GROUP(sb) / 8))) { + desc, bh))) { ext4_unlock_group(sb, block_group); ext4_error(sb, "bg %u: bad block bitmap checksum", block_group); return; diff --git a/fs/ext4/bitmap.c b/fs/ext4/bitmap.c index 5c2d1813ebe9..3285aa5a706a 100644 --- a/fs/ext4/bitmap.c +++ b/fs/ext4/bitmap.c @@ -58,11 +58,12 @@ void ext4_inode_bitmap_csum_set(struct super_block *sb, ext4_group_t group, int ext4_block_bitmap_csum_verify(struct super_block *sb, ext4_group_t group, struct ext4_group_desc *gdp, - struct buffer_head *bh, int sz) + struct buffer_head *bh) { __u32 hi; __u32 provided, calculated; struct ext4_sb_info *sbi = EXT4_SB(sb); + int sz = EXT4_CLUSTERS_PER_GROUP(sb) / 8; if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) @@ -84,8 +85,9 @@ int ext4_block_bitmap_csum_verify(struct super_block *sb, ext4_group_t group, void ext4_block_bitmap_csum_set(struct super_block *sb, ext4_group_t group, struct ext4_group_desc *gdp, - struct buffer_head *bh, int sz) + struct buffer_head *bh) { + int sz = EXT4_CLUSTERS_PER_GROUP(sb) / 8; __u32 csum; struct ext4_sb_info *sbi = EXT4_SB(sb); diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 3ab2539b7b2e..3c20de1d59d0 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1882,10 +1882,10 @@ int ext4_inode_bitmap_csum_verify(struct super_block *sb, ext4_group_t group, struct buffer_head *bh, int sz); void ext4_block_bitmap_csum_set(struct super_block *sb, ext4_group_t group, struct ext4_group_desc *gdp, - struct buffer_head *bh, int sz); + struct buffer_head *bh); int ext4_block_bitmap_csum_verify(struct super_block *sb, ext4_group_t group, struct ext4_group_desc *gdp, - struct buffer_head *bh, int sz); + struct buffer_head *bh); /* balloc.c */ extern void ext4_validate_block_bitmap(struct super_block *sb, @@ -2063,8 +2063,7 @@ extern int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count); extern int ext4_calculate_overhead(struct super_block *sb); extern int ext4_superblock_csum_verify(struct super_block *sb, struct ext4_super_block *es); -extern void ext4_superblock_csum_set(struct super_block *sb, - struct ext4_super_block *es); +extern void ext4_superblock_csum_set(struct super_block *sb); extern void *ext4_kvmalloc(size_t size, gfp_t flags); extern void *ext4_kvzalloc(size_t size, gfp_t flags); extern void ext4_kvfree(void *ptr); diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c index bfa65b49d424..b4323ba846b5 100644 --- a/fs/ext4/ext4_jbd2.c +++ b/fs/ext4/ext4_jbd2.c @@ -143,17 +143,13 @@ int __ext4_handle_dirty_super(const char *where, unsigned int line, struct buffer_head *bh = EXT4_SB(sb)->s_sbh; int err = 0; + ext4_superblock_csum_set(sb); if (ext4_handle_valid(handle)) { - ext4_superblock_csum_set(sb, - (struct ext4_super_block *)bh->b_data); err = jbd2_journal_dirty_metadata(handle, bh); if (err) ext4_journal_abort_handle(where, line, __func__, bh, handle, err); - } else { - ext4_superblock_csum_set(sb, - (struct ext4_super_block *)bh->b_data); + } else mark_buffer_dirty(bh); - } return err; } diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 1c94cca35ed1..7011ac967208 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -52,6 +52,9 @@ #define EXT4_EXT_MARK_UNINIT1 0x2 /* mark first half uninitialized */ #define EXT4_EXT_MARK_UNINIT2 0x4 /* mark second half uninitialized */ +#define EXT4_EXT_DATA_VALID1 0x8 /* first half contains valid data */ +#define EXT4_EXT_DATA_VALID2 0x10 /* second half contains valid data */ + static __le32 ext4_extent_block_csum(struct inode *inode, struct ext4_extent_header *eh) { @@ -2914,6 +2917,9 @@ static int ext4_split_extent_at(handle_t *handle, unsigned int ee_len, depth; int err = 0; + BUG_ON((split_flag & (EXT4_EXT_DATA_VALID1 | EXT4_EXT_DATA_VALID2)) == + (EXT4_EXT_DATA_VALID1 | EXT4_EXT_DATA_VALID2)); + ext_debug("ext4_split_extents_at: inode %lu, logical" "block %llu\n", inode->i_ino, (unsigned long long)split); @@ -2972,7 +2978,14 @@ static int ext4_split_extent_at(handle_t *handle, err = ext4_ext_insert_extent(handle, inode, path, &newex, flags); if (err == -ENOSPC && (EXT4_EXT_MAY_ZEROOUT & split_flag)) { - err = ext4_ext_zeroout(inode, &orig_ex); + if (split_flag & (EXT4_EXT_DATA_VALID1|EXT4_EXT_DATA_VALID2)) { + if (split_flag & EXT4_EXT_DATA_VALID1) + err = ext4_ext_zeroout(inode, ex2); + else + err = ext4_ext_zeroout(inode, ex); + } else + err = ext4_ext_zeroout(inode, &orig_ex); + if (err) goto fix_extent_len; /* update the extent length and mark as initialized */ @@ -3025,12 +3038,13 @@ static int ext4_split_extent(handle_t *handle, uninitialized = ext4_ext_is_uninitialized(ex); if (map->m_lblk + map->m_len < ee_block + ee_len) { - split_flag1 = split_flag & EXT4_EXT_MAY_ZEROOUT ? - EXT4_EXT_MAY_ZEROOUT : 0; + split_flag1 = split_flag & EXT4_EXT_MAY_ZEROOUT; flags1 = flags | EXT4_GET_BLOCKS_PRE_IO; if (uninitialized) split_flag1 |= EXT4_EXT_MARK_UNINIT1 | EXT4_EXT_MARK_UNINIT2; + if (split_flag & EXT4_EXT_DATA_VALID2) + split_flag1 |= EXT4_EXT_DATA_VALID1; err = ext4_split_extent_at(handle, inode, path, map->m_lblk + map->m_len, split_flag1, flags1); if (err) @@ -3043,8 +3057,8 @@ static int ext4_split_extent(handle_t *handle, return PTR_ERR(path); if (map->m_lblk >= ee_block) { - split_flag1 = split_flag & EXT4_EXT_MAY_ZEROOUT ? - EXT4_EXT_MAY_ZEROOUT : 0; + split_flag1 = split_flag & (EXT4_EXT_MAY_ZEROOUT | + EXT4_EXT_DATA_VALID2); if (uninitialized) split_flag1 |= EXT4_EXT_MARK_UNINIT1; if (split_flag & EXT4_EXT_MARK_UNINIT2) @@ -3323,26 +3337,47 @@ static int ext4_split_unwritten_extents(handle_t *handle, split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0; split_flag |= EXT4_EXT_MARK_UNINIT2; - + if (flags & EXT4_GET_BLOCKS_CONVERT) + split_flag |= EXT4_EXT_DATA_VALID2; flags |= EXT4_GET_BLOCKS_PRE_IO; return ext4_split_extent(handle, inode, path, map, split_flag, flags); } static int ext4_convert_unwritten_extents_endio(handle_t *handle, - struct inode *inode, - struct ext4_ext_path *path) + struct inode *inode, + struct ext4_map_blocks *map, + struct ext4_ext_path *path) { struct ext4_extent *ex; + ext4_lblk_t ee_block; + unsigned int ee_len; int depth; int err = 0; depth = ext_depth(inode); ex = path[depth].p_ext; + ee_block = le32_to_cpu(ex->ee_block); + ee_len = ext4_ext_get_actual_len(ex); ext_debug("ext4_convert_unwritten_extents_endio: inode %lu, logical" "block %llu, max_blocks %u\n", inode->i_ino, - (unsigned long long)le32_to_cpu(ex->ee_block), - ext4_ext_get_actual_len(ex)); + (unsigned long long)ee_block, ee_len); + + /* If extent is larger than requested then split is required */ + if (ee_block != map->m_lblk || ee_len > map->m_len) { + err = ext4_split_unwritten_extents(handle, inode, map, path, + EXT4_GET_BLOCKS_CONVERT); + if (err < 0) + goto out; + ext4_ext_drop_refs(path); + path = ext4_ext_find_extent(inode, map->m_lblk, path); + if (IS_ERR(path)) { + err = PTR_ERR(path); + goto out; + } + depth = ext_depth(inode); + ex = path[depth].p_ext; + } err = ext4_ext_get_access(handle, inode, path + depth); if (err) @@ -3652,7 +3687,7 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, } /* IO end_io complete, convert the filled extent to written */ if ((flags & EXT4_GET_BLOCKS_CONVERT)) { - ret = ext4_convert_unwritten_extents_endio(handle, inode, + ret = ext4_convert_unwritten_extents_endio(handle, inode, map, path); if (ret >= 0) { ext4_update_inode_fsync_trans(handle, inode, 1); @@ -4428,6 +4463,9 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) */ if (len <= EXT_UNINIT_MAX_LEN << blkbits) flags |= EXT4_GET_BLOCKS_NO_NORMALIZE; + + /* Prevent race condition between unwritten */ + ext4_flush_unwritten_io(inode); retry: while (ret >= 0 && ret < max_blocks) { map.m_lblk = map.m_lblk + ret; diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index fa36372f3fdf..3a100e7a62a8 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -725,6 +725,10 @@ repeat_in_this_group: "inode=%lu", ino + 1); continue; } + BUFFER_TRACE(inode_bitmap_bh, "get_write_access"); + err = ext4_journal_get_write_access(handle, inode_bitmap_bh); + if (err) + goto fail; ext4_lock_group(sb, group); ret2 = ext4_test_and_set_bit(ino, inode_bitmap_bh->b_data); ext4_unlock_group(sb, group); @@ -738,6 +742,11 @@ repeat_in_this_group: goto out; got: + BUFFER_TRACE(inode_bitmap_bh, "call ext4_handle_dirty_metadata"); + err = ext4_handle_dirty_metadata(handle, NULL, inode_bitmap_bh); + if (err) + goto fail; + /* We may have to initialize the block bitmap if it isn't already */ if (ext4_has_group_desc_csum(sb) && gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { @@ -762,9 +771,7 @@ got: ext4_free_group_clusters_set(sb, gdp, ext4_free_clusters_after_init(sb, group, gdp)); ext4_block_bitmap_csum_set(sb, group, gdp, - block_bitmap_bh, - EXT4_BLOCKS_PER_GROUP(sb) / - 8); + block_bitmap_bh); ext4_group_desc_csum_set(sb, group, gdp); } ext4_unlock_group(sb, group); @@ -773,11 +780,6 @@ got: goto fail; } - BUFFER_TRACE(inode_bitmap_bh, "get_write_access"); - err = ext4_journal_get_write_access(handle, inode_bitmap_bh); - if (err) - goto fail; - BUFFER_TRACE(group_desc_bh, "get_write_access"); err = ext4_journal_get_write_access(handle, group_desc_bh); if (err) @@ -825,11 +827,6 @@ got: } ext4_unlock_group(sb, group); - BUFFER_TRACE(inode_bitmap_bh, "call ext4_handle_dirty_metadata"); - err = ext4_handle_dirty_metadata(handle, NULL, inode_bitmap_bh); - if (err) - goto fail; - BUFFER_TRACE(group_desc_bh, "call ext4_handle_dirty_metadata"); err = ext4_handle_dirty_metadata(handle, NULL, group_desc_bh); if (err) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index f8b27bf80aca..526e55358606 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2805,8 +2805,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, } len = ext4_free_group_clusters(sb, gdp) - ac->ac_b_ex.fe_len; ext4_free_group_clusters_set(sb, gdp, len); - ext4_block_bitmap_csum_set(sb, ac->ac_b_ex.fe_group, gdp, bitmap_bh, - EXT4_BLOCKS_PER_GROUP(sb) / 8); + ext4_block_bitmap_csum_set(sb, ac->ac_b_ex.fe_group, gdp, bitmap_bh); ext4_group_desc_csum_set(sb, ac->ac_b_ex.fe_group, gdp); ext4_unlock_group(sb, ac->ac_b_ex.fe_group); @@ -4666,8 +4665,7 @@ do_more: ret = ext4_free_group_clusters(sb, gdp) + count_clusters; ext4_free_group_clusters_set(sb, gdp, ret); - ext4_block_bitmap_csum_set(sb, block_group, gdp, bitmap_bh, - EXT4_BLOCKS_PER_GROUP(sb) / 8); + ext4_block_bitmap_csum_set(sb, block_group, gdp, bitmap_bh); ext4_group_desc_csum_set(sb, block_group, gdp); ext4_unlock_group(sb, block_group); percpu_counter_add(&sbi->s_freeclusters_counter, count_clusters); @@ -4811,8 +4809,7 @@ int ext4_group_add_blocks(handle_t *handle, struct super_block *sb, mb_free_blocks(NULL, &e4b, bit, count); blk_free_count = blocks_freed + ext4_free_group_clusters(sb, desc); ext4_free_group_clusters_set(sb, desc, blk_free_count); - ext4_block_bitmap_csum_set(sb, block_group, desc, bitmap_bh, - EXT4_BLOCKS_PER_GROUP(sb) / 8); + ext4_block_bitmap_csum_set(sb, block_group, desc, bitmap_bh); ext4_group_desc_csum_set(sb, block_group, desc); ext4_unlock_group(sb, block_group); percpu_counter_add(&sbi->s_freeclusters_counter, @@ -4993,8 +4990,9 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) minlen = EXT4_NUM_B2C(EXT4_SB(sb), range->minlen >> sb->s_blocksize_bits); - if (unlikely(minlen > EXT4_CLUSTERS_PER_GROUP(sb)) || - unlikely(start >= max_blks)) + if (minlen > EXT4_CLUSTERS_PER_GROUP(sb) || + start >= max_blks || + range->len < sb->s_blocksize) return -EINVAL; if (end >= max_blks) end = max_blks - 1; diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 7a75e1086961..47bf06a2765d 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -1212,8 +1212,7 @@ static int ext4_set_bitmap_checksums(struct super_block *sb, bh = ext4_get_bitmap(sb, group_data->block_bitmap); if (!bh) return -EIO; - ext4_block_bitmap_csum_set(sb, group, gdp, bh, - EXT4_BLOCKS_PER_GROUP(sb) / 8); + ext4_block_bitmap_csum_set(sb, group, gdp, bh); brelse(bh); return 0; diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 7265a0367476..80928f716850 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -143,9 +143,10 @@ int ext4_superblock_csum_verify(struct super_block *sb, return es->s_checksum == ext4_superblock_csum(sb, es); } -void ext4_superblock_csum_set(struct super_block *sb, - struct ext4_super_block *es) +void ext4_superblock_csum_set(struct super_block *sb) { + struct ext4_super_block *es = EXT4_SB(sb)->s_es; + if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) return; @@ -1963,7 +1964,7 @@ static int ext4_fill_flex_info(struct super_block *sb) sbi->s_log_groups_per_flex = 0; return 1; } - groups_per_flex = 1 << sbi->s_log_groups_per_flex; + groups_per_flex = 1U << sbi->s_log_groups_per_flex; err = ext4_alloc_flex_bg_array(sb, sbi->s_groups_count); if (err) @@ -4381,7 +4382,7 @@ static int ext4_commit_super(struct super_block *sb, int sync) cpu_to_le32(percpu_counter_sum_positive( &EXT4_SB(sb)->s_freeinodes_counter)); BUFFER_TRACE(sbh, "marking dirty"); - ext4_superblock_csum_set(sb, es); + ext4_superblock_csum_set(sb); mark_buffer_dirty(sbh); if (sync) { error = sync_dirty_buffer(sbh); diff --git a/fs/file.c b/fs/file.c index d3b5fa80b71b..eff23162485f 100644 --- a/fs/file.c +++ b/fs/file.c @@ -685,7 +685,6 @@ void do_close_on_exec(struct files_struct *files) struct fdtable *fdt; /* exec unshares first */ - BUG_ON(atomic_read(&files->count) != 1); spin_lock(&files->file_lock); for (i = 0; ; i++) { unsigned long set; @@ -900,7 +899,7 @@ int replace_fd(unsigned fd, struct file *file, unsigned flags) return __close_fd(files, fd); if (fd >= rlimit(RLIMIT_NOFILE)) - return -EMFILE; + return -EBADF; spin_lock(&files->file_lock); err = expand_files(files, fd); @@ -926,7 +925,7 @@ SYSCALL_DEFINE3(dup3, unsigned int, oldfd, unsigned int, newfd, int, flags) return -EINVAL; if (newfd >= rlimit(RLIMIT_NOFILE)) - return -EMFILE; + return -EBADF; spin_lock(&files->file_lock); err = expand_files(files, newfd); @@ -995,16 +994,18 @@ int iterate_fd(struct files_struct *files, unsigned n, const void *p) { struct fdtable *fdt; - struct file *file; int res = 0; if (!files) return 0; spin_lock(&files->file_lock); - fdt = files_fdtable(files); - while (!res && n < fdt->max_fds) { - file = rcu_dereference_check_fdtable(files, fdt->fd[n++]); - if (file) - res = f(p, file, n); + for (fdt = files_fdtable(files); n < fdt->max_fds; n++) { + struct file *file; + file = rcu_dereference_check_fdtable(files, fdt->fd[n]); + if (!file) + continue; + res = f(p, file, n); + if (res) + break; } spin_unlock(&files->file_lock); return res; diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 51ea267d444c..3e3422f7f0a4 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -228,6 +228,8 @@ static void requeue_io(struct inode *inode, struct bdi_writeback *wb) static void inode_sync_complete(struct inode *inode) { inode->i_state &= ~I_SYNC; + /* If inode is clean an unused, put it into LRU now... */ + inode_add_lru(inode); /* Waiters must see I_SYNC cleared before being woken up */ smp_mb(); wake_up_bit(&inode->i_state, __I_SYNC); diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 0def0504afc1..e056b4ce4877 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -516,15 +516,13 @@ static int gfs2_mmap(struct file *file, struct vm_area_struct *vma) struct gfs2_holder i_gh; int error; - gfs2_holder_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh); - error = gfs2_glock_nq(&i_gh); - if (error == 0) { - file_accessed(file); - gfs2_glock_dq(&i_gh); - } - gfs2_holder_uninit(&i_gh); + error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, + &i_gh); if (error) return error; + /* grab lock to update inode */ + gfs2_glock_dq_uninit(&i_gh); + file_accessed(file); } vma->vm_ops = &gfs2_vm_ops; @@ -677,10 +675,8 @@ static ssize_t gfs2_file_aio_write(struct kiocb *iocb, const struct iovec *iov, size_t writesize = iov_length(iov, nr_segs); struct dentry *dentry = file->f_dentry; struct gfs2_inode *ip = GFS2_I(dentry->d_inode); - struct gfs2_sbd *sdp; int ret; - sdp = GFS2_SB(file->f_mapping->host); ret = gfs2_rs_alloc(ip); if (ret) return ret; diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index e6c2fd53cab2..0f22d09f358d 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -768,7 +768,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, mapping->host = s->s_bdev->bd_inode; mapping->flags = 0; mapping_set_gfp_mask(mapping, GFP_NOFS); - mapping->assoc_mapping = NULL; + mapping->private_data = NULL; mapping->backing_dev_info = s->s_bdi; mapping->writeback_index = 0; } diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index 8ff95a2d54ee..9ceccb1595a3 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -393,12 +393,10 @@ static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd) struct gfs2_meta_header *mh; struct gfs2_trans *tr; - lock_buffer(bd->bd_bh); - gfs2_log_lock(sdp); tr = current->journal_info; tr->tr_touched = 1; if (!list_empty(&bd->bd_list)) - goto out; + return; set_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags); set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags); mh = (struct gfs2_meta_header *)bd->bd_bh->b_data; @@ -414,9 +412,6 @@ static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd) sdp->sd_log_num_buf++; list_add(&bd->bd_list, &sdp->sd_log_le_buf); tr->tr_num_buf_new++; -out: - gfs2_log_unlock(sdp); - unlock_buffer(bd->bd_bh); } static void gfs2_check_magic(struct buffer_head *bh) @@ -621,7 +616,6 @@ static void revoke_lo_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd) static void revoke_lo_before_commit(struct gfs2_sbd *sdp) { - struct gfs2_log_descriptor *ld; struct gfs2_meta_header *mh; unsigned int offset; struct list_head *head = &sdp->sd_log_le_revoke; @@ -634,7 +628,6 @@ static void revoke_lo_before_commit(struct gfs2_sbd *sdp) length = gfs2_struct2blk(sdp, sdp->sd_log_num_revoke, sizeof(u64)); page = gfs2_get_log_desc(sdp, GFS2_LOG_DESC_REVOKE, length, sdp->sd_log_num_revoke); - ld = page_address(page); offset = sizeof(struct gfs2_log_descriptor); list_for_each_entry(bd, head, bd_list) { @@ -777,12 +770,10 @@ static void databuf_lo_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd) struct address_space *mapping = bd->bd_bh->b_page->mapping; struct gfs2_inode *ip = GFS2_I(mapping->host); - lock_buffer(bd->bd_bh); - gfs2_log_lock(sdp); if (tr) tr->tr_touched = 1; if (!list_empty(&bd->bd_list)) - goto out; + return; set_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags); set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags); if (gfs2_is_jdata(ip)) { @@ -793,9 +784,6 @@ static void databuf_lo_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd) } else { list_add_tail(&bd->bd_list, &sdp->sd_log_le_ordered); } -out: - gfs2_log_unlock(sdp); - unlock_buffer(bd->bd_bh); } /** diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 40c4b0d42fa8..c5af8e18f27a 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -497,8 +497,11 @@ int gfs2_quota_hold(struct gfs2_inode *ip, u32 uid, u32 gid) struct gfs2_quota_data **qd; int error; - if (ip->i_res == NULL) - gfs2_rs_alloc(ip); + if (ip->i_res == NULL) { + error = gfs2_rs_alloc(ip); + if (error) + return error; + } qd = ip->i_res->rs_qa_qd; diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 3cc402ce6fea..38fe18f2f055 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -553,7 +553,6 @@ void gfs2_free_clones(struct gfs2_rgrpd *rgd) */ int gfs2_rs_alloc(struct gfs2_inode *ip) { - int error = 0; struct gfs2_blkreserv *res; if (ip->i_res) @@ -561,7 +560,7 @@ int gfs2_rs_alloc(struct gfs2_inode *ip) res = kmem_cache_zalloc(gfs2_rsrv_cachep, GFP_NOFS); if (!res) - error = -ENOMEM; + return -ENOMEM; RB_CLEAR_NODE(&res->rs_node); @@ -571,7 +570,7 @@ int gfs2_rs_alloc(struct gfs2_inode *ip) else ip->i_res = res; up_write(&ip->i_rw_mutex); - return error; + return 0; } static void dump_rs(struct seq_file *seq, const struct gfs2_blkreserv *rs) @@ -1263,7 +1262,9 @@ int gfs2_fitrim(struct file *filp, void __user *argp) int ret = 0; u64 amt; u64 trimmed = 0; + u64 start, end, minlen; unsigned int x; + unsigned bs_shift = sdp->sd_sb.sb_bsize_shift; if (!capable(CAP_SYS_ADMIN)) return -EPERM; @@ -1271,19 +1272,25 @@ int gfs2_fitrim(struct file *filp, void __user *argp) if (!blk_queue_discard(q)) return -EOPNOTSUPP; - if (argp == NULL) { - r.start = 0; - r.len = ULLONG_MAX; - r.minlen = 0; - } else if (copy_from_user(&r, argp, sizeof(r))) + if (copy_from_user(&r, argp, sizeof(r))) return -EFAULT; ret = gfs2_rindex_update(sdp); if (ret) return ret; - rgd = gfs2_blk2rgrpd(sdp, r.start, 0); - rgd_end = gfs2_blk2rgrpd(sdp, r.start + r.len, 0); + start = r.start >> bs_shift; + end = start + (r.len >> bs_shift); + minlen = max_t(u64, r.minlen, + q->limits.discard_granularity) >> bs_shift; + + rgd = gfs2_blk2rgrpd(sdp, start, 0); + rgd_end = gfs2_blk2rgrpd(sdp, end - 1, 0); + + if (end <= start || + minlen > sdp->sd_max_rg_data || + start > rgd_end->rd_data0 + rgd_end->rd_data) + return -EINVAL; while (1) { @@ -1295,7 +1302,9 @@ int gfs2_fitrim(struct file *filp, void __user *argp) /* Trim each bitmap in the rgrp */ for (x = 0; x < rgd->rd_length; x++) { struct gfs2_bitmap *bi = rgd->rd_bits + x; - ret = gfs2_rgrp_send_discards(sdp, rgd->rd_data0, NULL, bi, r.minlen, &amt); + ret = gfs2_rgrp_send_discards(sdp, + rgd->rd_data0, NULL, bi, minlen, + &amt); if (ret) { gfs2_glock_dq_uninit(&gh); goto out; @@ -1324,7 +1333,7 @@ int gfs2_fitrim(struct file *filp, void __user *argp) out: r.len = trimmed << 9; - if (argp && copy_to_user(argp, &r, sizeof(r))) + if (copy_to_user(argp, &r, sizeof(r))) return -EFAULT; return ret; diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index bc737261f234..d6488674d916 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -810,7 +810,8 @@ static void gfs2_dirty_inode(struct inode *inode, int flags) return; } need_unlock = 1; - } + } else if (WARN_ON_ONCE(ip->i_gl->gl_state != LM_ST_EXCLUSIVE)) + return; if (current->journal_info == NULL) { ret = gfs2_trans_begin(sdp, RES_DINODE, 0); diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c index adbd27875ef9..413627072f36 100644 --- a/fs/gfs2/trans.c +++ b/fs/gfs2/trans.c @@ -155,14 +155,22 @@ void gfs2_trans_add_bh(struct gfs2_glock *gl, struct buffer_head *bh, int meta) struct gfs2_sbd *sdp = gl->gl_sbd; struct gfs2_bufdata *bd; + lock_buffer(bh); + gfs2_log_lock(sdp); bd = bh->b_private; if (bd) gfs2_assert(sdp, bd->bd_gl == gl); else { + gfs2_log_unlock(sdp); + unlock_buffer(bh); gfs2_attach_bufdata(gl, bh, meta); bd = bh->b_private; + lock_buffer(bh); + gfs2_log_lock(sdp); } lops_add(sdp, bd); + gfs2_log_unlock(sdp); + unlock_buffer(bh); } void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd) diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index c5bc355d8243..4a55f35a6ced 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -151,8 +151,8 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr, { struct mm_struct *mm = current->mm; struct vm_area_struct *vma; - unsigned long start_addr; struct hstate *h = hstate_file(file); + struct vm_unmapped_area_info info; if (len & ~huge_page_mask(h)) return -EINVAL; @@ -173,39 +173,13 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr, return addr; } - if (len > mm->cached_hole_size) - start_addr = mm->free_area_cache; - else { - start_addr = TASK_UNMAPPED_BASE; - mm->cached_hole_size = 0; - } - -full_search: - addr = ALIGN(start_addr, huge_page_size(h)); - - for (vma = find_vma(mm, addr); ; vma = vma->vm_next) { - /* At this point: (!vma || addr < vma->vm_end). */ - if (TASK_SIZE - len < addr) { - /* - * Start a new search - just in case we missed - * some holes. - */ - if (start_addr != TASK_UNMAPPED_BASE) { - start_addr = TASK_UNMAPPED_BASE; - mm->cached_hole_size = 0; - goto full_search; - } - return -ENOMEM; - } - - if (!vma || addr + len <= vma->vm_start) { - mm->free_area_cache = addr + len; - return addr; - } - if (addr + mm->cached_hole_size < vma->vm_start) - mm->cached_hole_size = vma->vm_start - addr; - addr = ALIGN(vma->vm_end, huge_page_size(h)); - } + info.flags = 0; + info.length = len; + info.low_limit = TASK_UNMAPPED_BASE; + info.high_limit = TASK_SIZE; + info.align_mask = PAGE_MASK & ~huge_page_mask(h); + info.align_offset = 0; + return vm_unmapped_area(&info); } #endif @@ -608,11 +582,11 @@ static int hugetlbfs_migrate_page(struct address_space *mapping, int rc; rc = migrate_huge_page_move_mapping(mapping, newpage, page); - if (rc) + if (rc != MIGRATEPAGE_SUCCESS) return rc; migrate_page_copy(newpage, page); - return 0; + return MIGRATEPAGE_SUCCESS; } static int hugetlbfs_statfs(struct dentry *dentry, struct kstatfs *buf) @@ -923,7 +897,7 @@ static struct file_system_type hugetlbfs_fs_type = { .kill_sb = kill_litter_super, }; -static struct vfsmount *hugetlbfs_vfsmount; +static struct vfsmount *hugetlbfs_vfsmount[HUGE_MAX_HSTATE]; static int can_do_hugetlb_shm(void) { @@ -932,9 +906,22 @@ static int can_do_hugetlb_shm(void) return capable(CAP_IPC_LOCK) || in_group_p(shm_group); } +static int get_hstate_idx(int page_size_log) +{ + struct hstate *h; + + if (!page_size_log) + return default_hstate_idx; + h = size_to_hstate(1 << page_size_log); + if (!h) + return -1; + return h - hstates; +} + struct file *hugetlb_file_setup(const char *name, unsigned long addr, size_t size, vm_flags_t acctflag, - struct user_struct **user, int creat_flags) + struct user_struct **user, + int creat_flags, int page_size_log) { int error = -ENOMEM; struct file *file; @@ -944,9 +931,14 @@ struct file *hugetlb_file_setup(const char *name, unsigned long addr, struct qstr quick_string; struct hstate *hstate; unsigned long num_pages; + int hstate_idx; + + hstate_idx = get_hstate_idx(page_size_log); + if (hstate_idx < 0) + return ERR_PTR(-ENODEV); *user = NULL; - if (!hugetlbfs_vfsmount) + if (!hugetlbfs_vfsmount[hstate_idx]) return ERR_PTR(-ENOENT); if (creat_flags == HUGETLB_SHMFS_INODE && !can_do_hugetlb_shm()) { @@ -963,7 +955,7 @@ struct file *hugetlb_file_setup(const char *name, unsigned long addr, } } - root = hugetlbfs_vfsmount->mnt_root; + root = hugetlbfs_vfsmount[hstate_idx]->mnt_root; quick_string.name = name; quick_string.len = strlen(quick_string.name); quick_string.hash = 0; @@ -971,7 +963,7 @@ struct file *hugetlb_file_setup(const char *name, unsigned long addr, if (!path.dentry) goto out_shm_unlock; - path.mnt = mntget(hugetlbfs_vfsmount); + path.mnt = mntget(hugetlbfs_vfsmount[hstate_idx]); error = -ENOSPC; inode = hugetlbfs_get_inode(root->d_sb, NULL, S_IFREG | S_IRWXUGO, 0); if (!inode) @@ -1011,8 +1003,9 @@ out_shm_unlock: static int __init init_hugetlbfs_fs(void) { + struct hstate *h; int error; - struct vfsmount *vfsmount; + int i; error = bdi_init(&hugetlbfs_backing_dev_info); if (error) @@ -1029,14 +1022,26 @@ static int __init init_hugetlbfs_fs(void) if (error) goto out; - vfsmount = kern_mount(&hugetlbfs_fs_type); + i = 0; + for_each_hstate(h) { + char buf[50]; + unsigned ps_kb = 1U << (h->order + PAGE_SHIFT - 10); - if (!IS_ERR(vfsmount)) { - hugetlbfs_vfsmount = vfsmount; - return 0; - } + snprintf(buf, sizeof(buf), "pagesize=%uK", ps_kb); + hugetlbfs_vfsmount[i] = kern_mount_data(&hugetlbfs_fs_type, + buf); - error = PTR_ERR(vfsmount); + if (IS_ERR(hugetlbfs_vfsmount[i])) { + pr_err("hugetlb: Cannot mount internal hugetlbfs for " + "page size %uK", ps_kb); + error = PTR_ERR(hugetlbfs_vfsmount[i]); + hugetlbfs_vfsmount[i] = NULL; + } + i++; + } + /* Non default hstates are optional */ + if (!IS_ERR_OR_NULL(hugetlbfs_vfsmount[default_hstate_idx])) + return 0; out: kmem_cache_destroy(hugetlbfs_inode_cachep); @@ -1047,13 +1052,19 @@ static int __init init_hugetlbfs_fs(void) static void __exit exit_hugetlbfs_fs(void) { + struct hstate *h; + int i; + + /* * Make sure all delayed rcu free inodes are flushed before we * destroy cache. */ rcu_barrier(); kmem_cache_destroy(hugetlbfs_inode_cachep); - kern_unmount(hugetlbfs_vfsmount); + i = 0; + for_each_hstate(h) + kern_unmount(hugetlbfs_vfsmount[i++]); unregister_filesystem(&hugetlbfs_fs_type); bdi_destroy(&hugetlbfs_backing_dev_info); } diff --git a/fs/inode.c b/fs/inode.c index b03c71957246..14084b72b259 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -165,7 +165,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode) mapping->host = inode; mapping->flags = 0; mapping_set_gfp_mask(mapping, GFP_HIGHUSER_MOVABLE); - mapping->assoc_mapping = NULL; + mapping->private_data = NULL; mapping->backing_dev_info = &default_backing_dev_info; mapping->writeback_index = 0; @@ -408,6 +408,19 @@ static void inode_lru_list_add(struct inode *inode) spin_unlock(&inode->i_sb->s_inode_lru_lock); } +/* + * Add inode to LRU if needed (inode is unused and clean). + * + * Needs inode->i_lock held. + */ +void inode_add_lru(struct inode *inode) +{ + if (!(inode->i_state & (I_DIRTY | I_SYNC | I_FREEING | I_WILL_FREE)) && + !atomic_read(&inode->i_count) && inode->i_sb->s_flags & MS_ACTIVE) + inode_lru_list_add(inode); +} + + static void inode_lru_list_del(struct inode *inode) { spin_lock(&inode->i_sb->s_inode_lru_lock); @@ -1390,8 +1403,7 @@ static void iput_final(struct inode *inode) if (!drop && (sb->s_flags & MS_ACTIVE)) { inode->i_state |= I_REFERENCED; - if (!(inode->i_state & (I_DIRTY|I_SYNC))) - inode_lru_list_add(inode); + inode_add_lru(inode); spin_unlock(&inode->i_lock); return; } diff --git a/fs/internal.h b/fs/internal.h index 916b7cbf3e3e..2f6af7f645eb 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -110,6 +110,7 @@ extern int open_check_o_direct(struct file *f); * inode.c */ extern spinlock_t inode_sb_list_lock; +extern void inode_add_lru(struct inode *inode); /* * fs-writeback.c diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c index 78b7f84241d4..7f5120bf0ec2 100644 --- a/fs/jbd/transaction.c +++ b/fs/jbd/transaction.c @@ -1961,7 +1961,9 @@ retry: spin_unlock(&journal->j_list_lock); jbd_unlock_bh_state(bh); spin_unlock(&journal->j_state_lock); + unlock_buffer(bh); log_wait_commit(journal, tid); + lock_buffer(bh); goto retry; } /* diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c index 60ef3fb707ff..1506673c087e 100644 --- a/fs/jffs2/file.c +++ b/fs/jffs2/file.c @@ -138,33 +138,39 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping, struct page *pg; struct inode *inode = mapping->host; struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode); + struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb); + struct jffs2_raw_inode ri; + uint32_t alloc_len = 0; pgoff_t index = pos >> PAGE_CACHE_SHIFT; uint32_t pageofs = index << PAGE_CACHE_SHIFT; int ret = 0; + jffs2_dbg(1, "%s()\n", __func__); + + if (pageofs > inode->i_size) { + ret = jffs2_reserve_space(c, sizeof(ri), &alloc_len, + ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE); + if (ret) + return ret; + } + + mutex_lock(&f->sem); pg = grab_cache_page_write_begin(mapping, index, flags); - if (!pg) + if (!pg) { + if (alloc_len) + jffs2_complete_reservation(c); + mutex_unlock(&f->sem); return -ENOMEM; + } *pagep = pg; - jffs2_dbg(1, "%s()\n", __func__); - - if (pageofs > inode->i_size) { + if (alloc_len) { /* Make new hole frag from old EOF to new page */ - struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb); - struct jffs2_raw_inode ri; struct jffs2_full_dnode *fn; - uint32_t alloc_len; jffs2_dbg(1, "Writing new hole frag 0x%x-0x%x between current EOF and new page\n", (unsigned int)inode->i_size, pageofs); - ret = jffs2_reserve_space(c, sizeof(ri), &alloc_len, - ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE); - if (ret) - goto out_page; - - mutex_lock(&f->sem); memset(&ri, 0, sizeof(ri)); ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK); @@ -191,7 +197,6 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping, if (IS_ERR(fn)) { ret = PTR_ERR(fn); jffs2_complete_reservation(c); - mutex_unlock(&f->sem); goto out_page; } ret = jffs2_add_full_dnode_to_inode(c, f, fn); @@ -206,12 +211,10 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping, jffs2_mark_node_obsolete(c, fn->raw); jffs2_free_full_dnode(fn); jffs2_complete_reservation(c); - mutex_unlock(&f->sem); goto out_page; } jffs2_complete_reservation(c); inode->i_size = pageofs; - mutex_unlock(&f->sem); } /* @@ -220,18 +223,18 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping, * case of a short-copy. */ if (!PageUptodate(pg)) { - mutex_lock(&f->sem); ret = jffs2_do_readpage_nolock(inode, pg); - mutex_unlock(&f->sem); if (ret) goto out_page; } + mutex_unlock(&f->sem); jffs2_dbg(1, "end write_begin(). pg->flags %lx\n", pg->flags); return ret; out_page: unlock_page(pg); page_cache_release(pg); + mutex_unlock(&f->sem); return ret; } diff --git a/fs/jfs/jfs_discard.c b/fs/jfs/jfs_discard.c index 9947563e4175..dfcd50304559 100644 --- a/fs/jfs/jfs_discard.c +++ b/fs/jfs/jfs_discard.c @@ -83,7 +83,7 @@ int jfs_ioc_trim(struct inode *ip, struct fstrim_range *range) struct bmap *bmp = JFS_SBI(ip->i_sb)->bmap; struct super_block *sb = ipbmap->i_sb; int agno, agno_end; - s64 start, end, minlen; + u64 start, end, minlen; u64 trimmed = 0; /** @@ -93,15 +93,19 @@ int jfs_ioc_trim(struct inode *ip, struct fstrim_range *range) * minlen: minimum extent length in Bytes */ start = range->start >> sb->s_blocksize_bits; - if (start < 0) - start = 0; end = start + (range->len >> sb->s_blocksize_bits) - 1; - if (end >= bmp->db_mapsize) - end = bmp->db_mapsize - 1; minlen = range->minlen >> sb->s_blocksize_bits; - if (minlen <= 0) + if (minlen == 0) minlen = 1; + if (minlen > bmp->db_agsize || + start >= bmp->db_mapsize || + range->len < sb->s_blocksize) + return -EINVAL; + + if (end >= bmp->db_mapsize) + end = bmp->db_mapsize - 1; + /** * we trim all ag's within the range */ diff --git a/fs/lockd/clntxdr.c b/fs/lockd/clntxdr.c index d269ada7670e..982d2676e1f8 100644 --- a/fs/lockd/clntxdr.c +++ b/fs/lockd/clntxdr.c @@ -223,7 +223,7 @@ static void encode_nlm_stat(struct xdr_stream *xdr, { __be32 *p; - BUG_ON(be32_to_cpu(stat) > NLM_LCK_DENIED_GRACE_PERIOD); + WARN_ON_ONCE(be32_to_cpu(stat) > NLM_LCK_DENIED_GRACE_PERIOD); p = xdr_reserve_space(xdr, 4); *p = stat; } diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c index e4fb3ba5a58a..3d7e09bcc0e9 100644 --- a/fs/lockd/mon.c +++ b/fs/lockd/mon.c @@ -85,29 +85,38 @@ static struct rpc_clnt *nsm_create(struct net *net) return rpc_create(&args); } +static struct rpc_clnt *nsm_client_set(struct lockd_net *ln, + struct rpc_clnt *clnt) +{ + spin_lock(&ln->nsm_clnt_lock); + if (ln->nsm_users == 0) { + if (clnt == NULL) + goto out; + ln->nsm_clnt = clnt; + } + clnt = ln->nsm_clnt; + ln->nsm_users++; +out: + spin_unlock(&ln->nsm_clnt_lock); + return clnt; +} + static struct rpc_clnt *nsm_client_get(struct net *net) { - static DEFINE_MUTEX(nsm_create_mutex); - struct rpc_clnt *clnt; + struct rpc_clnt *clnt, *new; struct lockd_net *ln = net_generic(net, lockd_net_id); - spin_lock(&ln->nsm_clnt_lock); - if (ln->nsm_users) { - ln->nsm_users++; - clnt = ln->nsm_clnt; - spin_unlock(&ln->nsm_clnt_lock); + clnt = nsm_client_set(ln, NULL); + if (clnt != NULL) goto out; - } - spin_unlock(&ln->nsm_clnt_lock); - mutex_lock(&nsm_create_mutex); - clnt = nsm_create(net); - if (!IS_ERR(clnt)) { - ln->nsm_clnt = clnt; - smp_wmb(); - ln->nsm_users = 1; - } - mutex_unlock(&nsm_create_mutex); + clnt = new = nsm_create(net); + if (IS_ERR(clnt)) + goto out; + + clnt = nsm_client_set(ln, new); + if (clnt != new) + rpc_shutdown_client(new); out: return clnt; } @@ -115,18 +124,16 @@ out: static void nsm_client_put(struct net *net) { struct lockd_net *ln = net_generic(net, lockd_net_id); - struct rpc_clnt *clnt = ln->nsm_clnt; - int shutdown = 0; + struct rpc_clnt *clnt = NULL; spin_lock(&ln->nsm_clnt_lock); - if (ln->nsm_users) { - if (--ln->nsm_users) - ln->nsm_clnt = NULL; - shutdown = !ln->nsm_users; + ln->nsm_users--; + if (ln->nsm_users == 0) { + clnt = ln->nsm_clnt; + ln->nsm_clnt = NULL; } spin_unlock(&ln->nsm_clnt_lock); - - if (shutdown) + if (clnt != NULL) rpc_shutdown_client(clnt); } diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c index 3009a365e082..21171f0c6477 100644 --- a/fs/lockd/svcproc.c +++ b/fs/lockd/svcproc.c @@ -68,7 +68,8 @@ nlmsvc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp, /* Obtain file pointer. Not used by FREE_ALL call. */ if (filp != NULL) { - if ((error = nlm_lookup_file(rqstp, &file, &lock->fh)) != 0) + error = cast_status(nlm_lookup_file(rqstp, &file, &lock->fh)); + if (error != 0) goto no_locks; *filp = file; diff --git a/fs/namei.c b/fs/namei.c index d1895f308156..5f4cdf3ad913 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -705,8 +705,8 @@ static inline void put_link(struct nameidata *nd, struct path *link, void *cooki path_put(link); } -int sysctl_protected_symlinks __read_mostly = 1; -int sysctl_protected_hardlinks __read_mostly = 1; +int sysctl_protected_symlinks __read_mostly = 0; +int sysctl_protected_hardlinks __read_mostly = 0; /** * may_follow_link - Check symlink following for unsafe situations @@ -2131,6 +2131,11 @@ struct dentry *lookup_one_len(const char *name, struct dentry *base, int len) if (!len) return ERR_PTR(-EACCES); + if (unlikely(name[0] == '.')) { + if (len < 2 || (len == 2 && name[1] == '.')) + return ERR_PTR(-EACCES); + } + while (len--) { c = *(const unsigned char *)name++; if (c == '/' || c == '\0') diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 9a521fb39869..5088b57b078a 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -241,7 +241,7 @@ static int nfs_callback_start_svc(int minorversion, struct rpc_xprt *xprt, svc_exit_thread(cb_info->rqst); cb_info->rqst = NULL; cb_info->task = NULL; - return PTR_ERR(cb_info->task); + return ret; } dprintk("nfs_callback_up: service started\n"); return 0; diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index ce8cb926526b..b9e66b7e0c14 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -450,7 +450,8 @@ void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry) nfs_refresh_inode(dentry->d_inode, entry->fattr); goto out; } else { - d_drop(dentry); + if (d_invalidate(dentry) != 0) + goto out; dput(dentry); } } @@ -1100,6 +1101,8 @@ out_set_verifier: out_zap_parent: nfs_zap_caches(dir); out_bad: + nfs_free_fattr(fattr); + nfs_free_fhandle(fhandle); nfs_mark_for_revalidate(dir); if (inode && S_ISDIR(inode->i_mode)) { /* Purge readdir caches. */ @@ -1112,8 +1115,6 @@ out_zap_parent: shrink_dcache_parent(dentry); } d_drop(dentry); - nfs_free_fattr(fattr); - nfs_free_fhandle(fhandle); dput(parent); dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) is invalid\n", __func__, dentry->d_parent->d_name.name, diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c index 31c26c4dcc23..ca4b11ec87a2 100644 --- a/fs/nfs/dns_resolve.c +++ b/fs/nfs/dns_resolve.c @@ -217,7 +217,7 @@ static int nfs_dns_parse(struct cache_detail *cd, char *buf, int buflen) { char buf1[NFS_DNS_HOSTNAME_MAXLEN+1]; struct nfs_dns_ent key, *item; - unsigned long ttl; + unsigned int ttl; ssize_t len; int ret = -EINVAL; @@ -240,7 +240,8 @@ static int nfs_dns_parse(struct cache_detail *cd, char *buf, int buflen) key.namelen = len; memset(&key.h, 0, sizeof(key.h)); - ttl = get_expiry(&buf); + if (get_uint(&buf, &ttl) < 0) + goto out; if (ttl == 0) goto out; key.h.expiry_time = ttl + seconds_since_boot(); diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 5c7325c5c5e6..6fa01aea2488 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -685,7 +685,10 @@ static void __put_nfs_open_context(struct nfs_open_context *ctx, int is_sync) if (ctx->cred != NULL) put_rpccred(ctx->cred); dput(ctx->dentry); - nfs_sb_deactive(sb); + if (is_sync) + nfs_sb_deactive(sb); + else + nfs_sb_deactive_async(sb); kfree(ctx->mdsthreshold); kfree(ctx); } diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 59b133c5d652..05521cadac2e 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -351,10 +351,12 @@ extern int __init register_nfs_fs(void); extern void __exit unregister_nfs_fs(void); extern void nfs_sb_active(struct super_block *sb); extern void nfs_sb_deactive(struct super_block *sb); +extern void nfs_sb_deactive_async(struct super_block *sb); /* namespace.c */ +#define NFS_PATH_CANONICAL 1 extern char *nfs_path(char **p, struct dentry *dentry, - char *buffer, ssize_t buflen); + char *buffer, ssize_t buflen, unsigned flags); extern struct vfsmount *nfs_d_automount(struct path *path); struct vfsmount *nfs_submount(struct nfs_server *, struct dentry *, struct nfs_fh *, struct nfs_fattr *); @@ -498,7 +500,7 @@ static inline char *nfs_devname(struct dentry *dentry, char *buffer, ssize_t buflen) { char *dummy; - return nfs_path(&dummy, dentry, buffer, buflen); + return nfs_path(&dummy, dentry, buffer, buflen, NFS_PATH_CANONICAL); } /* diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c index 8e65c7f1f87c..015f71f8f62c 100644 --- a/fs/nfs/mount_clnt.c +++ b/fs/nfs/mount_clnt.c @@ -181,7 +181,7 @@ int nfs_mount(struct nfs_mount_request *info) else msg.rpc_proc = &mnt_clnt->cl_procinfo[MOUNTPROC_MNT]; - status = rpc_call_sync(mnt_clnt, &msg, 0); + status = rpc_call_sync(mnt_clnt, &msg, RPC_TASK_SOFT|RPC_TASK_TIMEOUT); rpc_shutdown_client(mnt_clnt); if (status < 0) diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index 655925373b91..dd057bc6b65b 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -33,6 +33,7 @@ int nfs_mountpoint_expiry_timeout = 500 * HZ; * @dentry - pointer to dentry * @buffer - result buffer * @buflen - length of buffer + * @flags - options (see below) * * Helper function for constructing the server pathname * by arbitrary hashed dentry. @@ -40,8 +41,14 @@ int nfs_mountpoint_expiry_timeout = 500 * HZ; * This is mainly for use in figuring out the path on the * server side when automounting on top of an existing partition * and in generating /proc/mounts and friends. + * + * Supported flags: + * NFS_PATH_CANONICAL: ensure there is exactly one slash after + * the original device (export) name + * (if unset, the original name is returned verbatim) */ -char *nfs_path(char **p, struct dentry *dentry, char *buffer, ssize_t buflen) +char *nfs_path(char **p, struct dentry *dentry, char *buffer, ssize_t buflen, + unsigned flags) { char *end; int namelen; @@ -74,7 +81,7 @@ rename_retry: rcu_read_unlock(); goto rename_retry; } - if (*end != '/') { + if ((flags & NFS_PATH_CANONICAL) && *end != '/') { if (--buflen < 0) { spin_unlock(&dentry->d_lock); rcu_read_unlock(); @@ -91,9 +98,11 @@ rename_retry: return end; } namelen = strlen(base); - /* Strip off excess slashes in base string */ - while (namelen > 0 && base[namelen - 1] == '/') - namelen--; + if (flags & NFS_PATH_CANONICAL) { + /* Strip off excess slashes in base string */ + while (namelen > 0 && base[namelen - 1] == '/') + namelen--; + } buflen -= namelen; if (buflen < 0) { spin_unlock(&dentry->d_lock); diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 52d847212066..2e45fd9c02a3 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -122,12 +122,21 @@ static void filelayout_reset_read(struct nfs_read_data *data) } } +static void filelayout_fenceme(struct inode *inode, struct pnfs_layout_hdr *lo) +{ + if (!test_and_clear_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) + return; + clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(inode)->flags); + pnfs_return_layout(inode); +} + static int filelayout_async_handle_error(struct rpc_task *task, struct nfs4_state *state, struct nfs_client *clp, struct pnfs_layout_segment *lseg) { - struct inode *inode = lseg->pls_layout->plh_inode; + struct pnfs_layout_hdr *lo = lseg->pls_layout; + struct inode *inode = lo->plh_inode; struct nfs_server *mds_server = NFS_SERVER(inode); struct nfs4_deviceid_node *devid = FILELAYOUT_DEVID_NODE(lseg); struct nfs_client *mds_client = mds_server->nfs_client; @@ -204,10 +213,8 @@ static int filelayout_async_handle_error(struct rpc_task *task, dprintk("%s DS connection error %d\n", __func__, task->tk_status); nfs4_mark_deviceid_unavailable(devid); - clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(inode)->flags); - _pnfs_return_layout(inode); + set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags); rpc_wake_up(&tbl->slot_tbl_waitq); - nfs4_ds_disconnect(clp); /* fall through */ default: reset: @@ -331,7 +338,9 @@ static void filelayout_read_count_stats(struct rpc_task *task, void *data) static void filelayout_read_release(void *data) { struct nfs_read_data *rdata = data; + struct pnfs_layout_hdr *lo = rdata->header->lseg->pls_layout; + filelayout_fenceme(lo->plh_inode, lo); nfs_put_client(rdata->ds_clp); rdata->header->mds_ops->rpc_release(data); } @@ -429,7 +438,9 @@ static void filelayout_write_count_stats(struct rpc_task *task, void *data) static void filelayout_write_release(void *data) { struct nfs_write_data *wdata = data; + struct pnfs_layout_hdr *lo = wdata->header->lseg->pls_layout; + filelayout_fenceme(lo->plh_inode, lo); nfs_put_client(wdata->ds_clp); wdata->header->mds_ops->rpc_release(data); } @@ -739,7 +750,7 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo, goto out_err; if (fl->num_fh > 0) { - fl->fh_array = kzalloc(fl->num_fh * sizeof(struct nfs_fh *), + fl->fh_array = kcalloc(fl->num_fh, sizeof(fl->fh_array[0]), gfp_flags); if (!fl->fh_array) goto out_err; diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h index dca47d786710..8c07241fe52b 100644 --- a/fs/nfs/nfs4filelayout.h +++ b/fs/nfs/nfs4filelayout.h @@ -149,6 +149,5 @@ extern void nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr); extern void nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr); struct nfs4_file_layout_dsaddr * filelayout_get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id, gfp_t gfp_flags); -void nfs4_ds_disconnect(struct nfs_client *clp); #endif /* FS_NFS_NFS4FILELAYOUT_H */ diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c index 3336d5eaf879..a8eaa9b7bb0f 100644 --- a/fs/nfs/nfs4filelayoutdev.c +++ b/fs/nfs/nfs4filelayoutdev.c @@ -149,28 +149,6 @@ _data_server_lookup_locked(const struct list_head *dsaddrs) } /* - * Lookup DS by nfs_client pointer. Zero data server client pointer - */ -void nfs4_ds_disconnect(struct nfs_client *clp) -{ - struct nfs4_pnfs_ds *ds; - struct nfs_client *found = NULL; - - dprintk("%s clp %p\n", __func__, clp); - spin_lock(&nfs4_ds_cache_lock); - list_for_each_entry(ds, &nfs4_data_server_cache, ds_node) - if (ds->ds_clp && ds->ds_clp == clp) { - found = ds->ds_clp; - ds->ds_clp = NULL; - } - spin_unlock(&nfs4_ds_cache_lock); - if (found) { - set_bit(NFS_CS_STOP_RENEW, &clp->cl_res_state); - nfs_put_client(clp); - } -} - -/* * Create an rpc connection to the nfs4_pnfs_ds data server * Currently only supports IPv4 and IPv6 addresses */ diff --git a/fs/nfs/nfs4getroot.c b/fs/nfs/nfs4getroot.c index 6a83780e0ce6..549462e5b9b0 100644 --- a/fs/nfs/nfs4getroot.c +++ b/fs/nfs/nfs4getroot.c @@ -5,6 +5,7 @@ #include <linux/nfs_fs.h> #include "nfs4_fs.h" +#include "internal.h" #define NFSDBG_FACILITY NFSDBG_CLIENT diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c index 79fbb61ce202..1e09eb78543b 100644 --- a/fs/nfs/nfs4namespace.c +++ b/fs/nfs/nfs4namespace.c @@ -81,7 +81,8 @@ static char *nfs_path_component(const char *nfspath, const char *end) static char *nfs4_path(struct dentry *dentry, char *buffer, ssize_t buflen) { char *limit; - char *path = nfs_path(&limit, dentry, buffer, buflen); + char *path = nfs_path(&limit, dentry, buffer, buflen, + NFS_PATH_CANONICAL); if (!IS_ERR(path)) { char *path_component = nfs_path_component(path, limit); if (path_component) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 68b21d81b7ac..5eec4429970c 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -339,8 +339,7 @@ static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struc dprintk("%s ERROR: %d Reset session\n", __func__, errorcode); nfs4_schedule_session_recovery(clp->cl_session, errorcode); - exception->retry = 1; - break; + goto wait_on_recovery; #endif /* defined(CONFIG_NFS_V4_1) */ case -NFS4ERR_FILE_OPEN: if (exception->timeout > HZ) { @@ -1572,9 +1571,11 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata) data->timestamp = jiffies; if (nfs4_setup_sequence(data->o_arg.server, &data->o_arg.seq_args, - &data->o_res.seq_res, task)) - return; - rpc_call_start(task); + &data->o_res.seq_res, + task) != 0) + nfs_release_seqid(data->o_arg.seqid); + else + rpc_call_start(task); return; unlock_no_action: rcu_read_unlock(); @@ -1748,7 +1749,7 @@ static int nfs4_opendata_access(struct rpc_cred *cred, /* even though OPEN succeeded, access is denied. Close the file */ nfs4_close_state(state, fmode); - return -NFS4ERR_ACCESS; + return -EACCES; } /* @@ -2196,7 +2197,7 @@ static void nfs4_free_closedata(void *data) nfs4_put_open_state(calldata->state); nfs_free_seqid(calldata->arg.seqid); nfs4_put_state_owner(sp); - nfs_sb_deactive(sb); + nfs_sb_deactive_async(sb); kfree(calldata); } @@ -2296,9 +2297,10 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) if (nfs4_setup_sequence(NFS_SERVER(inode), &calldata->arg.seq_args, &calldata->res.seq_res, - task)) - goto out; - rpc_call_start(task); + task) != 0) + nfs_release_seqid(calldata->arg.seqid); + else + rpc_call_start(task); out: dprintk("%s: done!\n", __func__); } @@ -4529,6 +4531,7 @@ static void nfs4_locku_done(struct rpc_task *task, void *data) if (nfs4_async_handle_error(task, calldata->server, NULL) == -EAGAIN) rpc_restart_call_prepare(task); } + nfs_release_seqid(calldata->arg.seqid); } static void nfs4_locku_prepare(struct rpc_task *task, void *data) @@ -4545,9 +4548,11 @@ static void nfs4_locku_prepare(struct rpc_task *task, void *data) calldata->timestamp = jiffies; if (nfs4_setup_sequence(calldata->server, &calldata->arg.seq_args, - &calldata->res.seq_res, task)) - return; - rpc_call_start(task); + &calldata->res.seq_res, + task) != 0) + nfs_release_seqid(calldata->arg.seqid); + else + rpc_call_start(task); } static const struct rpc_call_ops nfs4_locku_ops = { @@ -4692,7 +4697,7 @@ static void nfs4_lock_prepare(struct rpc_task *task, void *calldata) /* Do we need to do an open_to_lock_owner? */ if (!(data->arg.lock_seqid->sequence->flags & NFS_SEQID_CONFIRMED)) { if (nfs_wait_on_sequence(data->arg.open_seqid, task) != 0) - return; + goto out_release_lock_seqid; data->arg.open_stateid = &state->stateid; data->arg.new_lock_owner = 1; data->res.open_seqid = data->arg.open_seqid; @@ -4701,10 +4706,15 @@ static void nfs4_lock_prepare(struct rpc_task *task, void *calldata) data->timestamp = jiffies; if (nfs4_setup_sequence(data->server, &data->arg.seq_args, - &data->res.seq_res, task)) + &data->res.seq_res, + task) == 0) { + rpc_call_start(task); return; - rpc_call_start(task); - dprintk("%s: done!, ret = %d\n", __func__, data->rpc_status); + } + nfs_release_seqid(data->arg.open_seqid); +out_release_lock_seqid: + nfs_release_seqid(data->arg.lock_seqid); + dprintk("%s: done!, ret = %d\n", __func__, task->tk_status); } static void nfs4_recover_lock_prepare(struct rpc_task *task, void *calldata) @@ -5667,7 +5677,7 @@ static void nfs4_add_and_init_slots(struct nfs4_slot_table *tbl, tbl->slots = new; tbl->max_slots = max_slots; } - tbl->highest_used_slotid = -1; /* no slot is currently used */ + tbl->highest_used_slotid = NFS4_NO_SLOT; for (i = 0; i < tbl->max_slots; i++) tbl->slots[i].seq_nr = ivalue; spin_unlock(&tbl->slot_tbl_lock); diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index be731e6b7b9c..c6f990656f89 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c @@ -369,7 +369,7 @@ void objio_free_result(struct objlayout_io_res *oir) kfree(objios); } -enum pnfs_osd_errno osd_pri_2_pnfs_err(enum osd_err_priority oep) +static enum pnfs_osd_errno osd_pri_2_pnfs_err(enum osd_err_priority oep) { switch (oep) { case OSD_ERR_PRI_NO_ERROR: @@ -574,7 +574,7 @@ static bool objio_pg_test(struct nfs_pageio_descriptor *pgio, (unsigned long)pgio->pg_layout_private; } -void objio_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) +static void objio_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) { pnfs_generic_pg_init_read(pgio, req); if (unlikely(pgio->pg_lseg == NULL)) @@ -604,7 +604,7 @@ static bool aligned_on_raid_stripe(u64 offset, struct ore_layout *layout, return false; } -void objio_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) +static void objio_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) { unsigned long stripe_end = 0; u64 wb_size; diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index fe624c91bd00..2878f97bd78d 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -925,8 +925,8 @@ pnfs_find_alloc_layout(struct inode *ino, if (likely(nfsi->layout == NULL)) { /* Won the race? */ nfsi->layout = new; return new; - } - pnfs_free_layout_hdr(new); + } else if (new != NULL) + pnfs_free_layout_hdr(new); out_existing: pnfs_get_layout_hdr(nfsi->layout); return nfsi->layout; diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 2d722dba1111..dbf7bba52da0 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -62,6 +62,7 @@ enum { NFS_LAYOUT_RW_FAILED, /* get rw layout failed stop trying */ NFS_LAYOUT_BULK_RECALL, /* bulk recall affecting layout */ NFS_LAYOUT_ROC, /* some lseg had roc bit set */ + NFS_LAYOUT_RETURN, /* Return this layout ASAP */ }; enum layoutdriver_policy_flags { diff --git a/fs/nfs/super.c b/fs/nfs/super.c index e831bce49766..652d3f7176a9 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -54,6 +54,7 @@ #include <linux/parser.h> #include <linux/nsproxy.h> #include <linux/rcupdate.h> +#include <linux/kthread.h> #include <asm/uaccess.h> @@ -415,6 +416,54 @@ void nfs_sb_deactive(struct super_block *sb) } EXPORT_SYMBOL_GPL(nfs_sb_deactive); +static int nfs_deactivate_super_async_work(void *ptr) +{ + struct super_block *sb = ptr; + + deactivate_super(sb); + module_put_and_exit(0); + return 0; +} + +/* + * same effect as deactivate_super, but will do final unmount in kthread + * context + */ +static void nfs_deactivate_super_async(struct super_block *sb) +{ + struct task_struct *task; + char buf[INET6_ADDRSTRLEN + 1]; + struct nfs_server *server = NFS_SB(sb); + struct nfs_client *clp = server->nfs_client; + + if (!atomic_add_unless(&sb->s_active, -1, 1)) { + rcu_read_lock(); + snprintf(buf, sizeof(buf), + rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR)); + rcu_read_unlock(); + + __module_get(THIS_MODULE); + task = kthread_run(nfs_deactivate_super_async_work, sb, + "%s-deactivate-super", buf); + if (IS_ERR(task)) { + pr_err("%s: kthread_run: %ld\n", + __func__, PTR_ERR(task)); + /* make synchronous call and hope for the best */ + deactivate_super(sb); + module_put(THIS_MODULE); + } + } +} + +void nfs_sb_deactive_async(struct super_block *sb) +{ + struct nfs_server *server = NFS_SB(sb); + + if (atomic_dec_and_test(&server->active)) + nfs_deactivate_super_async(sb); +} +EXPORT_SYMBOL_GPL(nfs_sb_deactive_async); + /* * Deliver file system statistics to userspace */ @@ -771,7 +820,7 @@ int nfs_show_devname(struct seq_file *m, struct dentry *root) int err = 0; if (!page) return -ENOMEM; - devname = nfs_path(&dummy, root, page, PAGE_SIZE); + devname = nfs_path(&dummy, root, page, PAGE_SIZE, 0); if (IS_ERR(devname)) err = PTR_ERR(devname); else diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c index 13cea637eff8..3f79c77153b8 100644 --- a/fs/nfs/unlink.c +++ b/fs/nfs/unlink.c @@ -95,7 +95,7 @@ static void nfs_async_unlink_release(void *calldata) nfs_dec_sillycount(data->dir); nfs_free_unlinkdata(data); - nfs_sb_deactive(sb); + nfs_sb_deactive_async(sb); } static void nfs_unlink_prepare(struct rpc_task *task, void *calldata) diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c index 3e7b2a0dc0c8..07f76db04ec7 100644 --- a/fs/nilfs2/page.c +++ b/fs/nilfs2/page.c @@ -431,7 +431,7 @@ void nilfs_mapping_init(struct address_space *mapping, struct inode *inode, mapping->host = inode; mapping->flags = 0; mapping_set_gfp_mask(mapping, GFP_NOFS); - mapping->assoc_mapping = NULL; + mapping->private_data = NULL; mapping->backing_dev_info = bdi; mapping->a_ops = &empty_aops; } diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index f35794b97e8e..a50636025364 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -21,6 +21,7 @@ static bool should_merge(struct fsnotify_event *old, struct fsnotify_event *new) if ((old->path.mnt == new->path.mnt) && (old->path.dentry == new->path.dentry)) return true; + break; case (FSNOTIFY_EVENT_NONE): return true; default: diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 721d692fa8d4..6fcaeb8c902e 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -258,7 +258,8 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group, if (ret) goto out_close_fd; - fd_install(fd, f); + if (fd != FAN_NOFD) + fd_install(fd, f); return fanotify_event_metadata.event_len; out_close_fd: diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 5a4ee77cec51..dda089804942 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -2513,18 +2513,15 @@ static ssize_t ocfs2_file_splice_write(struct pipe_inode_info *pipe, ret = sd.num_spliced; if (ret > 0) { - unsigned long nr_pages; int err; - nr_pages = (ret + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; - err = generic_write_sync(out, *ppos, ret); if (err) ret = err; else *ppos += ret; - balance_dirty_pages_ratelimited_nr(mapping, nr_pages); + balance_dirty_pages_ratelimited(mapping); } return ret; diff --git a/fs/proc/array.c b/fs/proc/array.c index c1c207c36cae..d3696708fc1a 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -438,7 +438,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, min_flt += sig->min_flt; maj_flt += sig->maj_flt; - thread_group_times(task, &utime, &stime); + thread_group_cputime_adjusted(task, &utime, &stime); gtime += sig->gtime; } @@ -454,7 +454,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, if (!whole) { min_flt = task->min_flt; maj_flt = task->maj_flt; - task_times(task, &utime, &stime); + task_cputime_adjusted(task, &utime, &stime); gtime = task->gtime; } diff --git a/fs/proc/base.c b/fs/proc/base.c index 144a96732dd7..aa63d25157b8 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -873,12 +873,119 @@ static const struct file_operations proc_environ_operations = { .release = mem_release, }; +static ssize_t oom_adj_read(struct file *file, char __user *buf, size_t count, + loff_t *ppos) +{ + struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode); + char buffer[PROC_NUMBUF]; + int oom_adj = OOM_ADJUST_MIN; + size_t len; + unsigned long flags; + + if (!task) + return -ESRCH; + if (lock_task_sighand(task, &flags)) { + if (task->signal->oom_score_adj == OOM_SCORE_ADJ_MAX) + oom_adj = OOM_ADJUST_MAX; + else + oom_adj = (task->signal->oom_score_adj * -OOM_DISABLE) / + OOM_SCORE_ADJ_MAX; + unlock_task_sighand(task, &flags); + } + put_task_struct(task); + len = snprintf(buffer, sizeof(buffer), "%d\n", oom_adj); + return simple_read_from_buffer(buf, count, ppos, buffer, len); +} + +static ssize_t oom_adj_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct task_struct *task; + char buffer[PROC_NUMBUF]; + int oom_adj; + unsigned long flags; + int err; + + memset(buffer, 0, sizeof(buffer)); + if (count > sizeof(buffer) - 1) + count = sizeof(buffer) - 1; + if (copy_from_user(buffer, buf, count)) { + err = -EFAULT; + goto out; + } + + err = kstrtoint(strstrip(buffer), 0, &oom_adj); + if (err) + goto out; + if ((oom_adj < OOM_ADJUST_MIN || oom_adj > OOM_ADJUST_MAX) && + oom_adj != OOM_DISABLE) { + err = -EINVAL; + goto out; + } + + task = get_proc_task(file->f_path.dentry->d_inode); + if (!task) { + err = -ESRCH; + goto out; + } + + task_lock(task); + if (!task->mm) { + err = -EINVAL; + goto err_task_lock; + } + + if (!lock_task_sighand(task, &flags)) { + err = -ESRCH; + goto err_task_lock; + } + + /* + * Scale /proc/pid/oom_score_adj appropriately ensuring that a maximum + * value is always attainable. + */ + if (oom_adj == OOM_ADJUST_MAX) + oom_adj = OOM_SCORE_ADJ_MAX; + else + oom_adj = (oom_adj * OOM_SCORE_ADJ_MAX) / -OOM_DISABLE; + + if (oom_adj < task->signal->oom_score_adj && + !capable(CAP_SYS_RESOURCE)) { + err = -EACCES; + goto err_sighand; + } + + /* + * /proc/pid/oom_adj is provided for legacy purposes, ask users to use + * /proc/pid/oom_score_adj instead. + */ + printk_once(KERN_WARNING "%s (%d): /proc/%d/oom_adj is deprecated, please use /proc/%d/oom_score_adj instead.\n", + current->comm, task_pid_nr(current), task_pid_nr(task), + task_pid_nr(task)); + + task->signal->oom_score_adj = oom_adj; + trace_oom_score_adj_update(task); +err_sighand: + unlock_task_sighand(task, &flags); +err_task_lock: + task_unlock(task); + put_task_struct(task); +out: + return err < 0 ? err : count; +} + +static const struct file_operations proc_oom_adj_operations = { + .read = oom_adj_read, + .write = oom_adj_write, + .llseek = generic_file_llseek, +}; + static ssize_t oom_score_adj_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode); char buffer[PROC_NUMBUF]; - int oom_score_adj = OOM_SCORE_ADJ_MIN; + short oom_score_adj = OOM_SCORE_ADJ_MIN; unsigned long flags; size_t len; @@ -889,7 +996,7 @@ static ssize_t oom_score_adj_read(struct file *file, char __user *buf, unlock_task_sighand(task, &flags); } put_task_struct(task); - len = snprintf(buffer, sizeof(buffer), "%d\n", oom_score_adj); + len = snprintf(buffer, sizeof(buffer), "%hd\n", oom_score_adj); return simple_read_from_buffer(buf, count, ppos, buffer, len); } @@ -936,15 +1043,15 @@ static ssize_t oom_score_adj_write(struct file *file, const char __user *buf, goto err_task_lock; } - if (oom_score_adj < task->signal->oom_score_adj_min && + if ((short)oom_score_adj < task->signal->oom_score_adj_min && !capable(CAP_SYS_RESOURCE)) { err = -EACCES; goto err_sighand; } - task->signal->oom_score_adj = oom_score_adj; + task->signal->oom_score_adj = (short)oom_score_adj; if (has_capability_noaudit(current, CAP_SYS_RESOURCE)) - task->signal->oom_score_adj_min = oom_score_adj; + task->signal->oom_score_adj_min = (short)oom_score_adj; trace_oom_score_adj_update(task); err_sighand: @@ -1770,8 +1877,9 @@ static struct dentry *proc_map_files_lookup(struct inode *dir, if (!vma) goto out_no_vma; - result = proc_map_files_instantiate(dir, dentry, task, - (void *)(unsigned long)vma->vm_file->f_mode); + if (vma->vm_file) + result = proc_map_files_instantiate(dir, dentry, task, + (void *)(unsigned long)vma->vm_file->f_mode); out_no_vma: up_read(&mm->mmap_sem); @@ -2598,6 +2706,7 @@ static const struct pid_entry tgid_base_stuff[] = { REG("cgroup", S_IRUGO, proc_cgroup_operations), #endif INF("oom_score", S_IRUGO, proc_oom_score), + REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations), REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations), #ifdef CONFIG_AUDITSYSCALL REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations), @@ -2964,6 +3073,7 @@ static const struct pid_entry tid_base_stuff[] = { REG("cgroup", S_IRUGO, proc_cgroup_operations), #endif INF("oom_score", S_IRUGO, proc_oom_score), + REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations), REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations), #ifdef CONFIG_AUDITSYSCALL REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations), diff --git a/fs/proc/internal.h b/fs/proc/internal.h index cceaab07ad54..43973b084abf 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -12,6 +12,7 @@ #include <linux/sched.h> #include <linux/proc_fs.h> struct ctl_table_header; +struct mempolicy; extern struct proc_dir_entry proc_root; #ifdef CONFIG_PROC_SYSCTL @@ -74,6 +75,9 @@ struct proc_maps_private { #ifdef CONFIG_MMU struct vm_area_struct *tail_vma; #endif +#ifdef CONFIG_NUMA + struct mempolicy *task_mempolicy; +#endif }; void proc_init_inodecache(void); diff --git a/fs/proc/stat.c b/fs/proc/stat.c index 64c3b3172367..e296572c73ed 100644 --- a/fs/proc/stat.c +++ b/fs/proc/stat.c @@ -45,10 +45,13 @@ static cputime64_t get_iowait_time(int cpu) static u64 get_idle_time(int cpu) { - u64 idle, idle_time = get_cpu_idle_time_us(cpu, NULL); + u64 idle, idle_time = -1ULL; + + if (cpu_online(cpu)) + idle_time = get_cpu_idle_time_us(cpu, NULL); if (idle_time == -1ULL) - /* !NO_HZ so we can rely on cpustat.idle */ + /* !NO_HZ or cpu offline so we can rely on cpustat.idle */ idle = kcpustat_cpu(cpu).cpustat[CPUTIME_IDLE]; else idle = usecs_to_cputime64(idle_time); @@ -58,10 +61,13 @@ static u64 get_idle_time(int cpu) static u64 get_iowait_time(int cpu) { - u64 iowait, iowait_time = get_cpu_iowait_time_us(cpu, NULL); + u64 iowait, iowait_time = -1ULL; + + if (cpu_online(cpu)) + iowait_time = get_cpu_iowait_time_us(cpu, NULL); if (iowait_time == -1ULL) - /* !NO_HZ so we can rely on cpustat.iowait */ + /* !NO_HZ or cpu offline so we can rely on cpustat.iowait */ iowait = kcpustat_cpu(cpu).cpustat[CPUTIME_IOWAIT]; else iowait = usecs_to_cputime64(iowait_time); diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 79827ce03e3b..90c63f9392a5 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -90,10 +90,55 @@ static void pad_len_spaces(struct seq_file *m, int len) seq_printf(m, "%*c", len, ' '); } +#ifdef CONFIG_NUMA +/* + * These functions are for numa_maps but called in generic **maps seq_file + * ->start(), ->stop() ops. + * + * numa_maps scans all vmas under mmap_sem and checks their mempolicy. + * Each mempolicy object is controlled by reference counting. The problem here + * is how to avoid accessing dead mempolicy object. + * + * Because we're holding mmap_sem while reading seq_file, it's safe to access + * each vma's mempolicy, no vma objects will never drop refs to mempolicy. + * + * A task's mempolicy (task->mempolicy) has different behavior. task->mempolicy + * is set and replaced under mmap_sem but unrefed and cleared under task_lock(). + * So, without task_lock(), we cannot trust get_vma_policy() because we cannot + * gurantee the task never exits under us. But taking task_lock() around + * get_vma_plicy() causes lock order problem. + * + * To access task->mempolicy without lock, we hold a reference count of an + * object pointed by task->mempolicy and remember it. This will guarantee + * that task->mempolicy points to an alive object or NULL in numa_maps accesses. + */ +static void hold_task_mempolicy(struct proc_maps_private *priv) +{ + struct task_struct *task = priv->task; + + task_lock(task); + priv->task_mempolicy = task->mempolicy; + mpol_get(priv->task_mempolicy); + task_unlock(task); +} +static void release_task_mempolicy(struct proc_maps_private *priv) +{ + mpol_put(priv->task_mempolicy); +} +#else +static void hold_task_mempolicy(struct proc_maps_private *priv) +{ +} +static void release_task_mempolicy(struct proc_maps_private *priv) +{ +} +#endif + static void vma_stop(struct proc_maps_private *priv, struct vm_area_struct *vma) { if (vma && vma != priv->tail_vma) { struct mm_struct *mm = vma->vm_mm; + release_task_mempolicy(priv); up_read(&mm->mmap_sem); mmput(mm); } @@ -132,7 +177,7 @@ static void *m_start(struct seq_file *m, loff_t *pos) tail_vma = get_gate_vma(priv->task->mm); priv->tail_vma = tail_vma; - + hold_task_mempolicy(priv); /* Start with last addr hint */ vma = find_vma(mm, last_addr); if (last_addr && vma) { @@ -159,6 +204,7 @@ out: if (vma) return vma; + release_task_mempolicy(priv); /* End of vmas has been reached */ m->version = (tail_vma != NULL)? 0: -1UL; up_read(&mm->mmap_sem); @@ -1158,6 +1204,7 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid) struct vm_area_struct *vma = v; struct numa_maps *md = &numa_priv->md; struct file *file = vma->vm_file; + struct task_struct *task = proc_priv->task; struct mm_struct *mm = vma->vm_mm; struct mm_walk walk = {}; struct mempolicy *pol; @@ -1177,7 +1224,7 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid) walk.private = md; walk.mm = mm; - pol = get_vma_policy(proc_priv->task, vma, vma->vm_start); + pol = get_vma_policy(task, vma, vma->vm_start); mpol_to_str(buffer, sizeof(buffer), pol, 0); mpol_cond_put(pol); @@ -1189,7 +1236,7 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid) } else if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) { seq_printf(m, " heap"); } else { - pid_t tid = vm_is_stack(proc_priv->task, vma, is_pid); + pid_t tid = vm_is_stack(task, vma, is_pid); if (tid != 0) { /* * Thread stack in /proc/PID/task/TID/maps or diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c index 4ab572e6d277..ed1d8c7212da 100644 --- a/fs/pstore/inode.c +++ b/fs/pstore/inode.c @@ -49,6 +49,7 @@ struct pstore_private { struct pstore_info *psi; enum pstore_type_id type; u64 id; + int count; ssize_t size; char data[]; }; @@ -175,7 +176,8 @@ static int pstore_unlink(struct inode *dir, struct dentry *dentry) struct pstore_private *p = dentry->d_inode->i_private; if (p->psi->erase) - p->psi->erase(p->type, p->id, p->psi); + p->psi->erase(p->type, p->id, p->count, + dentry->d_inode->i_ctime, p->psi); return simple_unlink(dir, dentry); } @@ -270,7 +272,7 @@ int pstore_is_mounted(void) * Load it up with "size" bytes of data from "buf". * Set the mtime & ctime to the date that this record was originally stored. */ -int pstore_mkfile(enum pstore_type_id type, char *psname, u64 id, +int pstore_mkfile(enum pstore_type_id type, char *psname, u64 id, int count, char *data, size_t size, struct timespec time, struct pstore_info *psi) { @@ -306,6 +308,7 @@ int pstore_mkfile(enum pstore_type_id type, char *psname, u64 id, goto fail_alloc; private->type = type; private->id = id; + private->count = count; private->psi = psi; switch (type) { diff --git a/fs/pstore/internal.h b/fs/pstore/internal.h index 4847f588b7d5..937d820f273c 100644 --- a/fs/pstore/internal.h +++ b/fs/pstore/internal.h @@ -50,7 +50,7 @@ extern struct pstore_info *psinfo; extern void pstore_set_kmsg_bytes(int); extern void pstore_get_records(int); extern int pstore_mkfile(enum pstore_type_id, char *psname, u64 id, - char *data, size_t size, + int count, char *data, size_t size, struct timespec time, struct pstore_info *psi); extern int pstore_is_mounted(void); diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c index a40da07e93d6..5ea2e77ff023 100644 --- a/fs/pstore/platform.c +++ b/fs/pstore/platform.c @@ -136,7 +136,7 @@ static void pstore_dump(struct kmsg_dumper *dumper, break; ret = psinfo->write(PSTORE_TYPE_DMESG, reason, &id, part, - hsize + len, psinfo); + oopscount, hsize + len, psinfo); if (ret == 0 && reason == KMSG_DUMP_OOPS && pstore_is_mounted()) pstore_new_entry = 1; @@ -161,6 +161,7 @@ static void pstore_console_write(struct console *con, const char *s, unsigned c) while (s < e) { unsigned long flags; + u64 id; if (c > psinfo->bufsize) c = psinfo->bufsize; @@ -172,7 +173,7 @@ static void pstore_console_write(struct console *con, const char *s, unsigned c) spin_lock_irqsave(&psinfo->buf_lock, flags); } memcpy(psinfo->buf, s, c); - psinfo->write(PSTORE_TYPE_CONSOLE, 0, NULL, 0, c, psinfo); + psinfo->write(PSTORE_TYPE_CONSOLE, 0, &id, 0, 0, c, psinfo); spin_unlock_irqrestore(&psinfo->buf_lock, flags); s += c; c = e - s; @@ -196,7 +197,7 @@ static void pstore_register_console(void) {} static int pstore_write_compat(enum pstore_type_id type, enum kmsg_dump_reason reason, - u64 *id, unsigned int part, + u64 *id, unsigned int part, int count, size_t size, struct pstore_info *psi) { return psi->write_buf(type, reason, id, part, psinfo->buf, size, psi); @@ -266,6 +267,7 @@ void pstore_get_records(int quiet) char *buf = NULL; ssize_t size; u64 id; + int count; enum pstore_type_id type; struct timespec time; int failed = 0, rc; @@ -277,9 +279,9 @@ void pstore_get_records(int quiet) if (psi->open && psi->open(psi)) goto out; - while ((size = psi->read(&id, &type, &time, &buf, psi)) > 0) { - rc = pstore_mkfile(type, psi->name, id, buf, (size_t)size, - time, psi); + while ((size = psi->read(&id, &type, &count, &time, &buf, psi)) > 0) { + rc = pstore_mkfile(type, psi->name, id, count, buf, + (size_t)size, time, psi); kfree(buf); buf = NULL; if (rc && (rc != -EEXIST || !quiet)) diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c index 1a4f6da58eab..2bfa36e0ffe8 100644 --- a/fs/pstore/ram.c +++ b/fs/pstore/ram.c @@ -132,9 +132,8 @@ ramoops_get_next_prz(struct persistent_ram_zone *przs[], uint *c, uint max, } static ssize_t ramoops_pstore_read(u64 *id, enum pstore_type_id *type, - struct timespec *time, - char **buf, - struct pstore_info *psi) + int *count, struct timespec *time, + char **buf, struct pstore_info *psi) { ssize_t size; struct ramoops_context *cxt = psi->data; @@ -236,8 +235,8 @@ static int notrace ramoops_pstore_write_buf(enum pstore_type_id type, return 0; } -static int ramoops_pstore_erase(enum pstore_type_id type, u64 id, - struct pstore_info *psi) +static int ramoops_pstore_erase(enum pstore_type_id type, u64 id, int count, + struct timespec time, struct pstore_info *psi) { struct ramoops_context *cxt = psi->data; struct persistent_ram_zone *prz; diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 557a9c20a215..05ae3c97f7a5 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -1160,6 +1160,8 @@ static int need_print_warning(struct dquot_warn *warn) return uid_eq(current_fsuid(), warn->w_dq_id.uid); case GRPQUOTA: return in_group_p(warn->w_dq_id.gid); + case PRJQUOTA: /* Never taken... Just make gcc happy */ + return 0; } return 0; } diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index f27f01a98aa2..d83736fbc26c 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -1782,8 +1782,9 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th, BUG_ON(!th->t_trans_id); - dquot_initialize(inode); + reiserfs_write_unlock(inode->i_sb); err = dquot_alloc_inode(inode); + reiserfs_write_lock(inode->i_sb); if (err) goto out_end_trans; if (!dir->i_nlink) { @@ -1979,8 +1980,10 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th, out_end_trans: journal_end(th, th->t_super, th->t_blocks_allocated); + reiserfs_write_unlock(inode->i_sb); /* Drop can be outside and it needs more credits so it's better to have it outside */ dquot_drop(inode); + reiserfs_write_lock(inode->i_sb); inode->i_flags |= S_NOQUOTA; make_bad_inode(inode); @@ -3103,10 +3106,9 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr) /* must be turned off for recursive notify_change calls */ ia_valid = attr->ia_valid &= ~(ATTR_KILL_SUID|ATTR_KILL_SGID); - depth = reiserfs_write_lock_once(inode->i_sb); if (is_quota_modification(inode, attr)) dquot_initialize(inode); - + depth = reiserfs_write_lock_once(inode->i_sb); if (attr->ia_valid & ATTR_SIZE) { /* version 2 items will be caught by the s_maxbytes check ** done for us in vmtruncate @@ -3170,7 +3172,9 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr) error = journal_begin(&th, inode->i_sb, jbegin_count); if (error) goto out; + reiserfs_write_unlock_once(inode->i_sb, depth); error = dquot_transfer(inode, attr); + depth = reiserfs_write_lock_once(inode->i_sb); if (error) { journal_end(&th, inode->i_sb, jbegin_count); goto out; diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c index f8afa4b162b8..2f40a4c70a4d 100644 --- a/fs/reiserfs/stree.c +++ b/fs/reiserfs/stree.c @@ -1968,7 +1968,9 @@ int reiserfs_paste_into_item(struct reiserfs_transaction_handle *th, struct tree key2type(&(key->on_disk_key))); #endif + reiserfs_write_unlock(inode->i_sb); retval = dquot_alloc_space_nodirty(inode, pasted_size); + reiserfs_write_lock(inode->i_sb); if (retval) { pathrelse(search_path); return retval; @@ -2061,9 +2063,11 @@ int reiserfs_insert_item(struct reiserfs_transaction_handle *th, "reiserquota insert_item(): allocating %u id=%u type=%c", quota_bytes, inode->i_uid, head2type(ih)); #endif + reiserfs_write_unlock(inode->i_sb); /* We can't dirty inode here. It would be immediately written but * appropriate stat item isn't inserted yet... */ retval = dquot_alloc_space_nodirty(inode, quota_bytes); + reiserfs_write_lock(inode->i_sb); if (retval) { pathrelse(path); return retval; diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 1078ae179993..418bdc3a57da 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -298,7 +298,9 @@ static int finish_unfinished(struct super_block *s) retval = remove_save_link_only(s, &save_link_key, 0); continue; } + reiserfs_write_unlock(s); dquot_initialize(inode); + reiserfs_write_lock(s); if (truncate && S_ISDIR(inode->i_mode)) { /* We got a truncate request for a dir which is impossible. @@ -1335,7 +1337,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) kfree(qf_names[i]); #endif err = -EINVAL; - goto out_err; + goto out_unlock; } #ifdef CONFIG_QUOTA handle_quota_files(s, qf_names, &qfmt); @@ -1379,7 +1381,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) if (blocks) { err = reiserfs_resize(s, blocks); if (err != 0) - goto out_err; + goto out_unlock; } if (*mount_flags & MS_RDONLY) { @@ -1389,9 +1391,15 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) /* it is read-only already */ goto out_ok; + /* + * Drop write lock. Quota will retake it when needed and lock + * ordering requires calling dquot_suspend() without it. + */ + reiserfs_write_unlock(s); err = dquot_suspend(s, -1); if (err < 0) goto out_err; + reiserfs_write_lock(s); /* try to remount file system with read-only permissions */ if (sb_umount_state(rs) == REISERFS_VALID_FS @@ -1401,7 +1409,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) err = journal_begin(&th, s, 10); if (err) - goto out_err; + goto out_unlock; /* Mounting a rw partition read-only. */ reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1); @@ -1416,7 +1424,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) if (reiserfs_is_journal_aborted(journal)) { err = journal->j_errno; - goto out_err; + goto out_unlock; } handle_data_mode(s, mount_options); @@ -1425,7 +1433,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) s->s_flags &= ~MS_RDONLY; /* now it is safe to call journal_begin */ err = journal_begin(&th, s, 10); if (err) - goto out_err; + goto out_unlock; /* Mount a partition which is read-only, read-write */ reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1); @@ -1442,10 +1450,16 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) SB_JOURNAL(s)->j_must_wait = 1; err = journal_end(&th, s, 10); if (err) - goto out_err; + goto out_unlock; if (!(*mount_flags & MS_RDONLY)) { + /* + * Drop write lock. Quota will retake it when needed and lock + * ordering requires calling dquot_resume() without it. + */ + reiserfs_write_unlock(s); dquot_resume(s, -1); + reiserfs_write_lock(s); finish_unfinished(s); reiserfs_xattr_init(s, *mount_flags); } @@ -1455,9 +1469,10 @@ out_ok: reiserfs_write_unlock(s); return 0; +out_unlock: + reiserfs_write_unlock(s); out_err: kfree(new_opts); - reiserfs_write_unlock(s); return err; } @@ -2095,13 +2110,15 @@ static int reiserfs_write_dquot(struct dquot *dquot) REISERFS_QUOTA_TRANS_BLOCKS(dquot->dq_sb)); if (ret) goto out; + reiserfs_write_unlock(dquot->dq_sb); ret = dquot_commit(dquot); + reiserfs_write_lock(dquot->dq_sb); err = journal_end(&th, dquot->dq_sb, REISERFS_QUOTA_TRANS_BLOCKS(dquot->dq_sb)); if (!ret && err) ret = err; - out: +out: reiserfs_write_unlock(dquot->dq_sb); return ret; } @@ -2117,13 +2134,15 @@ static int reiserfs_acquire_dquot(struct dquot *dquot) REISERFS_QUOTA_INIT_BLOCKS(dquot->dq_sb)); if (ret) goto out; + reiserfs_write_unlock(dquot->dq_sb); ret = dquot_acquire(dquot); + reiserfs_write_lock(dquot->dq_sb); err = journal_end(&th, dquot->dq_sb, REISERFS_QUOTA_INIT_BLOCKS(dquot->dq_sb)); if (!ret && err) ret = err; - out: +out: reiserfs_write_unlock(dquot->dq_sb); return ret; } @@ -2137,19 +2156,21 @@ static int reiserfs_release_dquot(struct dquot *dquot) ret = journal_begin(&th, dquot->dq_sb, REISERFS_QUOTA_DEL_BLOCKS(dquot->dq_sb)); + reiserfs_write_unlock(dquot->dq_sb); if (ret) { /* Release dquot anyway to avoid endless cycle in dqput() */ dquot_release(dquot); goto out; } ret = dquot_release(dquot); + reiserfs_write_lock(dquot->dq_sb); err = journal_end(&th, dquot->dq_sb, REISERFS_QUOTA_DEL_BLOCKS(dquot->dq_sb)); if (!ret && err) ret = err; - out: reiserfs_write_unlock(dquot->dq_sb); +out: return ret; } @@ -2174,11 +2195,13 @@ static int reiserfs_write_info(struct super_block *sb, int type) ret = journal_begin(&th, sb, 2); if (ret) goto out; + reiserfs_write_unlock(sb); ret = dquot_commit_info(sb, type); + reiserfs_write_lock(sb); err = journal_end(&th, sb, 2); if (!ret && err) ret = err; - out: +out: reiserfs_write_unlock(sb); return ret; } @@ -2203,8 +2226,11 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id, struct reiserfs_transaction_handle th; int opt = type == USRQUOTA ? REISERFS_USRQUOTA : REISERFS_GRPQUOTA; - if (!(REISERFS_SB(sb)->s_mount_opt & (1 << opt))) - return -EINVAL; + reiserfs_write_lock(sb); + if (!(REISERFS_SB(sb)->s_mount_opt & (1 << opt))) { + err = -EINVAL; + goto out; + } /* Quotafile not on the same filesystem? */ if (path->dentry->d_sb != sb) { @@ -2246,8 +2272,10 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id, if (err) goto out; } - err = dquot_quota_on(sb, type, format_id, path); + reiserfs_write_unlock(sb); + return dquot_quota_on(sb, type, format_id, path); out: + reiserfs_write_unlock(sb); return err; } @@ -2320,7 +2348,9 @@ static ssize_t reiserfs_quota_write(struct super_block *sb, int type, tocopy = sb->s_blocksize - offset < towrite ? sb->s_blocksize - offset : towrite; tmp_bh.b_state = 0; + reiserfs_write_lock(sb); err = reiserfs_get_block(inode, blk, &tmp_bh, GET_BLOCK_CREATE); + reiserfs_write_unlock(sb); if (err) goto out; if (offset || tocopy != sb->s_blocksize) @@ -2336,10 +2366,12 @@ static ssize_t reiserfs_quota_write(struct super_block *sb, int type, flush_dcache_page(bh->b_page); set_buffer_uptodate(bh); unlock_buffer(bh); + reiserfs_write_lock(sb); reiserfs_prepare_for_journal(sb, bh, 1); journal_mark_dirty(current->journal_info, sb, bh); if (!journal_quota) reiserfs_add_ordered_list(inode, bh); + reiserfs_write_unlock(sb); brelse(bh); offset = 0; towrite -= tocopy; diff --git a/fs/splice.c b/fs/splice.c index 13e5b4776e7a..8890604e3fcd 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -1024,17 +1024,14 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out, ret = sd.num_spliced; if (ret > 0) { - unsigned long nr_pages; int err; - nr_pages = (ret + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; - err = generic_write_sync(out, *ppos, ret); if (err) ret = err; else *ppos += ret; - balance_dirty_pages_ratelimited_nr(mapping, nr_pages); + balance_dirty_pages_ratelimited(mapping); } sb_end_write(inode->i_sb); diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index 6b0bb00d4d2b..2fbdff6be25c 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c @@ -485,20 +485,18 @@ int __sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd) /** * sysfs_pathname - return full path to sysfs dirent * @sd: sysfs_dirent whose path we want - * @path: caller allocated buffer + * @path: caller allocated buffer of size PATH_MAX * * Gives the name "/" to the sysfs_root entry; any path returned * is relative to wherever sysfs is mounted. - * - * XXX: does no error checking on @path size */ static char *sysfs_pathname(struct sysfs_dirent *sd, char *path) { if (sd->s_parent) { sysfs_pathname(sd->s_parent, path); - strcat(path, "/"); + strlcat(path, "/", PATH_MAX); } - strcat(path, sd->s_name); + strlcat(path, sd->s_name, PATH_MAX); return path; } @@ -531,9 +529,11 @@ int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd) char *path = kzalloc(PATH_MAX, GFP_KERNEL); WARN(1, KERN_WARNING "sysfs: cannot create duplicate filename '%s'\n", - (path == NULL) ? sd->s_name : - strcat(strcat(sysfs_pathname(acxt->parent_sd, path), "/"), - sd->s_name)); + (path == NULL) ? sd->s_name + : (sysfs_pathname(acxt->parent_sd, path), + strlcat(path, "/", PATH_MAX), + strlcat(path, sd->s_name, PATH_MAX), + path)); kfree(path); } diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index 00012e31829d..602f56db0442 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c @@ -485,8 +485,8 @@ const struct file_operations sysfs_file_operations = { .poll = sysfs_poll, }; -int sysfs_attr_ns(struct kobject *kobj, const struct attribute *attr, - const void **pns) +static int sysfs_attr_ns(struct kobject *kobj, const struct attribute *attr, + const void **pns) { struct sysfs_dirent *dir_sd = kobj->sd; const struct sysfs_ops *ops; diff --git a/fs/ubifs/find.c b/fs/ubifs/find.c index 28ec13af28d9..2dcf3d473fec 100644 --- a/fs/ubifs/find.c +++ b/fs/ubifs/find.c @@ -681,8 +681,16 @@ int ubifs_find_free_leb_for_idx(struct ubifs_info *c) if (!lprops) { lprops = ubifs_fast_find_freeable(c); if (!lprops) { - ubifs_assert(c->freeable_cnt == 0); - if (c->lst.empty_lebs - c->lst.taken_empty_lebs > 0) { + /* + * The first condition means the following: go scan the + * LPT if there are uncategorized lprops, which means + * there may be freeable LEBs there (UBIFS does not + * store the information about freeable LEBs in the + * master node). + */ + if (c->in_a_category_cnt != c->main_lebs || + c->lst.empty_lebs - c->lst.taken_empty_lebs > 0) { + ubifs_assert(c->freeable_cnt == 0); lprops = scan_for_leb_for_idx(c); if (IS_ERR(lprops)) { err = PTR_ERR(lprops); diff --git a/fs/ubifs/lprops.c b/fs/ubifs/lprops.c index e5a2a35a46dc..46190a7c42a6 100644 --- a/fs/ubifs/lprops.c +++ b/fs/ubifs/lprops.c @@ -300,8 +300,11 @@ void ubifs_add_to_cat(struct ubifs_info *c, struct ubifs_lprops *lprops, default: ubifs_assert(0); } + lprops->flags &= ~LPROPS_CAT_MASK; lprops->flags |= cat; + c->in_a_category_cnt += 1; + ubifs_assert(c->in_a_category_cnt <= c->main_lebs); } /** @@ -334,6 +337,9 @@ static void ubifs_remove_from_cat(struct ubifs_info *c, default: ubifs_assert(0); } + + c->in_a_category_cnt -= 1; + ubifs_assert(c->in_a_category_cnt >= 0); } /** diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index 5486346d0a3f..d133c276fe05 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h @@ -1183,6 +1183,8 @@ struct ubifs_debug_info; * @freeable_list: list of freeable non-index LEBs (free + dirty == @leb_size) * @frdi_idx_list: list of freeable index LEBs (free + dirty == @leb_size) * @freeable_cnt: number of freeable LEBs in @freeable_list + * @in_a_category_cnt: count of lprops which are in a certain category, which + * basically meants that they were loaded from the flash * * @ltab_lnum: LEB number of LPT's own lprops table * @ltab_offs: offset of LPT's own lprops table @@ -1412,6 +1414,7 @@ struct ubifs_info { struct list_head freeable_list; struct list_head frdi_idx_list; int freeable_cnt; + int in_a_category_cnt; int ltab_lnum; int ltab_offs; diff --git a/fs/xattr.c b/fs/xattr.c index e164dddb8e96..e21c119f4f99 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -846,7 +846,7 @@ static int __simple_xattr_set(struct simple_xattrs *xattrs, const char *name, const void *value, size_t size, int flags) { struct simple_xattr *xattr; - struct simple_xattr *uninitialized_var(new_xattr); + struct simple_xattr *new_xattr = NULL; int err = 0; /* value == NULL means remove */ |