diff options
Diffstat (limited to 'fs')
59 files changed, 973 insertions, 581 deletions
diff --git a/fs/afs/cache.h b/fs/afs/cache.h deleted file mode 100644 index 5c4f6b499e90..000000000000 --- a/fs/afs/cache.h +++ /dev/null @@ -1,12 +0,0 @@ -/* AFS local cache management interface - * - * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved. - * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include <linux/fscache.h> diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 106be66dafd2..6ece2a13bf71 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -18,10 +18,10 @@ #include <linux/key.h> #include <linux/workqueue.h> #include <linux/sched.h> +#include <linux/fscache.h> #include "afs.h" #include "afs_vl.h" -#include "cache.h" #define AFS_CELL_MAX_ADDRS 15 diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c index f128427b995b..69b355ae7f49 100644 --- a/fs/btrfs/acl.c +++ b/fs/btrfs/acl.c @@ -27,7 +27,7 @@ #include "btrfs_inode.h" #include "xattr.h" -#ifdef CONFIG_FS_POSIX_ACL +#ifdef CONFIG_BTRFS_POSIX_ACL static struct posix_acl *btrfs_get_acl(struct inode *inode, int type) { @@ -313,7 +313,7 @@ struct xattr_handler btrfs_xattr_acl_access_handler = { .set = btrfs_xattr_acl_access_set, }; -#else /* CONFIG_FS_POSIX_ACL */ +#else /* CONFIG_BTRFS_POSIX_ACL */ int btrfs_acl_chmod(struct inode *inode) { @@ -325,4 +325,4 @@ int btrfs_init_acl(struct inode *inode, struct inode *dir) return 0; } -#endif /* CONFIG_FS_POSIX_ACL */ +#endif /* CONFIG_BTRFS_POSIX_ACL */ diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 82ee56bba299..a54d354cefcb 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -128,6 +128,14 @@ struct btrfs_inode { u64 last_unlink_trans; /* + * These two counters are for delalloc metadata reservations. We keep + * track of how many extents we've accounted for vs how many extents we + * have. + */ + int delalloc_reserved_extents; + int delalloc_extents; + + /* * ordered_data_close is set by truncate when a file that used * to have good data has been truncated to zero. When it is set * the btrfs file release call will add this inode to the diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 80599b4e42bd..dd8ced9814c4 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -675,18 +675,19 @@ struct btrfs_space_info { current allocations */ u64 bytes_readonly; /* total bytes that are read only */ u64 bytes_super; /* total bytes reserved for the super blocks */ - - /* delalloc accounting */ - u64 bytes_delalloc; /* number of bytes reserved for allocation, - this space is not necessarily reserved yet - by the allocator */ + u64 bytes_root; /* the number of bytes needed to commit a + transaction */ u64 bytes_may_use; /* number of bytes that may be used for - delalloc */ + delalloc/allocations */ + u64 bytes_delalloc; /* number of bytes currently reserved for + delayed allocation */ int full; /* indicates that we cannot allocate any more chunks for this space */ int force_alloc; /* set if we need to force a chunk alloc for this space */ + int force_delalloc; /* make people start doing filemap_flush until + we're under a threshold */ struct list_head list; @@ -695,6 +696,9 @@ struct btrfs_space_info { spinlock_t lock; struct rw_semaphore groups_sem; atomic_t caching_threads; + + int allocating_chunk; + wait_queue_head_t wait; }; /* @@ -2022,7 +2026,12 @@ u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags); void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *ionde); void btrfs_clear_space_info_full(struct btrfs_fs_info *info); -int btrfs_check_metadata_free_space(struct btrfs_root *root); +int btrfs_reserve_metadata_space(struct btrfs_root *root, int num_items); +int btrfs_unreserve_metadata_space(struct btrfs_root *root, int num_items); +int btrfs_unreserve_metadata_for_delalloc(struct btrfs_root *root, + struct inode *inode, int num_items); +int btrfs_reserve_metadata_for_delalloc(struct btrfs_root *root, + struct inode *inode, int num_items); int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode, u64 bytes); void btrfs_free_reserved_data_space(struct btrfs_root *root, @@ -2326,7 +2335,7 @@ int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync); int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, int skip_pinned); int btrfs_check_file(struct btrfs_root *root, struct inode *inode); -extern struct file_operations btrfs_file_operations; +extern const struct file_operations btrfs_file_operations; int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode, u64 start, u64 end, u64 locked_end, @@ -2357,7 +2366,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options); int btrfs_sync_fs(struct super_block *sb, int wait); /* acl.c */ -#ifdef CONFIG_FS_POSIX_ACL +#ifdef CONFIG_BTRFS_POSIX_ACL int btrfs_check_acl(struct inode *inode, int mask); #else #define btrfs_check_acl NULL diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 644e796fd643..af0435f79fa6 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -822,14 +822,14 @@ struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, int btrfs_write_tree_block(struct extent_buffer *buf) { - return btrfs_fdatawrite_range(buf->first_page->mapping, buf->start, - buf->start + buf->len - 1, WB_SYNC_ALL); + return filemap_fdatawrite_range(buf->first_page->mapping, buf->start, + buf->start + buf->len - 1); } int btrfs_wait_tree_block_writeback(struct extent_buffer *buf) { - return btrfs_wait_on_page_writeback_range(buf->first_page->mapping, - buf->start, buf->start + buf->len - 1); + return filemap_fdatawait_range(buf->first_page->mapping, + buf->start, buf->start + buf->len - 1); } struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, @@ -1630,7 +1630,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->sb = sb; fs_info->max_extent = (u64)-1; fs_info->max_inline = 8192 * 1024; - fs_info->metadata_ratio = 8; + fs_info->metadata_ratio = 0; fs_info->thread_pool_size = min_t(unsigned long, num_online_cpus() + 2, 8); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 993f93ff7ba6..359a754c782c 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -68,6 +68,8 @@ static int pin_down_bytes(struct btrfs_trans_handle *trans, struct extent_buffer **must_clean); static int find_next_key(struct btrfs_path *path, int level, struct btrfs_key *key); +static void dump_space_info(struct btrfs_space_info *info, u64 bytes, + int dump_block_groups); static noinline int block_group_cache_done(struct btrfs_block_group_cache *cache) @@ -2765,67 +2767,346 @@ void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *inode) alloc_target); } +static u64 calculate_bytes_needed(struct btrfs_root *root, int num_items) +{ + u64 num_bytes; + int level; + + level = BTRFS_MAX_LEVEL - 2; + /* + * NOTE: these calculations are absolutely the worst possible case. + * This assumes that _every_ item we insert will require a new leaf, and + * that the tree has grown to its maximum level size. + */ + + /* + * for every item we insert we could insert both an extent item and a + * extent ref item. Then for ever item we insert, we will need to cow + * both the original leaf, plus the leaf to the left and right of it. + * + * Unless we are talking about the extent root, then we just want the + * number of items * 2, since we just need the extent item plus its ref. + */ + if (root == root->fs_info->extent_root) + num_bytes = num_items * 2; + else + num_bytes = (num_items + (2 * num_items)) * 3; + + /* + * num_bytes is total number of leaves we could need times the leaf + * size, and then for every leaf we could end up cow'ing 2 nodes per + * level, down to the leaf level. + */ + num_bytes = (num_bytes * root->leafsize) + + (num_bytes * (level * 2)) * root->nodesize; + + return num_bytes; +} + /* - * for now this just makes sure we have at least 5% of our metadata space free - * for use. + * Unreserve metadata space for delalloc. If we have less reserved credits than + * we have extents, this function does nothing. */ -int btrfs_check_metadata_free_space(struct btrfs_root *root) +int btrfs_unreserve_metadata_for_delalloc(struct btrfs_root *root, + struct inode *inode, int num_items) { struct btrfs_fs_info *info = root->fs_info; struct btrfs_space_info *meta_sinfo; - u64 alloc_target, thresh; - int committed = 0, ret; + u64 num_bytes; + u64 alloc_target; + bool bug = false; /* get the space info for where the metadata will live */ alloc_target = btrfs_get_alloc_profile(root, 0); meta_sinfo = __find_space_info(info, alloc_target); - if (!meta_sinfo) - goto alloc; -again: + num_bytes = calculate_bytes_needed(root->fs_info->extent_root, + num_items); + spin_lock(&meta_sinfo->lock); - if (!meta_sinfo->full) - thresh = meta_sinfo->total_bytes * 80; - else - thresh = meta_sinfo->total_bytes * 95; + if (BTRFS_I(inode)->delalloc_reserved_extents <= + BTRFS_I(inode)->delalloc_extents) { + spin_unlock(&meta_sinfo->lock); + return 0; + } + + BTRFS_I(inode)->delalloc_reserved_extents--; + BUG_ON(BTRFS_I(inode)->delalloc_reserved_extents < 0); + + if (meta_sinfo->bytes_delalloc < num_bytes) { + bug = true; + meta_sinfo->bytes_delalloc = 0; + } else { + meta_sinfo->bytes_delalloc -= num_bytes; + } + spin_unlock(&meta_sinfo->lock); + BUG_ON(bug); + + return 0; +} + +static void check_force_delalloc(struct btrfs_space_info *meta_sinfo) +{ + u64 thresh; + + thresh = meta_sinfo->bytes_used + meta_sinfo->bytes_reserved + + meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly + + meta_sinfo->bytes_super + meta_sinfo->bytes_root + + meta_sinfo->bytes_may_use; + + thresh = meta_sinfo->total_bytes - thresh; + thresh *= 80; do_div(thresh, 100); + if (thresh <= meta_sinfo->bytes_delalloc) + meta_sinfo->force_delalloc = 1; + else + meta_sinfo->force_delalloc = 0; +} - if (meta_sinfo->bytes_used + meta_sinfo->bytes_reserved + - meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly + - meta_sinfo->bytes_super > thresh) { - struct btrfs_trans_handle *trans; - if (!meta_sinfo->full) { - meta_sinfo->force_alloc = 1; +static int maybe_allocate_chunk(struct btrfs_root *root, + struct btrfs_space_info *info) +{ + struct btrfs_super_block *disk_super = &root->fs_info->super_copy; + struct btrfs_trans_handle *trans; + bool wait = false; + int ret = 0; + u64 min_metadata; + u64 free_space; + + free_space = btrfs_super_total_bytes(disk_super); + /* + * we allow the metadata to grow to a max of either 5gb or 5% of the + * space in the volume. + */ + min_metadata = min((u64)5 * 1024 * 1024 * 1024, + div64_u64(free_space * 5, 100)); + if (info->total_bytes >= min_metadata) { + spin_unlock(&info->lock); + return 0; + } + + if (info->full) { + spin_unlock(&info->lock); + return 0; + } + + if (!info->allocating_chunk) { + info->force_alloc = 1; + info->allocating_chunk = 1; + init_waitqueue_head(&info->wait); + } else { + wait = true; + } + + spin_unlock(&info->lock); + + if (wait) { + wait_event(info->wait, + !info->allocating_chunk); + return 1; + } + + trans = btrfs_start_transaction(root, 1); + if (!trans) { + ret = -ENOMEM; + goto out; + } + + ret = do_chunk_alloc(trans, root->fs_info->extent_root, + 4096 + 2 * 1024 * 1024, + info->flags, 0); + btrfs_end_transaction(trans, root); + if (ret) + goto out; +out: + spin_lock(&info->lock); + info->allocating_chunk = 0; + spin_unlock(&info->lock); + wake_up(&info->wait); + + if (ret) + return 0; + return 1; +} + +/* + * Reserve metadata space for delalloc. + */ +int btrfs_reserve_metadata_for_delalloc(struct btrfs_root *root, + struct inode *inode, int num_items) +{ + struct btrfs_fs_info *info = root->fs_info; + struct btrfs_space_info *meta_sinfo; + u64 num_bytes; + u64 used; + u64 alloc_target; + int flushed = 0; + int force_delalloc; + + /* get the space info for where the metadata will live */ + alloc_target = btrfs_get_alloc_profile(root, 0); + meta_sinfo = __find_space_info(info, alloc_target); + + num_bytes = calculate_bytes_needed(root->fs_info->extent_root, + num_items); +again: + spin_lock(&meta_sinfo->lock); + + force_delalloc = meta_sinfo->force_delalloc; + + if (unlikely(!meta_sinfo->bytes_root)) + meta_sinfo->bytes_root = calculate_bytes_needed(root, 6); + + if (!flushed) + meta_sinfo->bytes_delalloc += num_bytes; + + used = meta_sinfo->bytes_used + meta_sinfo->bytes_reserved + + meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly + + meta_sinfo->bytes_super + meta_sinfo->bytes_root + + meta_sinfo->bytes_may_use + meta_sinfo->bytes_delalloc; + + if (used > meta_sinfo->total_bytes) { + flushed++; + + if (flushed == 1) { + if (maybe_allocate_chunk(root, meta_sinfo)) + goto again; + flushed++; + } else { spin_unlock(&meta_sinfo->lock); -alloc: - trans = btrfs_start_transaction(root, 1); - if (!trans) - return -ENOMEM; + } - ret = do_chunk_alloc(trans, root->fs_info->extent_root, - 2 * 1024 * 1024, alloc_target, 0); - btrfs_end_transaction(trans, root); - if (!meta_sinfo) { - meta_sinfo = __find_space_info(info, - alloc_target); - } + if (flushed == 2) { + filemap_flush(inode->i_mapping); + goto again; + } else if (flushed == 3) { + btrfs_start_delalloc_inodes(root); + btrfs_wait_ordered_extents(root, 0); goto again; } + spin_lock(&meta_sinfo->lock); + meta_sinfo->bytes_delalloc -= num_bytes; spin_unlock(&meta_sinfo->lock); + printk(KERN_ERR "enospc, has %d, reserved %d\n", + BTRFS_I(inode)->delalloc_extents, + BTRFS_I(inode)->delalloc_reserved_extents); + dump_space_info(meta_sinfo, 0, 0); + return -ENOSPC; + } - if (!committed) { - committed = 1; - trans = btrfs_join_transaction(root, 1); - if (!trans) - return -ENOMEM; - ret = btrfs_commit_transaction(trans, root); - if (ret) - return ret; + BTRFS_I(inode)->delalloc_reserved_extents++; + check_force_delalloc(meta_sinfo); + spin_unlock(&meta_sinfo->lock); + + if (!flushed && force_delalloc) + filemap_flush(inode->i_mapping); + + return 0; +} + +/* + * unreserve num_items number of items worth of metadata space. This needs to + * be paired with btrfs_reserve_metadata_space. + * + * NOTE: if you have the option, run this _AFTER_ you do a + * btrfs_end_transaction, since btrfs_end_transaction will run delayed ref + * oprations which will result in more used metadata, so we want to make sure we + * can do that without issue. + */ +int btrfs_unreserve_metadata_space(struct btrfs_root *root, int num_items) +{ + struct btrfs_fs_info *info = root->fs_info; + struct btrfs_space_info *meta_sinfo; + u64 num_bytes; + u64 alloc_target; + bool bug = false; + + /* get the space info for where the metadata will live */ + alloc_target = btrfs_get_alloc_profile(root, 0); + meta_sinfo = __find_space_info(info, alloc_target); + + num_bytes = calculate_bytes_needed(root, num_items); + + spin_lock(&meta_sinfo->lock); + if (meta_sinfo->bytes_may_use < num_bytes) { + bug = true; + meta_sinfo->bytes_may_use = 0; + } else { + meta_sinfo->bytes_may_use -= num_bytes; + } + spin_unlock(&meta_sinfo->lock); + + BUG_ON(bug); + + return 0; +} + +/* + * Reserve some metadata space for use. We'll calculate the worste case number + * of bytes that would be needed to modify num_items number of items. If we + * have space, fantastic, if not, you get -ENOSPC. Please call + * btrfs_unreserve_metadata_space when you are done for the _SAME_ number of + * items you reserved, since whatever metadata you needed should have already + * been allocated. + * + * This will commit the transaction to make more space if we don't have enough + * metadata space. THe only time we don't do this is if we're reserving space + * inside of a transaction, then we will just return -ENOSPC and it is the + * callers responsibility to handle it properly. + */ +int btrfs_reserve_metadata_space(struct btrfs_root *root, int num_items) +{ + struct btrfs_fs_info *info = root->fs_info; + struct btrfs_space_info *meta_sinfo; + u64 num_bytes; + u64 used; + u64 alloc_target; + int retries = 0; + + /* get the space info for where the metadata will live */ + alloc_target = btrfs_get_alloc_profile(root, 0); + meta_sinfo = __find_space_info(info, alloc_target); + + num_bytes = calculate_bytes_needed(root, num_items); +again: + spin_lock(&meta_sinfo->lock); + + if (unlikely(!meta_sinfo->bytes_root)) + meta_sinfo->bytes_root = calculate_bytes_needed(root, 6); + + if (!retries) + meta_sinfo->bytes_may_use += num_bytes; + + used = meta_sinfo->bytes_used + meta_sinfo->bytes_reserved + + meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly + + meta_sinfo->bytes_super + meta_sinfo->bytes_root + + meta_sinfo->bytes_may_use + meta_sinfo->bytes_delalloc; + + if (used > meta_sinfo->total_bytes) { + retries++; + if (retries == 1) { + if (maybe_allocate_chunk(root, meta_sinfo)) + goto again; + retries++; + } else { + spin_unlock(&meta_sinfo->lock); + } + + if (retries == 2) { + btrfs_start_delalloc_inodes(root); + btrfs_wait_ordered_extents(root, 0); goto again; } + spin_lock(&meta_sinfo->lock); + meta_sinfo->bytes_may_use -= num_bytes; + spin_unlock(&meta_sinfo->lock); + + dump_space_info(meta_sinfo, 0, 0); return -ENOSPC; } + + check_force_delalloc(meta_sinfo); spin_unlock(&meta_sinfo->lock); return 0; @@ -2888,7 +3169,7 @@ alloc: spin_unlock(&data_sinfo->lock); /* commit the current transaction and try again */ - if (!committed) { + if (!committed && !root->fs_info->open_ioctl_trans) { committed = 1; trans = btrfs_join_transaction(root, 1); if (!trans) @@ -2916,7 +3197,7 @@ alloc: BTRFS_I(inode)->reserved_bytes += bytes; spin_unlock(&data_sinfo->lock); - return btrfs_check_metadata_free_space(root); + return 0; } /* @@ -3015,17 +3296,15 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, BUG_ON(!space_info); spin_lock(&space_info->lock); - if (space_info->force_alloc) { + if (space_info->force_alloc) force = 1; - space_info->force_alloc = 0; - } if (space_info->full) { spin_unlock(&space_info->lock); goto out; } thresh = space_info->total_bytes - space_info->bytes_readonly; - thresh = div_factor(thresh, 6); + thresh = div_factor(thresh, 8); if (!force && (space_info->bytes_used + space_info->bytes_pinned + space_info->bytes_reserved + alloc_bytes) < thresh) { @@ -3039,7 +3318,7 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, * we keep a reasonable number of metadata chunks allocated in the * FS as well. */ - if (flags & BTRFS_BLOCK_GROUP_DATA) { + if (flags & BTRFS_BLOCK_GROUP_DATA && fs_info->metadata_ratio) { fs_info->data_chunk_allocations++; if (!(fs_info->data_chunk_allocations % fs_info->metadata_ratio)) @@ -3047,8 +3326,11 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, } ret = btrfs_alloc_chunk(trans, extent_root, flags); + spin_lock(&space_info->lock); if (ret) space_info->full = 1; + space_info->force_alloc = 0; + spin_unlock(&space_info->lock); out: mutex_unlock(&extent_root->fs_info->chunk_mutex); return ret; @@ -4063,21 +4345,32 @@ loop: return ret; } -static void dump_space_info(struct btrfs_space_info *info, u64 bytes) +static void dump_space_info(struct btrfs_space_info *info, u64 bytes, + int dump_block_groups) { struct btrfs_block_group_cache *cache; + spin_lock(&info->lock); printk(KERN_INFO "space_info has %llu free, is %sfull\n", (unsigned long long)(info->total_bytes - info->bytes_used - - info->bytes_pinned - info->bytes_reserved), + info->bytes_pinned - info->bytes_reserved - + info->bytes_super), (info->full) ? "" : "not "); printk(KERN_INFO "space_info total=%llu, pinned=%llu, delalloc=%llu," - " may_use=%llu, used=%llu\n", + " may_use=%llu, used=%llu, root=%llu, super=%llu, reserved=%llu" + "\n", (unsigned long long)info->total_bytes, (unsigned long long)info->bytes_pinned, (unsigned long long)info->bytes_delalloc, (unsigned long long)info->bytes_may_use, - (unsigned long long)info->bytes_used); + (unsigned long long)info->bytes_used, + (unsigned long long)info->bytes_root, + (unsigned long long)info->bytes_super, + (unsigned long long)info->bytes_reserved); + spin_unlock(&info->lock); + + if (!dump_block_groups) + return; down_read(&info->groups_sem); list_for_each_entry(cache, &info->block_groups, list) { @@ -4145,7 +4438,7 @@ again: printk(KERN_ERR "btrfs allocation failed flags %llu, " "wanted %llu\n", (unsigned long long)data, (unsigned long long)num_bytes); - dump_space_info(sinfo, num_bytes); + dump_space_info(sinfo, num_bytes, 1); } return ret; diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 0cb88f8146ea..de1793ba004a 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -280,6 +280,14 @@ static struct extent_buffer *buffer_search(struct extent_io_tree *tree, return NULL; } +static void merge_cb(struct extent_io_tree *tree, struct extent_state *new, + struct extent_state *other) +{ + if (tree->ops && tree->ops->merge_extent_hook) + tree->ops->merge_extent_hook(tree->mapping->host, new, + other); +} + /* * utility function to look for merge candidates inside a given range. * Any extents with matching state are merged together into a single @@ -303,6 +311,7 @@ static int merge_state(struct extent_io_tree *tree, other = rb_entry(other_node, struct extent_state, rb_node); if (other->end == state->start - 1 && other->state == state->state) { + merge_cb(tree, state, other); state->start = other->start; other->tree = NULL; rb_erase(&other->rb_node, &tree->state); @@ -314,33 +323,37 @@ static int merge_state(struct extent_io_tree *tree, other = rb_entry(other_node, struct extent_state, rb_node); if (other->start == state->end + 1 && other->state == state->state) { + merge_cb(tree, state, other); other->start = state->start; state->tree = NULL; rb_erase(&state->rb_node, &tree->state); free_extent_state(state); + state = NULL; } } + return 0; } -static void set_state_cb(struct extent_io_tree *tree, +static int set_state_cb(struct extent_io_tree *tree, struct extent_state *state, unsigned long bits) { if (tree->ops && tree->ops->set_bit_hook) { - tree->ops->set_bit_hook(tree->mapping->host, state->start, - state->end, state->state, bits); + return tree->ops->set_bit_hook(tree->mapping->host, + state->start, state->end, + state->state, bits); } + + return 0; } static void clear_state_cb(struct extent_io_tree *tree, struct extent_state *state, unsigned long bits) { - if (tree->ops && tree->ops->clear_bit_hook) { - tree->ops->clear_bit_hook(tree->mapping->host, state->start, - state->end, state->state, bits); - } + if (tree->ops && tree->ops->clear_bit_hook) + tree->ops->clear_bit_hook(tree->mapping->host, state, bits); } /* @@ -358,6 +371,7 @@ static int insert_state(struct extent_io_tree *tree, int bits) { struct rb_node *node; + int ret; if (end < start) { printk(KERN_ERR "btrfs end < start %llu %llu\n", @@ -365,11 +379,14 @@ static int insert_state(struct extent_io_tree *tree, (unsigned long long)start); WARN_ON(1); } - if (bits & EXTENT_DIRTY) - tree->dirty_bytes += end - start + 1; state->start = start; state->end = end; - set_state_cb(tree, state, bits); + ret = set_state_cb(tree, state, bits); + if (ret) + return ret; + + if (bits & EXTENT_DIRTY) + tree->dirty_bytes += end - start + 1; state->state |= bits; node = tree_insert(&tree->state, end, &state->rb_node); if (node) { @@ -387,6 +404,15 @@ static int insert_state(struct extent_io_tree *tree, return 0; } +static int split_cb(struct extent_io_tree *tree, struct extent_state *orig, + u64 split) +{ + if (tree->ops && tree->ops->split_extent_hook) + return tree->ops->split_extent_hook(tree->mapping->host, + orig, split); + return 0; +} + /* * split a given extent state struct in two, inserting the preallocated * struct 'prealloc' as the newly created second half. 'split' indicates an @@ -405,6 +431,9 @@ static int split_state(struct extent_io_tree *tree, struct extent_state *orig, struct extent_state *prealloc, u64 split) { struct rb_node *node; + + split_cb(tree, orig, split); + prealloc->start = orig->start; prealloc->end = split - 1; prealloc->state = orig->state; @@ -542,8 +571,8 @@ hit_next: if (err) goto out; if (state->end <= end) { - set |= clear_state_bit(tree, state, bits, - wake, delete); + set |= clear_state_bit(tree, state, bits, wake, + delete); if (last_end == (u64)-1) goto out; start = last_end + 1; @@ -561,12 +590,11 @@ hit_next: prealloc = alloc_extent_state(GFP_ATOMIC); err = split_state(tree, state, prealloc, end + 1); BUG_ON(err == -EEXIST); - if (wake) wake_up(&state->wq); - set |= clear_state_bit(tree, prealloc, bits, - wake, delete); + set |= clear_state_bit(tree, prealloc, bits, wake, delete); + prealloc = NULL; goto out; } @@ -667,16 +695,23 @@ out: return 0; } -static void set_state_bits(struct extent_io_tree *tree, +static int set_state_bits(struct extent_io_tree *tree, struct extent_state *state, int bits) { + int ret; + + ret = set_state_cb(tree, state, bits); + if (ret) + return ret; + if ((bits & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) { u64 range = state->end - state->start + 1; tree->dirty_bytes += range; } - set_state_cb(tree, state, bits); state->state |= bits; + + return 0; } static void cache_state(struct extent_state *state, @@ -758,7 +793,10 @@ hit_next: goto out; } - set_state_bits(tree, state, bits); + err = set_state_bits(tree, state, bits); + if (err) + goto out; + cache_state(state, cached_state); merge_state(tree, state); if (last_end == (u64)-1) @@ -805,7 +843,9 @@ hit_next: if (err) goto out; if (state->end <= end) { - set_state_bits(tree, state, bits); + err = set_state_bits(tree, state, bits); + if (err) + goto out; cache_state(state, cached_state); merge_state(tree, state); if (last_end == (u64)-1) @@ -829,11 +869,13 @@ hit_next: this_end = last_start - 1; err = insert_state(tree, prealloc, start, this_end, bits); - cache_state(prealloc, cached_state); - prealloc = NULL; BUG_ON(err == -EEXIST); - if (err) + if (err) { + prealloc = NULL; goto out; + } + cache_state(prealloc, cached_state); + prealloc = NULL; start = this_end + 1; goto search_again; } @@ -852,7 +894,11 @@ hit_next: err = split_state(tree, state, prealloc, end + 1); BUG_ON(err == -EEXIST); - set_state_bits(tree, prealloc, bits); + err = set_state_bits(tree, prealloc, bits); + if (err) { + prealloc = NULL; + goto out; + } cache_state(prealloc, cached_state); merge_state(tree, prealloc); prealloc = NULL; diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 14ed16fd862d..4794ec891fed 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -60,8 +60,13 @@ struct extent_io_ops { struct extent_state *state, int uptodate); int (*set_bit_hook)(struct inode *inode, u64 start, u64 end, unsigned long old, unsigned long bits); - int (*clear_bit_hook)(struct inode *inode, u64 start, u64 end, - unsigned long old, unsigned long bits); + int (*clear_bit_hook)(struct inode *inode, struct extent_state *state, + unsigned long bits); + int (*merge_extent_hook)(struct inode *inode, + struct extent_state *new, + struct extent_state *other); + int (*split_extent_hook)(struct inode *inode, + struct extent_state *orig, u64 split); int (*write_cache_pages_lock_hook)(struct page *page); }; @@ -79,10 +84,14 @@ struct extent_state { u64 start; u64 end; /* inclusive */ struct rb_node rb_node; + + /* ADD NEW ELEMENTS AFTER THIS */ struct extent_io_tree *tree; wait_queue_head_t wq; atomic_t refs; unsigned long state; + u64 split_start; + u64 split_end; /* for use by the FS */ u64 private; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 571ad3c13b47..f19e1259a971 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -123,7 +123,10 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans, root->sectorsize - 1) & ~((u64)root->sectorsize - 1); end_of_last_block = start_pos + num_bytes - 1; - btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block); + err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block); + if (err) + return err; + for (i = 0; i < num_pages; i++) { struct page *p = pages[i]; SetPageUptodate(p); @@ -917,21 +920,35 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, start_pos = pos; vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); + + /* do the reserve before the mutex lock in case we have to do some + * flushing. We wouldn't deadlock, but this is more polite. + */ + err = btrfs_reserve_metadata_for_delalloc(root, inode, 1); + if (err) + goto out_nolock; + + mutex_lock(&inode->i_mutex); + current->backing_dev_info = inode->i_mapping->backing_dev_info; err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); if (err) - goto out_nolock; + goto out; + if (count == 0) - goto out_nolock; + goto out; err = file_remove_suid(file); if (err) - goto out_nolock; + goto out; + file_update_time(file); pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL); - mutex_lock(&inode->i_mutex); + /* generic_write_checks can change our pos */ + start_pos = pos; + BTRFS_I(inode)->sequence++; first_index = pos >> PAGE_CACHE_SHIFT; last_index = (pos + count) >> PAGE_CACHE_SHIFT; @@ -1005,9 +1022,8 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, } if (will_write) { - btrfs_fdatawrite_range(inode->i_mapping, pos, - pos + write_bytes - 1, - WB_SYNC_ALL); + filemap_fdatawrite_range(inode->i_mapping, pos, + pos + write_bytes - 1); } else { balance_dirty_pages_ratelimited_nr(inode->i_mapping, num_pages); @@ -1028,6 +1044,7 @@ out: mutex_unlock(&inode->i_mutex); if (ret) err = ret; + btrfs_unreserve_metadata_for_delalloc(root, inode, 1); out_nolock: kfree(pages); @@ -1184,7 +1201,7 @@ out: return ret > 0 ? EIO : ret; } -static struct vm_operations_struct btrfs_file_vm_ops = { +static const struct vm_operations_struct btrfs_file_vm_ops = { .fault = filemap_fault, .page_mkwrite = btrfs_page_mkwrite, }; @@ -1196,7 +1213,7 @@ static int btrfs_file_mmap(struct file *filp, struct vm_area_struct *vma) return 0; } -struct file_operations btrfs_file_operations = { +const struct file_operations btrfs_file_operations = { .llseek = generic_file_llseek, .read = do_sync_read, .aio_read = generic_file_aio_read, diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index e9b76bcd1c12..112e5aa85892 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -62,7 +62,7 @@ static const struct inode_operations btrfs_special_inode_operations; static const struct inode_operations btrfs_file_inode_operations; static const struct address_space_operations btrfs_aops; static const struct address_space_operations btrfs_symlink_aops; -static struct file_operations btrfs_dir_file_operations; +static const struct file_operations btrfs_dir_file_operations; static struct extent_io_ops btrfs_extent_io_ops; static struct kmem_cache *btrfs_inode_cachep; @@ -1159,6 +1159,83 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page, return ret; } +static int btrfs_split_extent_hook(struct inode *inode, + struct extent_state *orig, u64 split) +{ + struct btrfs_root *root = BTRFS_I(inode)->root; + u64 size; + + if (!(orig->state & EXTENT_DELALLOC)) + return 0; + + size = orig->end - orig->start + 1; + if (size > root->fs_info->max_extent) { + u64 num_extents; + u64 new_size; + + new_size = orig->end - split + 1; + num_extents = div64_u64(size + root->fs_info->max_extent - 1, + root->fs_info->max_extent); + + /* + * if we break a large extent up then leave delalloc_extents be, + * since we've already accounted for the large extent. + */ + if (div64_u64(new_size + root->fs_info->max_extent - 1, + root->fs_info->max_extent) < num_extents) + return 0; + } + + BTRFS_I(inode)->delalloc_extents++; + + return 0; +} + +/* + * extent_io.c merge_extent_hook, used to track merged delayed allocation + * extents so we can keep track of new extents that are just merged onto old + * extents, such as when we are doing sequential writes, so we can properly + * account for the metadata space we'll need. + */ +static int btrfs_merge_extent_hook(struct inode *inode, + struct extent_state *new, + struct extent_state *other) +{ + struct btrfs_root *root = BTRFS_I(inode)->root; + u64 new_size, old_size; + u64 num_extents; + + /* not delalloc, ignore it */ + if (!(other->state & EXTENT_DELALLOC)) + return 0; + + old_size = other->end - other->start + 1; + if (new->start < other->start) + new_size = other->end - new->start + 1; + else + new_size = new->end - other->start + 1; + + /* we're not bigger than the max, unreserve the space and go */ + if (new_size <= root->fs_info->max_extent) { + BTRFS_I(inode)->delalloc_extents--; + return 0; + } + + /* + * If we grew by another max_extent, just return, we want to keep that + * reserved amount. + */ + num_extents = div64_u64(old_size + root->fs_info->max_extent - 1, + root->fs_info->max_extent); + if (div64_u64(new_size + root->fs_info->max_extent - 1, + root->fs_info->max_extent) > num_extents) + return 0; + + BTRFS_I(inode)->delalloc_extents--; + + return 0; +} + /* * extent_io.c set_bit_hook, used to track delayed allocation * bytes in this file, and to maintain the list of inodes that @@ -1167,6 +1244,7 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page, static int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end, unsigned long old, unsigned long bits) { + /* * set_bit and clear bit hooks normally require _irqsave/restore * but in this case, we are only testeing for the DELALLOC @@ -1174,6 +1252,8 @@ static int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end, */ if (!(old & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) { struct btrfs_root *root = BTRFS_I(inode)->root; + + BTRFS_I(inode)->delalloc_extents++; btrfs_delalloc_reserve_space(root, inode, end - start + 1); spin_lock(&root->fs_info->delalloc_lock); BTRFS_I(inode)->delalloc_bytes += end - start + 1; @@ -1190,22 +1270,27 @@ static int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end, /* * extent_io.c clear_bit_hook, see set_bit_hook for why */ -static int btrfs_clear_bit_hook(struct inode *inode, u64 start, u64 end, - unsigned long old, unsigned long bits) +static int btrfs_clear_bit_hook(struct inode *inode, + struct extent_state *state, unsigned long bits) { /* * set_bit and clear bit hooks normally require _irqsave/restore * but in this case, we are only testeing for the DELALLOC * bit, which is only set or cleared with irqs on */ - if ((old & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) { + if ((state->state & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) { struct btrfs_root *root = BTRFS_I(inode)->root; + BTRFS_I(inode)->delalloc_extents--; + btrfs_unreserve_metadata_for_delalloc(root, inode, 1); + spin_lock(&root->fs_info->delalloc_lock); - if (end - start + 1 > root->fs_info->delalloc_bytes) { + if (state->end - state->start + 1 > + root->fs_info->delalloc_bytes) { printk(KERN_INFO "btrfs warning: delalloc account " "%llu %llu\n", - (unsigned long long)end - start + 1, + (unsigned long long) + state->end - state->start + 1, (unsigned long long) root->fs_info->delalloc_bytes); btrfs_delalloc_free_space(root, inode, (u64)-1); @@ -1213,9 +1298,12 @@ static int btrfs_clear_bit_hook(struct inode *inode, u64 start, u64 end, BTRFS_I(inode)->delalloc_bytes = 0; } else { btrfs_delalloc_free_space(root, inode, - end - start + 1); - root->fs_info->delalloc_bytes -= end - start + 1; - BTRFS_I(inode)->delalloc_bytes -= end - start + 1; + state->end - + state->start + 1); + root->fs_info->delalloc_bytes -= state->end - + state->start + 1; + BTRFS_I(inode)->delalloc_bytes -= state->end - + state->start + 1; } if (BTRFS_I(inode)->delalloc_bytes == 0 && !list_empty(&BTRFS_I(inode)->delalloc_inodes)) { @@ -2950,7 +3038,12 @@ again: goto again; } - btrfs_set_extent_delalloc(inode, page_start, page_end); + ret = btrfs_set_extent_delalloc(inode, page_start, page_end); + if (ret) { + unlock_extent(io_tree, page_start, page_end, GFP_NOFS); + goto out_unlock; + } + ret = 0; if (offset != PAGE_CACHE_SIZE) { kaddr = kmap(page); @@ -2981,15 +3074,11 @@ int btrfs_cont_expand(struct inode *inode, loff_t size) u64 last_byte; u64 cur_offset; u64 hole_size; - int err; + int err = 0; if (size <= hole_start) return 0; - err = btrfs_check_metadata_free_space(root); - if (err) - return err; - btrfs_truncate_page(inode->i_mapping, inode->i_size); while (1) { @@ -3024,12 +3113,18 @@ int btrfs_cont_expand(struct inode *inode, loff_t size) cur_offset, &hint_byte, 1); if (err) break; + + err = btrfs_reserve_metadata_space(root, 1); + if (err) + break; + err = btrfs_insert_file_extent(trans, root, inode->i_ino, cur_offset, 0, 0, hole_size, 0, hole_size, 0, 0, 0); btrfs_drop_extent_cache(inode, hole_start, last_byte - 1, 0); + btrfs_unreserve_metadata_space(root, 1); } free_extent_map(em); cur_offset = last_byte; @@ -3990,11 +4085,18 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, if (!new_valid_dev(rdev)) return -EINVAL; - err = btrfs_check_metadata_free_space(root); + /* + * 2 for inode item and ref + * 2 for dir items + * 1 for xattr if selinux is on + */ + err = btrfs_reserve_metadata_space(root, 5); if (err) - goto fail; + return err; trans = btrfs_start_transaction(root, 1); + if (!trans) + goto fail; btrfs_set_trans_block_group(trans, dir); err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); @@ -4032,6 +4134,7 @@ out_unlock: nr = trans->blocks_used; btrfs_end_transaction_throttle(trans, root); fail: + btrfs_unreserve_metadata_space(root, 5); if (drop_inode) { inode_dec_link_count(inode); iput(inode); @@ -4052,10 +4155,18 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, u64 objectid; u64 index = 0; - err = btrfs_check_metadata_free_space(root); + /* + * 2 for inode item and ref + * 2 for dir items + * 1 for xattr if selinux is on + */ + err = btrfs_reserve_metadata_space(root, 5); if (err) - goto fail; + return err; + trans = btrfs_start_transaction(root, 1); + if (!trans) + goto fail; btrfs_set_trans_block_group(trans, dir); err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); @@ -4096,6 +4207,7 @@ out_unlock: nr = trans->blocks_used; btrfs_end_transaction_throttle(trans, root); fail: + btrfs_unreserve_metadata_space(root, 5); if (drop_inode) { inode_dec_link_count(inode); iput(inode); @@ -4118,10 +4230,16 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, if (inode->i_nlink == 0) return -ENOENT; - btrfs_inc_nlink(inode); - err = btrfs_check_metadata_free_space(root); + /* + * 1 item for inode ref + * 2 items for dir items + */ + err = btrfs_reserve_metadata_space(root, 3); if (err) - goto fail; + return err; + + btrfs_inc_nlink(inode); + err = btrfs_set_inode_index(dir, &index); if (err) goto fail; @@ -4145,6 +4263,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, nr = trans->blocks_used; btrfs_end_transaction_throttle(trans, root); fail: + btrfs_unreserve_metadata_space(root, 3); if (drop_inode) { inode_dec_link_count(inode); iput(inode); @@ -4164,17 +4283,21 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) u64 index = 0; unsigned long nr = 1; - err = btrfs_check_metadata_free_space(root); + /* + * 2 items for inode and ref + * 2 items for dir items + * 1 for xattr if selinux is on + */ + err = btrfs_reserve_metadata_space(root, 5); if (err) - goto out_unlock; + return err; trans = btrfs_start_transaction(root, 1); - btrfs_set_trans_block_group(trans, dir); - - if (IS_ERR(trans)) { - err = PTR_ERR(trans); + if (!trans) { + err = -ENOMEM; goto out_unlock; } + btrfs_set_trans_block_group(trans, dir); err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); if (err) { @@ -4223,6 +4346,7 @@ out_fail: btrfs_end_transaction_throttle(trans, root); out_unlock: + btrfs_unreserve_metadata_space(root, 5); if (drop_on_err) iput(inode); btrfs_btree_balance_dirty(root, nr); @@ -4747,6 +4871,13 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) goto out; } + ret = btrfs_reserve_metadata_for_delalloc(root, inode, 1); + if (ret) { + btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE); + ret = VM_FAULT_SIGBUS; + goto out; + } + ret = VM_FAULT_NOPAGE; /* make the VM retry the fault */ again: lock_page(page); @@ -4778,7 +4909,23 @@ again: goto again; } - btrfs_set_extent_delalloc(inode, page_start, page_end); + /* + * XXX - page_mkwrite gets called every time the page is dirtied, even + * if it was already dirty, so for space accounting reasons we need to + * clear any delalloc bits for the range we are fixing to save. There + * is probably a better way to do this, but for now keep consistent with + * prepare_pages in the normal write path. + */ + clear_extent_bits(&BTRFS_I(inode)->io_tree, page_start, page_end, + EXTENT_DIRTY | EXTENT_DELALLOC, GFP_NOFS); + + ret = btrfs_set_extent_delalloc(inode, page_start, page_end); + if (ret) { + unlock_extent(io_tree, page_start, page_end, GFP_NOFS); + ret = VM_FAULT_SIGBUS; + btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE); + goto out_unlock; + } ret = 0; /* page is wholly or partially inside EOF */ @@ -4801,6 +4948,7 @@ again: unlock_extent(io_tree, page_start, page_end, GFP_NOFS); out_unlock: + btrfs_unreserve_metadata_for_delalloc(root, inode, 1); if (!ret) return VM_FAULT_LOCKED; unlock_page(page); @@ -4917,6 +5065,8 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) return NULL; ei->last_trans = 0; ei->logged_trans = 0; + ei->delalloc_extents = 0; + ei->delalloc_reserved_extents = 0; btrfs_ordered_inode_tree_init(&ei->ordered_tree); INIT_LIST_HEAD(&ei->i_orphan); INIT_LIST_HEAD(&ei->ordered_operations); @@ -5070,7 +5220,12 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, new_inode->i_size > BTRFS_EMPTY_DIR_SIZE) return -ENOTEMPTY; - ret = btrfs_check_metadata_free_space(root); + /* + * 2 items for dir items + * 1 item for orphan entry + * 1 item for ref + */ + ret = btrfs_reserve_metadata_space(root, 4); if (ret) return ret; @@ -5185,6 +5340,8 @@ out_fail: if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) up_read(&root->fs_info->subvol_sem); + + btrfs_unreserve_metadata_space(root, 4); return ret; } @@ -5256,11 +5413,18 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root)) return -ENAMETOOLONG; - err = btrfs_check_metadata_free_space(root); + /* + * 2 items for inode item and ref + * 2 items for dir items + * 1 item for xattr if selinux is on + */ + err = btrfs_reserve_metadata_space(root, 5); if (err) - goto out_fail; + return err; trans = btrfs_start_transaction(root, 1); + if (!trans) + goto out_fail; btrfs_set_trans_block_group(trans, dir); err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); @@ -5341,6 +5505,7 @@ out_unlock: nr = trans->blocks_used; btrfs_end_transaction_throttle(trans, root); out_fail: + btrfs_unreserve_metadata_space(root, 5); if (drop_inode) { inode_dec_link_count(inode); iput(inode); @@ -5362,6 +5527,11 @@ static int prealloc_file_range(struct btrfs_trans_handle *trans, while (num_bytes > 0) { alloc_size = min(num_bytes, root->fs_info->max_extent); + + ret = btrfs_reserve_metadata_space(root, 1); + if (ret) + goto out; + ret = btrfs_reserve_extent(trans, root, alloc_size, root->sectorsize, 0, alloc_hint, (u64)-1, &ins, 1); @@ -5381,6 +5551,7 @@ static int prealloc_file_range(struct btrfs_trans_handle *trans, num_bytes -= ins.offset; cur_offset += ins.offset; alloc_hint = ins.objectid + ins.offset; + btrfs_unreserve_metadata_space(root, 1); } out: if (cur_offset > start) { @@ -5544,7 +5715,7 @@ static const struct inode_operations btrfs_dir_ro_inode_operations = { .permission = btrfs_permission, }; -static struct file_operations btrfs_dir_file_operations = { +static const struct file_operations btrfs_dir_file_operations = { .llseek = generic_file_llseek, .read = generic_read_dir, .readdir = btrfs_real_readdir, @@ -5566,6 +5737,8 @@ static struct extent_io_ops btrfs_extent_io_ops = { .readpage_io_failed_hook = btrfs_io_failed_hook, .set_bit_hook = btrfs_set_bit_hook, .clear_bit_hook = btrfs_clear_bit_hook, + .merge_extent_hook = btrfs_merge_extent_hook, + .split_extent_hook = btrfs_split_extent_hook, }; /* diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index a8577a7f26ab..9a780c8d0ac8 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -239,7 +239,13 @@ static noinline int create_subvol(struct btrfs_root *root, u64 index = 0; unsigned long nr = 1; - ret = btrfs_check_metadata_free_space(root); + /* + * 1 - inode item + * 2 - refs + * 1 - root item + * 2 - dir items + */ + ret = btrfs_reserve_metadata_space(root, 6); if (ret) return ret; @@ -340,6 +346,9 @@ fail: err = btrfs_commit_transaction(trans, root); if (err && !ret) ret = err; + + btrfs_unreserve_metadata_space(root, 6); + btrfs_btree_balance_dirty(root, nr); return ret; } @@ -355,19 +364,27 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, if (!root->ref_cows) return -EINVAL; - ret = btrfs_check_metadata_free_space(root); + /* + * 1 - inode item + * 2 - refs + * 1 - root item + * 2 - dir items + */ + ret = btrfs_reserve_metadata_space(root, 6); if (ret) goto fail_unlock; pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS); if (!pending_snapshot) { ret = -ENOMEM; + btrfs_unreserve_metadata_space(root, 6); goto fail_unlock; } pending_snapshot->name = kmalloc(namelen + 1, GFP_NOFS); if (!pending_snapshot->name) { ret = -ENOMEM; kfree(pending_snapshot); + btrfs_unreserve_metadata_space(root, 6); goto fail_unlock; } memcpy(pending_snapshot->name, name, namelen); @@ -1215,15 +1232,15 @@ static long btrfs_ioctl_trans_start(struct file *file) struct inode *inode = fdentry(file)->d_inode; struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_trans_handle *trans; - int ret = 0; + int ret; + ret = -EPERM; if (!capable(CAP_SYS_ADMIN)) - return -EPERM; + goto out; - if (file->private_data) { - ret = -EINPROGRESS; + ret = -EINPROGRESS; + if (file->private_data) goto out; - } ret = mnt_want_write(file->f_path.mnt); if (ret) @@ -1233,12 +1250,19 @@ static long btrfs_ioctl_trans_start(struct file *file) root->fs_info->open_ioctl_trans++; mutex_unlock(&root->fs_info->trans_mutex); + ret = -ENOMEM; trans = btrfs_start_ioctl_transaction(root, 0); - if (trans) - file->private_data = trans; - else - ret = -ENOMEM; - /*printk(KERN_INFO "btrfs_ioctl_trans_start on %p\n", file);*/ + if (!trans) + goto out_drop; + + file->private_data = trans; + return 0; + +out_drop: + mutex_lock(&root->fs_info->trans_mutex); + root->fs_info->open_ioctl_trans--; + mutex_unlock(&root->fs_info->trans_mutex); + mnt_drop_write(file->f_path.mnt); out: return ret; } @@ -1254,24 +1278,20 @@ long btrfs_ioctl_trans_end(struct file *file) struct inode *inode = fdentry(file)->d_inode; struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_trans_handle *trans; - int ret = 0; trans = file->private_data; - if (!trans) { - ret = -EINVAL; - goto out; - } - btrfs_end_transaction(trans, root); + if (!trans) + return -EINVAL; file->private_data = NULL; + btrfs_end_transaction(trans, root); + mutex_lock(&root->fs_info->trans_mutex); root->fs_info->open_ioctl_trans--; mutex_unlock(&root->fs_info->trans_mutex); mnt_drop_write(file->f_path.mnt); - -out: - return ret; + return 0; } long btrfs_ioctl(struct file *file, unsigned int diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index b5d6d24726b0..897fba835f89 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -458,7 +458,7 @@ void btrfs_start_ordered_extent(struct inode *inode, * start IO on any dirty ones so the wait doesn't stall waiting * for pdflush to find them */ - btrfs_fdatawrite_range(inode->i_mapping, start, end, WB_SYNC_ALL); + filemap_fdatawrite_range(inode->i_mapping, start, end); if (wait) { wait_event(entry->wait, test_bit(BTRFS_ORDERED_COMPLETE, &entry->flags)); @@ -488,17 +488,15 @@ again: /* start IO across the range first to instantiate any delalloc * extents */ - btrfs_fdatawrite_range(inode->i_mapping, start, orig_end, WB_SYNC_ALL); + filemap_fdatawrite_range(inode->i_mapping, start, orig_end); /* The compression code will leave pages locked but return from * writepage without setting the page writeback. Starting again * with WB_SYNC_ALL will end up waiting for the IO to actually start. */ - btrfs_fdatawrite_range(inode->i_mapping, start, orig_end, WB_SYNC_ALL); + filemap_fdatawrite_range(inode->i_mapping, start, orig_end); - btrfs_wait_on_page_writeback_range(inode->i_mapping, - start >> PAGE_CACHE_SHIFT, - orig_end >> PAGE_CACHE_SHIFT); + filemap_fdatawait_range(inode->i_mapping, start, orig_end); end = orig_end; found = 0; @@ -716,89 +714,6 @@ out: } -/** - * taken from mm/filemap.c because it isn't exported - * - * __filemap_fdatawrite_range - start writeback on mapping dirty pages in range - * @mapping: address space structure to write - * @start: offset in bytes where the range starts - * @end: offset in bytes where the range ends (inclusive) - * @sync_mode: enable synchronous operation - * - * Start writeback against all of a mapping's dirty pages that lie - * within the byte offsets <start, end> inclusive. - * - * If sync_mode is WB_SYNC_ALL then this is a "data integrity" operation, as - * opposed to a regular memory cleansing writeback. The difference between - * these two operations is that if a dirty page/buffer is encountered, it must - * be waited upon, and not just skipped over. - */ -int btrfs_fdatawrite_range(struct address_space *mapping, loff_t start, - loff_t end, int sync_mode) -{ - struct writeback_control wbc = { - .sync_mode = sync_mode, - .nr_to_write = mapping->nrpages * 2, - .range_start = start, - .range_end = end, - }; - return btrfs_writepages(mapping, &wbc); -} - -/** - * taken from mm/filemap.c because it isn't exported - * - * wait_on_page_writeback_range - wait for writeback to complete - * @mapping: target address_space - * @start: beginning page index - * @end: ending page index - * - * Wait for writeback to complete against pages indexed by start->end - * inclusive - */ -int btrfs_wait_on_page_writeback_range(struct address_space *mapping, - pgoff_t start, pgoff_t end) -{ - struct pagevec pvec; - int nr_pages; - int ret = 0; - pgoff_t index; - - if (end < start) - return 0; - - pagevec_init(&pvec, 0); - index = start; - while ((index <= end) && - (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, - PAGECACHE_TAG_WRITEBACK, - min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1)) != 0) { - unsigned i; - - for (i = 0; i < nr_pages; i++) { - struct page *page = pvec.pages[i]; - - /* until radix tree lookup accepts end_index */ - if (page->index > end) - continue; - - wait_on_page_writeback(page); - if (PageError(page)) - ret = -EIO; - } - pagevec_release(&pvec); - cond_resched(); - } - - /* Check for outstanding write errors */ - if (test_and_clear_bit(AS_ENOSPC, &mapping->flags)) - ret = -ENOSPC; - if (test_and_clear_bit(AS_EIO, &mapping->flags)) - ret = -EIO; - - return ret; -} - /* * add a given inode to the list of inodes that must be fully on * disk before a transaction commit finishes. diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index 993a7ea45c70..f82e87488ca8 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h @@ -153,10 +153,6 @@ btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset); int btrfs_ordered_update_i_size(struct inode *inode, struct btrfs_ordered_extent *ordered); int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, u32 *sum); -int btrfs_wait_on_page_writeback_range(struct address_space *mapping, - pgoff_t start, pgoff_t end); -int btrfs_fdatawrite_range(struct address_space *mapping, loff_t start, - loff_t end, int sync_mode); int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only); int btrfs_run_ordered_operations(struct btrfs_root *root, int wait); int btrfs_add_ordered_operation(struct btrfs_trans_handle *trans, diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 67035385444c..9de9b2236419 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -344,7 +344,9 @@ static int btrfs_fill_super(struct super_block *sb, sb->s_export_op = &btrfs_export_ops; sb->s_xattr = btrfs_xattr_handlers; sb->s_time_gran = 1; +#ifdef CONFIG_BTRFS_POSIX_ACL sb->s_flags |= MS_POSIXACL; +#endif tree_root = open_ctree(sb, fs_devices, (char *)data); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 88f866f85e7a..0b8f36d4400a 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -186,6 +186,9 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, h->alloc_exclude_start = 0; h->delayed_ref_updates = 0; + if (!current->journal_info) + current->journal_info = h; + root->fs_info->running_transaction->use_count++; record_root_in_trans(h, root); mutex_unlock(&root->fs_info->trans_mutex); @@ -317,6 +320,9 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, wake_up(&cur_trans->writer_wait); put_transaction(cur_trans); mutex_unlock(&info->trans_mutex); + + if (current->journal_info == trans) + current->journal_info = NULL; memset(trans, 0, sizeof(*trans)); kmem_cache_free(btrfs_trans_handle_cachep, trans); @@ -743,6 +749,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, memcpy(&pending->root_key, &key, sizeof(key)); fail: kfree(new_root_item); + btrfs_unreserve_metadata_space(root, 6); return ret; } @@ -1059,6 +1066,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, mutex_unlock(&root->fs_info->trans_mutex); + if (current->journal_info == trans) + current->journal_info = NULL; + kmem_cache_free(btrfs_trans_handle_cachep, trans); return ret; } diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 23e7d36ff325..7eda483d7b5a 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -446,8 +446,10 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig) goto error; device->name = kstrdup(orig_dev->name, GFP_NOFS); - if (!device->name) + if (!device->name) { + kfree(device); goto error; + } device->devid = orig_dev->devid; device->work.func = pending_bios_fn; diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index a9d3bf4d2689..b0fc93f95fd0 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c @@ -260,7 +260,7 @@ err: * attributes are handled directly. */ struct xattr_handler *btrfs_xattr_handlers[] = { -#ifdef CONFIG_FS_POSIX_ACL +#ifdef CONFIG_BTRFS_POSIX_ACL &btrfs_xattr_acl_access_handler, &btrfs_xattr_acl_default_handler, #endif diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig index 6994a0f54f02..80f352596807 100644 --- a/fs/cifs/Kconfig +++ b/fs/cifs/Kconfig @@ -2,6 +2,7 @@ config CIFS tristate "CIFS support (advanced network filesystem, SMBFS successor)" depends on INET select NLS + select SLOW_WORK help This is the client VFS module for the Common Internet File System (CIFS) protocol which is the successor to the Server Message Block diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 90c5b39f0313..9a5e4f5f3122 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -64,9 +64,6 @@ unsigned int multiuser_mount = 0; unsigned int extended_security = CIFSSEC_DEF; /* unsigned int ntlmv2_support = 0; */ unsigned int sign_CIFS_PDUs = 1; -extern struct task_struct *oplockThread; /* remove sparse warning */ -struct task_struct *oplockThread = NULL; -/* extern struct task_struct * dnotifyThread; remove sparse warning */ static const struct super_operations cifs_super_ops; unsigned int CIFSMaxBufSize = CIFS_MAX_MSGSIZE; module_param(CIFSMaxBufSize, int, 0); @@ -972,89 +969,12 @@ cifs_destroy_mids(void) kmem_cache_destroy(cifs_oplock_cachep); } -static int cifs_oplock_thread(void *dummyarg) -{ - struct oplock_q_entry *oplock_item; - struct cifsTconInfo *pTcon; - struct inode *inode; - __u16 netfid; - int rc, waitrc = 0; - - set_freezable(); - do { - if (try_to_freeze()) - continue; - - spin_lock(&cifs_oplock_lock); - if (list_empty(&cifs_oplock_list)) { - spin_unlock(&cifs_oplock_lock); - set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(39*HZ); - } else { - oplock_item = list_entry(cifs_oplock_list.next, - struct oplock_q_entry, qhead); - cFYI(1, ("found oplock item to write out")); - pTcon = oplock_item->tcon; - inode = oplock_item->pinode; - netfid = oplock_item->netfid; - spin_unlock(&cifs_oplock_lock); - DeleteOplockQEntry(oplock_item); - /* can not grab inode sem here since it would - deadlock when oplock received on delete - since vfs_unlink holds the i_mutex across - the call */ - /* mutex_lock(&inode->i_mutex);*/ - if (S_ISREG(inode->i_mode)) { -#ifdef CONFIG_CIFS_EXPERIMENTAL - if (CIFS_I(inode)->clientCanCacheAll == 0) - break_lease(inode, FMODE_READ); - else if (CIFS_I(inode)->clientCanCacheRead == 0) - break_lease(inode, FMODE_WRITE); -#endif - rc = filemap_fdatawrite(inode->i_mapping); - if (CIFS_I(inode)->clientCanCacheRead == 0) { - waitrc = filemap_fdatawait( - inode->i_mapping); - invalidate_remote_inode(inode); - } - if (rc == 0) - rc = waitrc; - } else - rc = 0; - /* mutex_unlock(&inode->i_mutex);*/ - if (rc) - CIFS_I(inode)->write_behind_rc = rc; - cFYI(1, ("Oplock flush inode %p rc %d", - inode, rc)); - - /* releasing stale oplock after recent reconnect - of smb session using a now incorrect file - handle is not a data integrity issue but do - not bother sending an oplock release if session - to server still is disconnected since oplock - already released by the server in that case */ - if (!pTcon->need_reconnect) { - rc = CIFSSMBLock(0, pTcon, netfid, - 0 /* len */ , 0 /* offset */, 0, - 0, LOCKING_ANDX_OPLOCK_RELEASE, - false /* wait flag */); - cFYI(1, ("Oplock release rc = %d", rc)); - } - set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(1); /* yield in case q were corrupt */ - } - } while (!kthread_should_stop()); - - return 0; -} - static int __init init_cifs(void) { int rc = 0; cifs_proc_init(); INIT_LIST_HEAD(&cifs_tcp_ses_list); - INIT_LIST_HEAD(&cifs_oplock_list); #ifdef CONFIG_CIFS_EXPERIMENTAL INIT_LIST_HEAD(&GlobalDnotifyReqList); INIT_LIST_HEAD(&GlobalDnotifyRsp_Q); @@ -1083,7 +1003,6 @@ init_cifs(void) rwlock_init(&GlobalSMBSeslock); rwlock_init(&cifs_tcp_ses_lock); spin_lock_init(&GlobalMid_Lock); - spin_lock_init(&cifs_oplock_lock); if (cifs_max_pending < 2) { cifs_max_pending = 2; @@ -1118,16 +1037,13 @@ init_cifs(void) if (rc) goto out_unregister_key_type; #endif - oplockThread = kthread_run(cifs_oplock_thread, NULL, "cifsoplockd"); - if (IS_ERR(oplockThread)) { - rc = PTR_ERR(oplockThread); - cERROR(1, ("error %d create oplock thread", rc)); - goto out_unregister_dfs_key_type; - } + rc = slow_work_register_user(); + if (rc) + goto out_unregister_resolver_key; return 0; - out_unregister_dfs_key_type: + out_unregister_resolver_key: #ifdef CONFIG_CIFS_DFS_UPCALL unregister_key_type(&key_type_dns_resolver); out_unregister_key_type: @@ -1164,7 +1080,6 @@ exit_cifs(void) cifs_destroy_inodecache(); cifs_destroy_mids(); cifs_destroy_request_bufs(); - kthread_stop(oplockThread); } MODULE_AUTHOR("Steve French <sfrench@us.ibm.com>"); diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 6cfc81a32703..5d0fde18039c 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -18,6 +18,7 @@ */ #include <linux/in.h> #include <linux/in6.h> +#include <linux/slow-work.h> #include "cifs_fs_sb.h" #include "cifsacl.h" /* @@ -346,14 +347,16 @@ struct cifsFileInfo { /* lock scope id (0 if none) */ struct file *pfile; /* needed for writepage */ struct inode *pInode; /* needed for oplock break */ + struct vfsmount *mnt; struct mutex lock_mutex; struct list_head llist; /* list of byte range locks we have. */ bool closePend:1; /* file is marked to close */ bool invalidHandle:1; /* file closed via session abend */ - bool messageMode:1; /* for pipes: message vs byte mode */ + bool oplock_break_cancelled:1; atomic_t count; /* reference count */ struct mutex fh_mutex; /* prevents reopen race after dead ses*/ struct cifs_search_info srch_inf; + struct slow_work oplock_break; /* slow_work job for oplock breaks */ }; /* Take a reference on the file private data */ @@ -365,8 +368,10 @@ static inline void cifsFileInfo_get(struct cifsFileInfo *cifs_file) /* Release a reference on the file private data */ static inline void cifsFileInfo_put(struct cifsFileInfo *cifs_file) { - if (atomic_dec_and_test(&cifs_file->count)) + if (atomic_dec_and_test(&cifs_file->count)) { + iput(cifs_file->pInode); kfree(cifs_file); + } } /* @@ -382,7 +387,6 @@ struct cifsInodeInfo { unsigned long time; /* jiffies of last update/check of inode */ bool clientCanCacheRead:1; /* read oplock */ bool clientCanCacheAll:1; /* read and writebehind oplock */ - bool oplockPending:1; bool delete_pending:1; /* DELETE_ON_CLOSE is set */ u64 server_eof; /* current file size on server */ u64 uniqueid; /* server inode number */ @@ -585,9 +589,9 @@ require use of the stronger protocol */ #define CIFSSEC_MUST_LANMAN 0x10010 #define CIFSSEC_MUST_PLNTXT 0x20020 #ifdef CONFIG_CIFS_UPCALL -#define CIFSSEC_MASK 0xAF0AF /* allows weak security but also krb5 */ +#define CIFSSEC_MASK 0xBF0BF /* allows weak security but also krb5 */ #else -#define CIFSSEC_MASK 0xA70A7 /* current flags supported if weak */ +#define CIFSSEC_MASK 0xB70B7 /* current flags supported if weak */ #endif /* UPCALL */ #else /* do not allow weak pw hash */ #ifdef CONFIG_CIFS_UPCALL @@ -669,12 +673,6 @@ GLOBAL_EXTERN rwlock_t cifs_tcp_ses_lock; */ GLOBAL_EXTERN rwlock_t GlobalSMBSeslock; -/* Global list of oplocks */ -GLOBAL_EXTERN struct list_head cifs_oplock_list; - -/* Protects the cifs_oplock_list */ -GLOBAL_EXTERN spinlock_t cifs_oplock_lock; - /* Outstanding dir notify requests */ GLOBAL_EXTERN struct list_head GlobalDnotifyReqList; /* DirNotify response queue */ @@ -725,3 +723,4 @@ GLOBAL_EXTERN unsigned int cifs_min_rcv; /* min size of big ntwrk buf pool */ GLOBAL_EXTERN unsigned int cifs_min_small; /* min size of small buf pool */ GLOBAL_EXTERN unsigned int cifs_max_pending; /* MAX requests at once to server*/ +extern const struct slow_work_ops cifs_oplock_break_ops; diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index da8fbf565991..6928c24d1d42 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -86,18 +86,17 @@ extern int CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, const int stage, const struct nls_table *nls_cp); extern __u16 GetNextMid(struct TCP_Server_Info *server); -extern struct oplock_q_entry *AllocOplockQEntry(struct inode *, u16, - struct cifsTconInfo *); -extern void DeleteOplockQEntry(struct oplock_q_entry *); -extern void DeleteTconOplockQEntries(struct cifsTconInfo *); extern struct timespec cifs_NTtimeToUnix(__le64 utc_nanoseconds_since_1601); extern u64 cifs_UnixTimeToNT(struct timespec); extern struct timespec cnvrtDosUnixTm(__le16 le_date, __le16 le_time, int offset); +extern struct cifsFileInfo *cifs_new_fileinfo(struct inode *newinode, + __u16 fileHandle, struct file *file, + struct vfsmount *mnt, unsigned int oflags); extern int cifs_posix_open(char *full_path, struct inode **pinode, - struct super_block *sb, int mode, int oflags, - int *poplock, __u16 *pnetfid, int xid); + struct vfsmount *mnt, int mode, int oflags, + __u32 *poplock, __u16 *pnetfid, int xid); extern void cifs_unix_basic_to_fattr(struct cifs_fattr *fattr, FILE_UNIX_BASIC_INFO *info, struct cifs_sb_info *cifs_sb); diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 301e307e1279..941441d3e386 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -94,6 +94,7 @@ static void mark_open_files_invalid(struct cifsTconInfo *pTcon) list_for_each_safe(tmp, tmp1, &pTcon->openFileList) { open_file = list_entry(tmp, struct cifsFileInfo, tlist); open_file->invalidHandle = true; + open_file->oplock_break_cancelled = true; } write_unlock(&GlobalSMBSeslock); /* BB Add call to invalidate_inodes(sb) for all superblocks mounted diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index d49682433c20..43003e0bef18 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -1670,7 +1670,6 @@ cifs_put_tcon(struct cifsTconInfo *tcon) CIFSSMBTDis(xid, tcon); _FreeXid(xid); - DeleteTconOplockQEntries(tcon); tconInfoFree(tcon); cifs_put_smb_ses(ses); } diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index a6424cfc0121..627a60a6c1b1 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -24,6 +24,7 @@ #include <linux/stat.h> #include <linux/slab.h> #include <linux/namei.h> +#include <linux/mount.h> #include "cifsfs.h" #include "cifspdu.h" #include "cifsglob.h" @@ -129,44 +130,45 @@ cifs_bp_rename_retry: return full_path; } -static void -cifs_fill_fileinfo(struct inode *newinode, __u16 fileHandle, - struct cifsTconInfo *tcon, bool write_only) +struct cifsFileInfo * +cifs_new_fileinfo(struct inode *newinode, __u16 fileHandle, + struct file *file, struct vfsmount *mnt, unsigned int oflags) { int oplock = 0; struct cifsFileInfo *pCifsFile; struct cifsInodeInfo *pCifsInode; + struct cifs_sb_info *cifs_sb = CIFS_SB(mnt->mnt_sb); pCifsFile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL); - if (pCifsFile == NULL) - return; + return pCifsFile; if (oplockEnabled) oplock = REQ_OPLOCK; pCifsFile->netfid = fileHandle; pCifsFile->pid = current->tgid; - pCifsFile->pInode = newinode; + pCifsFile->pInode = igrab(newinode); + pCifsFile->mnt = mnt; + pCifsFile->pfile = file; pCifsFile->invalidHandle = false; pCifsFile->closePend = false; mutex_init(&pCifsFile->fh_mutex); mutex_init(&pCifsFile->lock_mutex); INIT_LIST_HEAD(&pCifsFile->llist); atomic_set(&pCifsFile->count, 1); + slow_work_init(&pCifsFile->oplock_break, &cifs_oplock_break_ops); - /* set the following in open now - pCifsFile->pfile = file; */ write_lock(&GlobalSMBSeslock); - list_add(&pCifsFile->tlist, &tcon->openFileList); + list_add(&pCifsFile->tlist, &cifs_sb->tcon->openFileList); pCifsInode = CIFS_I(newinode); if (pCifsInode) { /* if readable file instance put first in list*/ - if (write_only) + if (oflags & FMODE_READ) + list_add(&pCifsFile->flist, &pCifsInode->openFileList); + else list_add_tail(&pCifsFile->flist, &pCifsInode->openFileList); - else - list_add(&pCifsFile->flist, &pCifsInode->openFileList); if ((oplock & 0xF) == OPLOCK_EXCLUSIVE) { pCifsInode->clientCanCacheAll = true; @@ -176,18 +178,18 @@ cifs_fill_fileinfo(struct inode *newinode, __u16 fileHandle, pCifsInode->clientCanCacheRead = true; } write_unlock(&GlobalSMBSeslock); + + return pCifsFile; } int cifs_posix_open(char *full_path, struct inode **pinode, - struct super_block *sb, int mode, int oflags, - int *poplock, __u16 *pnetfid, int xid) + struct vfsmount *mnt, int mode, int oflags, + __u32 *poplock, __u16 *pnetfid, int xid) { int rc; - __u32 oplock; - bool write_only = false; FILE_UNIX_BASIC_INFO *presp_data; __u32 posix_flags = 0; - struct cifs_sb_info *cifs_sb = CIFS_SB(sb); + struct cifs_sb_info *cifs_sb = CIFS_SB(mnt->mnt_sb); struct cifs_fattr fattr; cFYI(1, ("posix open %s", full_path)); @@ -223,12 +225,9 @@ int cifs_posix_open(char *full_path, struct inode **pinode, if (oflags & O_DIRECT) posix_flags |= SMB_O_DIRECT; - if (!(oflags & FMODE_READ)) - write_only = true; - mode &= ~current_umask(); rc = CIFSPOSIXCreate(xid, cifs_sb->tcon, posix_flags, mode, - pnetfid, presp_data, &oplock, full_path, + pnetfid, presp_data, poplock, full_path, cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); if (rc) @@ -244,7 +243,7 @@ int cifs_posix_open(char *full_path, struct inode **pinode, /* get new inode and set it up */ if (*pinode == NULL) { - *pinode = cifs_iget(sb, &fattr); + *pinode = cifs_iget(mnt->mnt_sb, &fattr); if (!*pinode) { rc = -ENOMEM; goto posix_open_ret; @@ -253,7 +252,7 @@ int cifs_posix_open(char *full_path, struct inode **pinode, cifs_fattr_to_inode(*pinode, &fattr); } - cifs_fill_fileinfo(*pinode, *pnetfid, cifs_sb->tcon, write_only); + cifs_new_fileinfo(*pinode, *pnetfid, NULL, mnt, oflags); posix_open_ret: kfree(presp_data); @@ -280,7 +279,7 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode, int rc = -ENOENT; int xid; int create_options = CREATE_NOT_DIR; - int oplock = 0; + __u32 oplock = 0; int oflags; bool posix_create = false; /* @@ -298,7 +297,6 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode, FILE_ALL_INFO *buf = NULL; struct inode *newinode = NULL; int disposition = FILE_OVERWRITE_IF; - bool write_only = false; xid = GetXid(); @@ -323,7 +321,7 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode, if (tcon->unix_ext && (tcon->ses->capabilities & CAP_UNIX) && (CIFS_UNIX_POSIX_PATH_OPS_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))) { - rc = cifs_posix_open(full_path, &newinode, inode->i_sb, + rc = cifs_posix_open(full_path, &newinode, nd->path.mnt, mode, oflags, &oplock, &fileHandle, xid); /* EIO could indicate that (posix open) operation is not supported, despite what server claimed in capability @@ -351,11 +349,8 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode, desiredAccess = 0; if (oflags & FMODE_READ) desiredAccess |= GENERIC_READ; /* is this too little? */ - if (oflags & FMODE_WRITE) { + if (oflags & FMODE_WRITE) desiredAccess |= GENERIC_WRITE; - if (!(oflags & FMODE_READ)) - write_only = true; - } if ((oflags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL)) disposition = FILE_CREATE; @@ -470,8 +465,8 @@ cifs_create_set_dentry: /* mknod case - do not leave file open */ CIFSSMBClose(xid, tcon, fileHandle); } else if (!(posix_create) && (newinode)) { - cifs_fill_fileinfo(newinode, fileHandle, - cifs_sb->tcon, write_only); + cifs_new_fileinfo(newinode, fileHandle, NULL, + nd->path.mnt, oflags); } cifs_create_out: kfree(buf); @@ -611,7 +606,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, { int xid; int rc = 0; /* to get around spurious gcc warning, set to zero here */ - int oplock = 0; + __u32 oplock = 0; __u16 fileHandle = 0; bool posix_open = false; struct cifs_sb_info *cifs_sb; @@ -683,8 +678,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, if (!(nd->flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY)) && (nd->flags & LOOKUP_OPEN) && !pTcon->broken_posix_open && (nd->intent.open.flags & O_CREAT)) { - rc = cifs_posix_open(full_path, &newInode, - parent_dir_inode->i_sb, + rc = cifs_posix_open(full_path, &newInode, nd->path.mnt, nd->intent.open.create_mode, nd->intent.open.flags, &oplock, &fileHandle, xid); diff --git a/fs/cifs/file.c b/fs/cifs/file.c index fa7beac8b80e..429337eb7afe 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -30,6 +30,7 @@ #include <linux/writeback.h> #include <linux/task_io_accounting_ops.h> #include <linux/delay.h> +#include <linux/mount.h> #include <asm/div64.h> #include "cifsfs.h" #include "cifspdu.h" @@ -39,27 +40,6 @@ #include "cifs_debug.h" #include "cifs_fs_sb.h" -static inline struct cifsFileInfo *cifs_init_private( - struct cifsFileInfo *private_data, struct inode *inode, - struct file *file, __u16 netfid) -{ - memset(private_data, 0, sizeof(struct cifsFileInfo)); - private_data->netfid = netfid; - private_data->pid = current->tgid; - mutex_init(&private_data->fh_mutex); - mutex_init(&private_data->lock_mutex); - INIT_LIST_HEAD(&private_data->llist); - private_data->pfile = file; /* needed for writepage */ - private_data->pInode = inode; - private_data->invalidHandle = false; - private_data->closePend = false; - /* Initialize reference count to one. The private data is - freed on the release of the last reference */ - atomic_set(&private_data->count, 1); - - return private_data; -} - static inline int cifs_convert_flags(unsigned int flags) { if ((flags & O_ACCMODE) == O_RDONLY) @@ -123,9 +103,11 @@ static inline int cifs_get_disposition(unsigned int flags) } /* all arguments to this function must be checked for validity in caller */ -static inline int cifs_posix_open_inode_helper(struct inode *inode, - struct file *file, struct cifsInodeInfo *pCifsInode, - struct cifsFileInfo *pCifsFile, int oplock, u16 netfid) +static inline int +cifs_posix_open_inode_helper(struct inode *inode, struct file *file, + struct cifsInodeInfo *pCifsInode, + struct cifsFileInfo *pCifsFile, __u32 oplock, + u16 netfid) { write_lock(&GlobalSMBSeslock); @@ -219,17 +201,6 @@ static inline int cifs_open_inode_helper(struct inode *inode, struct file *file, struct timespec temp; int rc; - /* want handles we can use to read with first - in the list so we do not have to walk the - list to search for one in write_begin */ - if ((file->f_flags & O_ACCMODE) == O_WRONLY) { - list_add_tail(&pCifsFile->flist, - &pCifsInode->openFileList); - } else { - list_add(&pCifsFile->flist, - &pCifsInode->openFileList); - } - write_unlock(&GlobalSMBSeslock); if (pCifsInode->clientCanCacheRead) { /* we have the inode open somewhere else no need to discard cache data */ @@ -279,7 +250,8 @@ client_can_cache: int cifs_open(struct inode *inode, struct file *file) { int rc = -EACCES; - int xid, oplock; + int xid; + __u32 oplock; struct cifs_sb_info *cifs_sb; struct cifsTconInfo *tcon; struct cifsFileInfo *pCifsFile; @@ -324,7 +296,7 @@ int cifs_open(struct inode *inode, struct file *file) le64_to_cpu(tcon->fsUnixInfo.Capability))) { int oflags = (int) cifs_posix_convert_flags(file->f_flags); /* can not refresh inode info since size could be stale */ - rc = cifs_posix_open(full_path, &inode, inode->i_sb, + rc = cifs_posix_open(full_path, &inode, file->f_path.mnt, cifs_sb->mnt_file_mode /* ignored */, oflags, &oplock, &netfid, xid); if (rc == 0) { @@ -414,24 +386,17 @@ int cifs_open(struct inode *inode, struct file *file) cFYI(1, ("cifs_open returned 0x%x", rc)); goto out; } - file->private_data = - kmalloc(sizeof(struct cifsFileInfo), GFP_KERNEL); + + pCifsFile = cifs_new_fileinfo(inode, netfid, file, file->f_path.mnt, + file->f_flags); + file->private_data = pCifsFile; if (file->private_data == NULL) { rc = -ENOMEM; goto out; } - pCifsFile = cifs_init_private(file->private_data, inode, file, netfid); - write_lock(&GlobalSMBSeslock); - list_add(&pCifsFile->tlist, &tcon->openFileList); - pCifsInode = CIFS_I(file->f_path.dentry->d_inode); - if (pCifsInode) { - rc = cifs_open_inode_helper(inode, file, pCifsInode, - pCifsFile, tcon, - &oplock, buf, full_path, xid); - } else { - write_unlock(&GlobalSMBSeslock); - } + rc = cifs_open_inode_helper(inode, file, pCifsInode, pCifsFile, tcon, + &oplock, buf, full_path, xid); if (oplock & CIFS_CREATE_ACTION) { /* time to set mode which we can not set earlier due to @@ -474,7 +439,8 @@ static int cifs_relock_file(struct cifsFileInfo *cifsFile) static int cifs_reopen_file(struct file *file, bool can_flush) { int rc = -EACCES; - int xid, oplock; + int xid; + __u32 oplock; struct cifs_sb_info *cifs_sb; struct cifsTconInfo *tcon; struct cifsFileInfo *pCifsFile; @@ -543,7 +509,7 @@ reopen_error_exit: le64_to_cpu(tcon->fsUnixInfo.Capability))) { int oflags = (int) cifs_posix_convert_flags(file->f_flags); /* can not refresh inode info since size could be stale */ - rc = cifs_posix_open(full_path, NULL, inode->i_sb, + rc = cifs_posix_open(full_path, NULL, file->f_path.mnt, cifs_sb->mnt_file_mode /* ignored */, oflags, &oplock, &netfid, xid); if (rc == 0) { @@ -2308,6 +2274,73 @@ out: return rc; } +static void +cifs_oplock_break(struct slow_work *work) +{ + struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo, + oplock_break); + struct inode *inode = cfile->pInode; + struct cifsInodeInfo *cinode = CIFS_I(inode); + struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->mnt->mnt_sb); + int rc, waitrc = 0; + + if (inode && S_ISREG(inode->i_mode)) { +#ifdef CONFIG_CIFS_EXPERIMENTAL + if (cinode->clientCanCacheAll == 0) + break_lease(inode, FMODE_READ); + else if (cinode->clientCanCacheRead == 0) + break_lease(inode, FMODE_WRITE); +#endif + rc = filemap_fdatawrite(inode->i_mapping); + if (cinode->clientCanCacheRead == 0) { + waitrc = filemap_fdatawait(inode->i_mapping); + invalidate_remote_inode(inode); + } + if (!rc) + rc = waitrc; + if (rc) + cinode->write_behind_rc = rc; + cFYI(1, ("Oplock flush inode %p rc %d", inode, rc)); + } + + /* + * releasing stale oplock after recent reconnect of smb session using + * a now incorrect file handle is not a data integrity issue but do + * not bother sending an oplock release if session to server still is + * disconnected since oplock already released by the server + */ + if (!cfile->closePend && !cfile->oplock_break_cancelled) { + rc = CIFSSMBLock(0, cifs_sb->tcon, cfile->netfid, 0, 0, 0, 0, + LOCKING_ANDX_OPLOCK_RELEASE, false); + cFYI(1, ("Oplock release rc = %d", rc)); + } +} + +static int +cifs_oplock_break_get(struct slow_work *work) +{ + struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo, + oplock_break); + mntget(cfile->mnt); + cifsFileInfo_get(cfile); + return 0; +} + +static void +cifs_oplock_break_put(struct slow_work *work) +{ + struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo, + oplock_break); + mntput(cfile->mnt); + cifsFileInfo_put(cfile); +} + +const struct slow_work_ops cifs_oplock_break_ops = { + .get_ref = cifs_oplock_break_get, + .put_ref = cifs_oplock_break_put, + .execute = cifs_oplock_break, +}; + const struct address_space_operations cifs_addr_ops = { .readpage = cifs_readpage, .readpages = cifs_readpages, diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index e079a9190ec4..0241b25ac33f 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -32,7 +32,6 @@ extern mempool_t *cifs_sm_req_poolp; extern mempool_t *cifs_req_poolp; -extern struct task_struct *oplockThread; /* The xid serves as a useful identifier for each incoming vfs request, in a similar way to the mid which is useful to track each sent smb, @@ -500,6 +499,7 @@ is_valid_oplock_break(struct smb_hdr *buf, struct TCP_Server_Info *srv) struct cifsTconInfo *tcon; struct cifsInodeInfo *pCifsInode; struct cifsFileInfo *netfile; + int rc; cFYI(1, ("Checking for oplock break or dnotify response")); if ((pSMB->hdr.Command == SMB_COM_NT_TRANSACT) && @@ -562,30 +562,40 @@ is_valid_oplock_break(struct smb_hdr *buf, struct TCP_Server_Info *srv) continue; cifs_stats_inc(&tcon->num_oplock_brks); - write_lock(&GlobalSMBSeslock); + read_lock(&GlobalSMBSeslock); list_for_each(tmp2, &tcon->openFileList) { netfile = list_entry(tmp2, struct cifsFileInfo, tlist); if (pSMB->Fid != netfile->netfid) continue; - write_unlock(&GlobalSMBSeslock); - read_unlock(&cifs_tcp_ses_lock); + /* + * don't do anything if file is about to be + * closed anyway. + */ + if (netfile->closePend) { + read_unlock(&GlobalSMBSeslock); + read_unlock(&cifs_tcp_ses_lock); + return true; + } + cFYI(1, ("file id match, oplock break")); pCifsInode = CIFS_I(netfile->pInode); pCifsInode->clientCanCacheAll = false; if (pSMB->OplockLevel == 0) pCifsInode->clientCanCacheRead = false; - pCifsInode->oplockPending = true; - AllocOplockQEntry(netfile->pInode, - netfile->netfid, tcon); - cFYI(1, ("about to wake up oplock thread")); - if (oplockThread) - wake_up_process(oplockThread); - + rc = slow_work_enqueue(&netfile->oplock_break); + if (rc) { + cERROR(1, ("failed to enqueue oplock " + "break: %d\n", rc)); + } else { + netfile->oplock_break_cancelled = false; + } + read_unlock(&GlobalSMBSeslock); + read_unlock(&cifs_tcp_ses_lock); return true; } - write_unlock(&GlobalSMBSeslock); + read_unlock(&GlobalSMBSeslock); read_unlock(&cifs_tcp_ses_lock); cFYI(1, ("No matching file for oplock break")); return true; diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index f823a4a208a7..1f098ca71636 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -146,7 +146,7 @@ cifs_fill_common_info(struct cifs_fattr *fattr, struct cifs_sb_info *cifs_sb) } } -void +static void cifs_dir_info_to_fattr(struct cifs_fattr *fattr, FILE_DIRECTORY_INFO *info, struct cifs_sb_info *cifs_sb) { @@ -161,7 +161,7 @@ cifs_dir_info_to_fattr(struct cifs_fattr *fattr, FILE_DIRECTORY_INFO *info, cifs_fill_common_info(fattr, cifs_sb); } -void +static void cifs_std_info_to_fattr(struct cifs_fattr *fattr, FIND_FILE_STANDARD_INFO *info, struct cifs_sb_info *cifs_sb) { diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 1da4ab250eae..07b8e71544ee 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -103,56 +103,6 @@ DeleteMidQEntry(struct mid_q_entry *midEntry) mempool_free(midEntry, cifs_mid_poolp); } -struct oplock_q_entry * -AllocOplockQEntry(struct inode *pinode, __u16 fid, struct cifsTconInfo *tcon) -{ - struct oplock_q_entry *temp; - if ((pinode == NULL) || (tcon == NULL)) { - cERROR(1, ("Null parms passed to AllocOplockQEntry")); - return NULL; - } - temp = (struct oplock_q_entry *) kmem_cache_alloc(cifs_oplock_cachep, - GFP_KERNEL); - if (temp == NULL) - return temp; - else { - temp->pinode = pinode; - temp->tcon = tcon; - temp->netfid = fid; - spin_lock(&cifs_oplock_lock); - list_add_tail(&temp->qhead, &cifs_oplock_list); - spin_unlock(&cifs_oplock_lock); - } - return temp; -} - -void DeleteOplockQEntry(struct oplock_q_entry *oplockEntry) -{ - spin_lock(&cifs_oplock_lock); - /* should we check if list empty first? */ - list_del(&oplockEntry->qhead); - spin_unlock(&cifs_oplock_lock); - kmem_cache_free(cifs_oplock_cachep, oplockEntry); -} - - -void DeleteTconOplockQEntries(struct cifsTconInfo *tcon) -{ - struct oplock_q_entry *temp; - - if (tcon == NULL) - return; - - spin_lock(&cifs_oplock_lock); - list_for_each_entry(temp, &cifs_oplock_list, qhead) { - if ((temp->tcon) && (temp->tcon == tcon)) { - list_del(&temp->qhead); - kmem_cache_free(cifs_oplock_cachep, temp); - } - } - spin_unlock(&cifs_oplock_lock); -} - static int smb_sendv(struct TCP_Server_Info *server, struct kvec *iov, int n_vec) { diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 5ca3eca70a1e..9630583cef28 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -81,7 +81,7 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov, return generic_file_aio_write(iocb, iov, nr_segs, pos); } -static struct vm_operations_struct ext4_file_vm_ops = { +static const struct vm_operations_struct ext4_file_vm_ops = { .fault = filemap_fault, .page_mkwrite = ext4_page_mkwrite, }; diff --git a/fs/fat/fat.h b/fs/fat/fat.h index adb0e72a176d..7db0979c6b72 100644 --- a/fs/fat/fat.h +++ b/fs/fat/fat.h @@ -323,7 +323,7 @@ extern int fat_flush_inodes(struct super_block *sb, struct inode *i1, /* fat/misc.c */ extern void fat_fs_error(struct super_block *s, const char *fmt, ...) __attribute__ ((format (printf, 2, 3))) __cold; -extern void fat_clusters_flush(struct super_block *sb); +extern int fat_clusters_flush(struct super_block *sb); extern int fat_chain_add(struct inode *inode, int new_dclus, int nr_cluster); extern void fat_time_fat2unix(struct msdos_sb_info *sbi, struct timespec *ts, __le16 __time, __le16 __date, u8 time_cs); diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 04629d1302fc..76b7961ab663 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -451,12 +451,16 @@ static void fat_write_super(struct super_block *sb) static int fat_sync_fs(struct super_block *sb, int wait) { - lock_super(sb); - fat_clusters_flush(sb); - sb->s_dirt = 0; - unlock_super(sb); + int err = 0; - return 0; + if (sb->s_dirt) { + lock_super(sb); + sb->s_dirt = 0; + err = fat_clusters_flush(sb); + unlock_super(sb); + } + + return err; } static void fat_put_super(struct super_block *sb) @@ -812,7 +816,7 @@ static int fat_show_options(struct seq_file *m, struct vfsmount *mnt) seq_puts(m, ",shortname=mixed"); break; case VFAT_SFN_DISPLAY_LOWER | VFAT_SFN_CREATE_WIN95: - /* seq_puts(m, ",shortname=lower"); */ + seq_puts(m, ",shortname=lower"); break; default: seq_puts(m, ",shortname=unknown"); @@ -963,7 +967,7 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug, opts->codepage = fat_default_codepage; opts->iocharset = fat_default_iocharset; if (is_vfat) { - opts->shortname = VFAT_SFN_DISPLAY_LOWER|VFAT_SFN_CREATE_WIN95; + opts->shortname = VFAT_SFN_DISPLAY_WINNT|VFAT_SFN_CREATE_WIN95; opts->rodir = 0; } else { opts->shortname = 0; diff --git a/fs/fat/misc.c b/fs/fat/misc.c index 4e35be873e09..0f55f5cb732f 100644 --- a/fs/fat/misc.c +++ b/fs/fat/misc.c @@ -43,19 +43,19 @@ EXPORT_SYMBOL_GPL(fat_fs_error); /* Flushes the number of free clusters on FAT32 */ /* XXX: Need to write one per FSINFO block. Currently only writes 1 */ -void fat_clusters_flush(struct super_block *sb) +int fat_clusters_flush(struct super_block *sb) { struct msdos_sb_info *sbi = MSDOS_SB(sb); struct buffer_head *bh; struct fat_boot_fsinfo *fsinfo; if (sbi->fat_bits != 32) - return; + return 0; bh = sb_bread(sb, sbi->fsinfo_sector); if (bh == NULL) { printk(KERN_ERR "FAT: bread failed in fat_clusters_flush\n"); - return; + return -EIO; } fsinfo = (struct fat_boot_fsinfo *)bh->b_data; @@ -74,6 +74,8 @@ void fat_clusters_flush(struct super_block *sb) mark_buffer_dirty(bh); } brelse(bh); + + return 0; } /* diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c index cb6e83557112..f565f24019b5 100644 --- a/fs/fat/namei_vfat.c +++ b/fs/fat/namei_vfat.c @@ -499,17 +499,10 @@ xlate_to_uni(const unsigned char *name, int len, unsigned char *outname, int charlen; if (utf8) { - int name_len = strlen(name); - - *outlen = utf8s_to_utf16s(name, PATH_MAX, (wchar_t *) outname); - - /* - * We stripped '.'s before and set len appropriately, - * but utf8s_to_utf16s doesn't care about len - */ - *outlen -= (name_len - len); - - if (*outlen > 255) + *outlen = utf8s_to_utf16s(name, len, (wchar_t *)outname); + if (*outlen < 0) + return *outlen; + else if (*outlen > 255) return -ENAMETOOLONG; op = &outname[*outlen * sizeof(wchar_t)]; diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index fb61178c86e3..9d5360c4c2af 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -250,9 +250,11 @@ static void bdi_sync_writeback(struct backing_dev_info *bdi, * completion. Caller need not hold sb s_umount semaphore. * */ -void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages) +void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb, + long nr_pages) { struct wb_writeback_args args = { + .sb = sb, .sync_mode = WB_SYNC_NONE, .nr_pages = nr_pages, .range_cyclic = 1, @@ -1206,7 +1208,7 @@ void writeback_inodes_sb(struct super_block *sb) nr_to_write = nr_dirty + nr_unstable + (inodes_stat.nr_inodes - inodes_stat.nr_unused); - bdi_start_writeback(sb->s_bdi, nr_to_write); + bdi_start_writeback(sb->s_bdi, sb, nr_to_write); } EXPORT_SYMBOL(writeback_inodes_sb); diff --git a/fs/fuse/file.c b/fs/fuse/file.c index cbc464043b6f..a3492f7d207c 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1313,7 +1313,7 @@ static int fuse_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) return 0; } -static struct vm_operations_struct fuse_file_vm_ops = { +static const struct vm_operations_struct fuse_file_vm_ops = { .close = fuse_vma_close, .fault = filemap_fault, .page_mkwrite = fuse_page_mkwrite, diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 166f38fbd246..4eb308aa3234 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -418,7 +418,7 @@ out: return ret; } -static struct vm_operations_struct gfs2_vm_ops = { +static const struct vm_operations_struct gfs2_vm_ops = { .fault = filemap_fault, .page_mkwrite = gfs2_page_mkwrite, }; diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 761af77491f5..b0ab5219becb 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -770,7 +770,7 @@ static int jbd2_seq_info_release(struct inode *inode, struct file *file) return seq_release(inode, file); } -static struct file_operations jbd2_seq_info_fops = { +static const struct file_operations jbd2_seq_info_fops = { .owner = THIS_MODULE, .open = jbd2_seq_info_open, .read = seq_read, diff --git a/fs/ncpfs/mmap.c b/fs/ncpfs/mmap.c index 5d8dcb9ee326..15458decdb8a 100644 --- a/fs/ncpfs/mmap.c +++ b/fs/ncpfs/mmap.c @@ -95,7 +95,7 @@ static int ncp_file_mmap_fault(struct vm_area_struct *area, return VM_FAULT_MAJOR; } -static struct vm_operations_struct ncp_file_mmap = +static const struct vm_operations_struct ncp_file_mmap = { .fault = ncp_file_mmap_fault, }; diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 86d6b4db1096..f5fdd39e037a 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -59,7 +59,7 @@ static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl); static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl); static int nfs_setlease(struct file *file, long arg, struct file_lock **fl); -static struct vm_operations_struct nfs_file_vm_ops; +static const struct vm_operations_struct nfs_file_vm_ops; const struct file_operations nfs_file_operations = { .llseek = nfs_file_llseek, @@ -572,7 +572,7 @@ out_unlock: return VM_FAULT_SIGBUS; } -static struct vm_operations_struct nfs_file_vm_ops = { +static const struct vm_operations_struct nfs_file_vm_ops = { .fault = filemap_fault, .page_mkwrite = nfs_vm_page_mkwrite, }; diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 00388d2a3c99..5c01fc148ce8 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -176,7 +176,7 @@ static const struct file_operations exports_operations = { extern int nfsd_pool_stats_open(struct inode *inode, struct file *file); extern int nfsd_pool_stats_release(struct inode *inode, struct file *file); -static struct file_operations pool_stats_operations = { +static const struct file_operations pool_stats_operations = { .open = nfsd_pool_stats_open, .read = seq_read, .llseek = seq_lseek, diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c index 6a2711f4c321..5941958f1e47 100644 --- a/fs/nilfs2/btnode.c +++ b/fs/nilfs2/btnode.c @@ -36,6 +36,7 @@ void nilfs_btnode_cache_init_once(struct address_space *btnc) { + memset(btnc, 0, sizeof(*btnc)); INIT_RADIX_TREE(&btnc->page_tree, GFP_ATOMIC); spin_lock_init(&btnc->tree_lock); INIT_LIST_HEAD(&btnc->private_list); diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c index 1a4fa04cf071..e097099bfc8f 100644 --- a/fs/nilfs2/dir.c +++ b/fs/nilfs2/dir.c @@ -697,7 +697,7 @@ not_empty: return 0; } -struct file_operations nilfs_dir_operations = { +const struct file_operations nilfs_dir_operations = { .llseek = generic_file_llseek, .read = generic_read_dir, .readdir = nilfs_readdir, diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c index fc8278c77cdd..30292df443ce 100644 --- a/fs/nilfs2/file.c +++ b/fs/nilfs2/file.c @@ -117,7 +117,7 @@ static int nilfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) return 0; } -struct vm_operations_struct nilfs_file_vm_ops = { +static const struct vm_operations_struct nilfs_file_vm_ops = { .fault = filemap_fault, .page_mkwrite = nilfs_page_mkwrite, }; @@ -134,7 +134,7 @@ static int nilfs_file_mmap(struct file *file, struct vm_area_struct *vma) * We have mostly NULL's here: the current defaults are ok for * the nilfs filesystem. */ -struct file_operations nilfs_file_operations = { +const struct file_operations nilfs_file_operations = { .llseek = generic_file_llseek, .read = do_sync_read, .write = do_sync_write, diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index 2d2c501deb54..5040220c3732 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -400,6 +400,7 @@ int nilfs_read_inode_common(struct inode *inode, ii->i_dir_acl = S_ISREG(inode->i_mode) ? 0 : le32_to_cpu(raw_inode->i_dir_acl); #endif + ii->i_dir_start_lookup = 0; ii->i_cno = 0; inode->i_generation = le32_to_cpu(raw_inode->i_generation); diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c index b18c4998f8d0..f6326112d647 100644 --- a/fs/nilfs2/mdt.c +++ b/fs/nilfs2/mdt.c @@ -433,7 +433,7 @@ static const struct address_space_operations def_mdt_aops = { }; static const struct inode_operations def_mdt_iops; -static struct file_operations def_mdt_fops; +static const struct file_operations def_mdt_fops; /* * NILFS2 uses pseudo inodes for meta data files such as DAT, cpfile, sufile, diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h index bad7368782d0..4da6f67e9a91 100644 --- a/fs/nilfs2/nilfs.h +++ b/fs/nilfs2/nilfs.h @@ -294,9 +294,9 @@ void nilfs_clear_gcdat_inode(struct the_nilfs *); /* * Inodes and files operations */ -extern struct file_operations nilfs_dir_operations; +extern const struct file_operations nilfs_dir_operations; extern const struct inode_operations nilfs_file_inode_operations; -extern struct file_operations nilfs_file_operations; +extern const struct file_operations nilfs_file_operations; extern const struct address_space_operations nilfs_aops; extern const struct inode_operations nilfs_dir_inode_operations; extern const struct inode_operations nilfs_special_inode_operations; diff --git a/fs/nls/nls_base.c b/fs/nls/nls_base.c index 2224b4d07bf0..44a88a9fa2c8 100644 --- a/fs/nls/nls_base.c +++ b/fs/nls/nls_base.c @@ -124,10 +124,10 @@ int utf8s_to_utf16s(const u8 *s, int len, wchar_t *pwcs) while (*s && len > 0) { if (*s & 0x80) { size = utf8_to_utf32(s, len, &u); - if (size < 0) { - /* Ignore character and move on */ - size = 1; - } else if (u >= PLANE_SIZE) { + if (size < 0) + return -EINVAL; + + if (u >= PLANE_SIZE) { u -= PLANE_SIZE; *op++ = (wchar_t) (SURROGATE_PAIR | ((u >> 10) & SURROGATE_BITS)); diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 09cc25d04611..c452d116b892 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -966,7 +966,7 @@ static ssize_t o2hb_debug_read(struct file *file, char __user *buf, } #endif /* CONFIG_DEBUG_FS */ -static struct file_operations o2hb_debug_fops = { +static const struct file_operations o2hb_debug_fops = { .open = o2hb_debug_open, .release = o2hb_debug_release, .read = o2hb_debug_read, diff --git a/fs/ocfs2/cluster/netdebug.c b/fs/ocfs2/cluster/netdebug.c index cfb2be708abe..da794bc07a6c 100644 --- a/fs/ocfs2/cluster/netdebug.c +++ b/fs/ocfs2/cluster/netdebug.c @@ -207,7 +207,7 @@ static int nst_fop_release(struct inode *inode, struct file *file) return seq_release_private(inode, file); } -static struct file_operations nst_seq_fops = { +static const struct file_operations nst_seq_fops = { .open = nst_fop_open, .read = seq_read, .llseek = seq_lseek, @@ -388,7 +388,7 @@ static int sc_fop_release(struct inode *inode, struct file *file) return seq_release_private(inode, file); } -static struct file_operations sc_seq_fops = { +static const struct file_operations sc_seq_fops = { .open = sc_fop_open, .read = seq_read, .llseek = seq_lseek, diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c index ca46002ec10e..42b0bad7a612 100644 --- a/fs/ocfs2/dlm/dlmdebug.c +++ b/fs/ocfs2/dlm/dlmdebug.c @@ -478,7 +478,7 @@ bail: return -ENOMEM; } -static struct file_operations debug_purgelist_fops = { +static const struct file_operations debug_purgelist_fops = { .open = debug_purgelist_open, .release = debug_buffer_release, .read = debug_buffer_read, @@ -538,7 +538,7 @@ bail: return -ENOMEM; } -static struct file_operations debug_mle_fops = { +static const struct file_operations debug_mle_fops = { .open = debug_mle_open, .release = debug_buffer_release, .read = debug_buffer_read, @@ -741,7 +741,7 @@ static int debug_lockres_release(struct inode *inode, struct file *file) return seq_release_private(inode, file); } -static struct file_operations debug_lockres_fops = { +static const struct file_operations debug_lockres_fops = { .open = debug_lockres_open, .release = debug_lockres_release, .read = seq_read, @@ -925,7 +925,7 @@ bail: return -ENOMEM; } -static struct file_operations debug_state_fops = { +static const struct file_operations debug_state_fops = { .open = debug_state_open, .release = debug_buffer_release, .read = debug_buffer_read, diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c index b606496b72ec..39737613424a 100644 --- a/fs/ocfs2/mmap.c +++ b/fs/ocfs2/mmap.c @@ -202,7 +202,7 @@ out: return ret; } -static struct vm_operations_struct ocfs2_file_vm_ops = { +static const struct vm_operations_struct ocfs2_file_vm_ops = { .fault = ocfs2_fault, .page_mkwrite = ocfs2_page_mkwrite, }; diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 4cc3c890a2cd..c0e48aeebb1c 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -373,7 +373,7 @@ static ssize_t ocfs2_debug_read(struct file *file, char __user *buf, } #endif /* CONFIG_DEBUG_FS */ -static struct file_operations ocfs2_osb_debug_fops = { +static const struct file_operations ocfs2_osb_debug_fops = { .open = ocfs2_osb_debug_open, .release = ocfs2_debug_release, .read = ocfs2_debug_read, diff --git a/fs/omfs/dir.c b/fs/omfs/dir.c index 3680bae335b5..b42d62419034 100644 --- a/fs/omfs/dir.c +++ b/fs/omfs/dir.c @@ -498,7 +498,7 @@ const struct inode_operations omfs_dir_inops = { .rmdir = omfs_rmdir, }; -struct file_operations omfs_dir_operations = { +const struct file_operations omfs_dir_operations = { .read = generic_read_dir, .readdir = omfs_readdir, .llseek = generic_file_llseek, diff --git a/fs/omfs/file.c b/fs/omfs/file.c index 4845fbb18e6e..399487c09364 100644 --- a/fs/omfs/file.c +++ b/fs/omfs/file.c @@ -322,7 +322,7 @@ static sector_t omfs_bmap(struct address_space *mapping, sector_t block) return generic_block_bmap(mapping, block, omfs_get_block); } -struct file_operations omfs_file_operations = { +const struct file_operations omfs_file_operations = { .llseek = generic_file_llseek, .read = do_sync_read, .write = do_sync_write, diff --git a/fs/omfs/omfs.h b/fs/omfs/omfs.h index df71039945ac..ebe2fdbe535e 100644 --- a/fs/omfs/omfs.h +++ b/fs/omfs/omfs.h @@ -44,14 +44,14 @@ extern int omfs_allocate_range(struct super_block *sb, int min_request, extern int omfs_clear_range(struct super_block *sb, u64 block, int count); /* dir.c */ -extern struct file_operations omfs_dir_operations; +extern const struct file_operations omfs_dir_operations; extern const struct inode_operations omfs_dir_inops; extern int omfs_make_empty(struct inode *inode, struct super_block *sb); extern int omfs_is_bad(struct omfs_sb_info *sbi, struct omfs_header *header, u64 fsblock); /* file.c */ -extern struct file_operations omfs_file_operations; +extern const struct file_operations omfs_file_operations; extern const struct inode_operations omfs_file_inops; extern const struct address_space_operations omfs_aops; extern void omfs_make_empty_table(struct buffer_head *bh, int offset); diff --git a/fs/sysfs/bin.c b/fs/sysfs/bin.c index 2524714bece1..60c702bc10ae 100644 --- a/fs/sysfs/bin.c +++ b/fs/sysfs/bin.c @@ -40,7 +40,7 @@ struct bin_buffer { struct mutex mutex; void *buffer; int mmapped; - struct vm_operations_struct *vm_ops; + const struct vm_operations_struct *vm_ops; struct file *file; struct hlist_node list; }; @@ -331,7 +331,7 @@ static int bin_migrate(struct vm_area_struct *vma, const nodemask_t *from, } #endif -static struct vm_operations_struct bin_vm_ops = { +static const struct vm_operations_struct bin_vm_ops = { .open = bin_vma_open, .close = bin_vma_close, .fault = bin_fault, diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index 2e6481a7701c..1009adc8d602 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -1534,7 +1534,7 @@ out_unlock: return err; } -static struct vm_operations_struct ubifs_file_vm_ops = { +static const struct vm_operations_struct ubifs_file_vm_ops = { .fault = filemap_fault, .page_mkwrite = ubifs_vm_page_mkwrite, }; diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c index 988d8f87bc0f..629370974e57 100644 --- a/fs/xfs/linux-2.6/xfs_file.c +++ b/fs/xfs/linux-2.6/xfs_file.c @@ -42,7 +42,7 @@ #include <linux/dcache.h> -static struct vm_operations_struct xfs_file_vm_ops; +static const struct vm_operations_struct xfs_file_vm_ops; STATIC ssize_t xfs_file_aio_read( @@ -280,7 +280,7 @@ const struct file_operations xfs_dir_file_operations = { .fsync = xfs_file_fsync, }; -static struct vm_operations_struct xfs_file_vm_ops = { +static const struct vm_operations_struct xfs_file_vm_ops = { .fault = filemap_fault, .page_mkwrite = xfs_vm_page_mkwrite, }; |