Diffstat (limited to 'fs')
-rw-r--r--  fs/aio.c                 |  86
-rw-r--r--  fs/btrfs/backref.c       |  14
-rw-r--r--  fs/btrfs/btrfs_inode.h   |   6
-rw-r--r--  fs/btrfs/ctree.c         |  20
-rw-r--r--  fs/btrfs/ctree.h         |   4
-rw-r--r--  fs/btrfs/disk-io.c       |  32
-rw-r--r--  fs/btrfs/extent-tree.c   | 285
-rw-r--r--  fs/btrfs/file-item.c     |   2
-rw-r--r--  fs/btrfs/file.c          |  26
-rw-r--r--  fs/btrfs/inode.c         |  59
-rw-r--r--  fs/btrfs/ordered-data.c  | 123
-rw-r--r--  fs/btrfs/ordered-data.h  |   5
-rw-r--r--  fs/btrfs/qgroup.c        | 169
-rw-r--r--  fs/btrfs/qgroup.h        |   1
-rw-r--r--  fs/btrfs/super.c         |  51
-rw-r--r--  fs/btrfs/transaction.c   |  33
-rw-r--r--  fs/btrfs/transaction.h   |   1
-rw-r--r--  fs/btrfs/ulist.h         |  15
-rw-r--r--  fs/cifs/cifsfs.c         |  24
-rw-r--r--  fs/cifs/cifsglob.h       |   5
-rw-r--r--  fs/cifs/cifspdu.h        |  23
-rw-r--r--  fs/cifs/file.c           |   2
-rw-r--r--  fs/cifs/inode.c          |   6
-rw-r--r--  fs/cifs/misc.c           |   7
-rw-r--r--  fs/cifs/readdir.c        |   2
-rw-r--r--  fs/cifs/smb1ops.c        |   7
-rw-r--r--  fs/cifs/smb2maperror.c   |   4
-rw-r--r--  fs/cifs/smb2misc.c       |  17
-rw-r--r--  fs/cifs/smb2ops.c        | 170
-rw-r--r--  fs/cifs/smb2pdu.c        |  16
-rw-r--r--  fs/cifs/smb2pdu.h        |   6
-rw-r--r--  fs/cifs/smbfsctl.h       |   2
-rw-r--r--  fs/ext3/super.c          |   5
-rw-r--r--  fs/isofs/inode.c         |  15
-rw-r--r--  fs/isofs/isofs.h         |  23
-rw-r--r--  fs/isofs/rock.c          |  39
-rw-r--r--  fs/locks.c               |  86
-rw-r--r--  fs/udf/namei.c           |   3
38 files changed, 957 insertions(+), 437 deletions(-)
diff --git a/fs/aio.c b/fs/aio.c
index bd7ec2cc2674..ae635872affb 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -192,7 +192,6 @@ static struct file *aio_private_file(struct kioctx *ctx, loff_t nr_pages)
}
file->f_flags = O_RDWR;
- file->private_data = ctx;
return file;
}
@@ -202,7 +201,7 @@ static struct dentry *aio_mount(struct file_system_type *fs_type,
static const struct dentry_operations ops = {
.d_dname = simple_dname,
};
- return mount_pseudo(fs_type, "aio:", NULL, &ops, 0xa10a10a1);
+ return mount_pseudo(fs_type, "aio:", NULL, &ops, AIO_RING_MAGIC);
}
/* aio_setup
@@ -556,8 +555,7 @@ static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm)
struct aio_ring *ring;
spin_lock(&mm->ioctx_lock);
- rcu_read_lock();
- table = rcu_dereference(mm->ioctx_table);
+ table = rcu_dereference_raw(mm->ioctx_table);
while (1) {
if (table)
@@ -565,7 +563,6 @@ static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm)
if (!table->table[i]) {
ctx->id = i;
table->table[i] = ctx;
- rcu_read_unlock();
spin_unlock(&mm->ioctx_lock);
/* While kioctx setup is in progress,
@@ -579,8 +576,6 @@ static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm)
}
new_nr = (table ? table->nr : 1) * 4;
-
- rcu_read_unlock();
spin_unlock(&mm->ioctx_lock);
table = kzalloc(sizeof(*table) + sizeof(struct kioctx *) *
@@ -591,8 +586,7 @@ static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm)
table->nr = new_nr;
spin_lock(&mm->ioctx_lock);
- rcu_read_lock();
- old = rcu_dereference(mm->ioctx_table);
+ old = rcu_dereference_raw(mm->ioctx_table);
if (!old) {
rcu_assign_pointer(mm->ioctx_table, table);
@@ -739,12 +733,9 @@ static int kill_ioctx(struct mm_struct *mm, struct kioctx *ctx,
spin_lock(&mm->ioctx_lock);
- rcu_read_lock();
- table = rcu_dereference(mm->ioctx_table);
-
+ table = rcu_dereference_raw(mm->ioctx_table);
WARN_ON(ctx != table->table[ctx->id]);
table->table[ctx->id] = NULL;
- rcu_read_unlock();
spin_unlock(&mm->ioctx_lock);
/* percpu_ref_kill() will do the necessary call_rcu() */
@@ -793,40 +784,30 @@ EXPORT_SYMBOL(wait_on_sync_kiocb);
*/
void exit_aio(struct mm_struct *mm)
{
- struct kioctx_table *table;
- struct kioctx *ctx;
- unsigned i = 0;
-
- while (1) {
- rcu_read_lock();
- table = rcu_dereference(mm->ioctx_table);
-
- do {
- if (!table || i >= table->nr) {
- rcu_read_unlock();
- rcu_assign_pointer(mm->ioctx_table, NULL);
- if (table)
- kfree(table);
- return;
- }
+ struct kioctx_table *table = rcu_dereference_raw(mm->ioctx_table);
+ int i;
- ctx = table->table[i++];
- } while (!ctx);
+ if (!table)
+ return;
- rcu_read_unlock();
+ for (i = 0; i < table->nr; ++i) {
+ struct kioctx *ctx = table->table[i];
+ if (!ctx)
+ continue;
/*
- * We don't need to bother with munmap() here -
- * exit_mmap(mm) is coming and it'll unmap everything.
- * Since aio_free_ring() uses non-zero ->mmap_size
- * as indicator that it needs to unmap the area,
- * just set it to 0; aio_free_ring() is the only
- * place that uses ->mmap_size, so it's safe.
+ * We don't need to bother with munmap() here - exit_mmap(mm)
+ * is coming and it'll unmap everything. And we simply can't,
+ * this is not necessarily our ->mm.
+ * Since kill_ioctx() uses non-zero ->mmap_size as indicator
+ * that it needs to unmap the area, just set it to 0.
*/
ctx->mmap_size = 0;
-
kill_ioctx(mm, ctx, NULL);
}
+
+ RCU_INIT_POINTER(mm->ioctx_table, NULL);
+ kfree(table);
}
static void put_reqs_available(struct kioctx *ctx, unsigned nr)
@@ -834,10 +815,8 @@ static void put_reqs_available(struct kioctx *ctx, unsigned nr)
struct kioctx_cpu *kcpu;
unsigned long flags;
- preempt_disable();
- kcpu = this_cpu_ptr(ctx->cpu);
-
local_irq_save(flags);
+ kcpu = this_cpu_ptr(ctx->cpu);
kcpu->reqs_available += nr;
while (kcpu->reqs_available >= ctx->req_batch * 2) {
@@ -846,7 +825,6 @@ static void put_reqs_available(struct kioctx *ctx, unsigned nr)
}
local_irq_restore(flags);
- preempt_enable();
}
static bool get_reqs_available(struct kioctx *ctx)
@@ -855,10 +833,8 @@ static bool get_reqs_available(struct kioctx *ctx)
bool ret = false;
unsigned long flags;
- preempt_disable();
- kcpu = this_cpu_ptr(ctx->cpu);
-
local_irq_save(flags);
+ kcpu = this_cpu_ptr(ctx->cpu);
if (!kcpu->reqs_available) {
int old, avail = atomic_read(&ctx->reqs_available);
@@ -878,7 +854,6 @@ static bool get_reqs_available(struct kioctx *ctx)
kcpu->reqs_available--;
out:
local_irq_restore(flags);
- preempt_enable();
return ret;
}
@@ -1047,7 +1022,7 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
}
EXPORT_SYMBOL(aio_complete);
-/* aio_read_events
+/* aio_read_events_ring
* Pull an event off of the ioctx's event ring. Returns the number of
* events fetched
*/
@@ -1270,12 +1245,12 @@ static ssize_t aio_setup_vectored_rw(struct kiocb *kiocb,
if (compat)
ret = compat_rw_copy_check_uvector(rw,
(struct compat_iovec __user *)buf,
- *nr_segs, 1, *iovec, iovec);
+ *nr_segs, UIO_FASTIOV, *iovec, iovec);
else
#endif
ret = rw_copy_check_uvector(rw,
(struct iovec __user *)buf,
- *nr_segs, 1, *iovec, iovec);
+ *nr_segs, UIO_FASTIOV, *iovec, iovec);
if (ret < 0)
return ret;
@@ -1299,9 +1274,8 @@ static ssize_t aio_setup_single_vector(struct kiocb *kiocb,
}
/*
- * aio_setup_iocb:
- * Performs the initial checks and aio retry method
- * setup for the kiocb at the time of io submission.
+ * aio_run_iocb:
+ * Performs the initial checks and io submission.
*/
static ssize_t aio_run_iocb(struct kiocb *req, unsigned opcode,
char __user *buf, bool compat)
@@ -1313,7 +1287,7 @@ static ssize_t aio_run_iocb(struct kiocb *req, unsigned opcode,
fmode_t mode;
aio_rw_op *rw_op;
rw_iter_op *iter_op;
- struct iovec inline_vec, *iovec = &inline_vec;
+ struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
struct iov_iter iter;
switch (opcode) {
@@ -1348,7 +1322,7 @@ rw_common:
if (!ret)
ret = rw_verify_area(rw, file, &req->ki_pos, req->ki_nbytes);
if (ret < 0) {
- if (iovec != &inline_vec)
+ if (iovec != inline_vecs)
kfree(iovec);
return ret;
}
@@ -1395,7 +1369,7 @@ rw_common:
return -EINVAL;
}
- if (iovec != &inline_vec)
+ if (iovec != inline_vecs)
kfree(iovec);
if (ret != -EIOCBQUEUED) {
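
The aio_run_iocb() change above replaces the single inline struct iovec with an on-stack array of UIO_FASTIOV entries and passes UIO_FASTIOV instead of 1 as the fast-path capacity to rw_copy_check_uvector(), so small vectored requests no longer force a heap allocation. A user-space sketch of the same small-array-with-heap-fallback pattern (names here are illustrative, not the kernel API):

    #include <stdlib.h>
    #include <string.h>

    #define FASTIOV 8                    /* stands in for UIO_FASTIOV */

    struct vec { void *base; size_t len; };

    /* Use the caller's small stack array when the request fits,
     * otherwise fall back to the heap. The caller frees the result
     * only when it differs from 'inline_vecs' -- the same test the
     * patched code performs with (iovec != inline_vecs). */
    static struct vec *copy_vecs(const struct vec *src, unsigned long nr,
                                 struct vec *inline_vecs)
    {
        struct vec *v = inline_vecs;

        if (nr > FASTIOV) {
            v = malloc(nr * sizeof(*v));
            if (!v)
                return NULL;
        }
        memcpy(v, src, nr * sizeof(*v));
        return v;
    }

    int main(void)
    {
        struct vec inline_vecs[FASTIOV];
        struct vec src[2] = { { 0, 1 }, { 0, 2 } };
        struct vec *v = copy_vecs(src, 2, inline_vecs);

        if (v && v != inline_vecs)
            free(v);
        return 0;
    }
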
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index e25564bfcb46..54a201dac7f9 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -276,9 +276,8 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
}
if (ret > 0)
goto next;
- ret = ulist_add_merge(parents, eb->start,
- (uintptr_t)eie,
- (u64 *)&old, GFP_NOFS);
+ ret = ulist_add_merge_ptr(parents, eb->start,
+ eie, (void **)&old, GFP_NOFS);
if (ret < 0)
break;
if (!ret && extent_item_pos) {
@@ -1001,16 +1000,19 @@ again:
ret = -EIO;
goto out;
}
+ btrfs_tree_read_lock(eb);
+ btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
ret = find_extent_in_eb(eb, bytenr,
*extent_item_pos, &eie);
+ btrfs_tree_read_unlock_blocking(eb);
free_extent_buffer(eb);
if (ret < 0)
goto out;
ref->inode_list = eie;
}
- ret = ulist_add_merge(refs, ref->parent,
- (uintptr_t)ref->inode_list,
- (u64 *)&eie, GFP_NOFS);
+ ret = ulist_add_merge_ptr(refs, ref->parent,
+ ref->inode_list,
+ (void **)&eie, GFP_NOFS);
if (ret < 0)
goto out;
if (!ret && extent_item_pos) {
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 4794923c410c..43527fd78825 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -84,12 +84,6 @@ struct btrfs_inode {
*/
struct list_head delalloc_inodes;
- /*
- * list for tracking inodes that must be sent to disk before a
- * rename or truncate commit
- */
- struct list_head ordered_operations;
-
/* node for the red-black tree that links inodes in subvolume root */
struct rb_node rb_node;
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index aeab453b8e24..44ee5d2e52a4 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -280,9 +280,9 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
WARN_ON(btrfs_header_generation(buf) > trans->transid);
if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID)
- ret = btrfs_inc_ref(trans, root, cow, 1, 1);
+ ret = btrfs_inc_ref(trans, root, cow, 1);
else
- ret = btrfs_inc_ref(trans, root, cow, 0, 1);
+ ret = btrfs_inc_ref(trans, root, cow, 0);
if (ret)
return ret;
@@ -1035,14 +1035,14 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
if ((owner == root->root_key.objectid ||
root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) &&
!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) {
- ret = btrfs_inc_ref(trans, root, buf, 1, 1);
+ ret = btrfs_inc_ref(trans, root, buf, 1);
BUG_ON(ret); /* -ENOMEM */
if (root->root_key.objectid ==
BTRFS_TREE_RELOC_OBJECTID) {
- ret = btrfs_dec_ref(trans, root, buf, 0, 1);
+ ret = btrfs_dec_ref(trans, root, buf, 0);
BUG_ON(ret); /* -ENOMEM */
- ret = btrfs_inc_ref(trans, root, cow, 1, 1);
+ ret = btrfs_inc_ref(trans, root, cow, 1);
BUG_ON(ret); /* -ENOMEM */
}
new_flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
@@ -1050,9 +1050,9 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
if (root->root_key.objectid ==
BTRFS_TREE_RELOC_OBJECTID)
- ret = btrfs_inc_ref(trans, root, cow, 1, 1);
+ ret = btrfs_inc_ref(trans, root, cow, 1);
else
- ret = btrfs_inc_ref(trans, root, cow, 0, 1);
+ ret = btrfs_inc_ref(trans, root, cow, 0);
BUG_ON(ret); /* -ENOMEM */
}
if (new_flags != 0) {
@@ -1069,11 +1069,11 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
if (root->root_key.objectid ==
BTRFS_TREE_RELOC_OBJECTID)
- ret = btrfs_inc_ref(trans, root, cow, 1, 1);
+ ret = btrfs_inc_ref(trans, root, cow, 1);
else
- ret = btrfs_inc_ref(trans, root, cow, 0, 1);
+ ret = btrfs_inc_ref(trans, root, cow, 0);
BUG_ON(ret); /* -ENOMEM */
- ret = btrfs_dec_ref(trans, root, buf, 1, 1);
+ ret = btrfs_dec_ref(trans, root, buf, 1);
BUG_ON(ret); /* -ENOMEM */
}
clean_tree_block(trans, root, buf);
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index be91397f4e92..8e29b614fe93 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3326,9 +3326,9 @@ int btrfs_reserve_extent(struct btrfs_root *root, u64 num_bytes,
u64 min_alloc_size, u64 empty_size, u64 hint_byte,
struct btrfs_key *ins, int is_data, int delalloc);
int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
- struct extent_buffer *buf, int full_backref, int no_quota);
+ struct extent_buffer *buf, int full_backref);
int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
- struct extent_buffer *buf, int full_backref, int no_quota);
+ struct extent_buffer *buf, int full_backref);
int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 bytenr, u64 num_bytes, u64 flags,
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 08e65e9cf2aa..d0ed9e664f7d 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -60,8 +60,6 @@ static void end_workqueue_fn(struct btrfs_work *work);
static void free_fs_root(struct btrfs_root *root);
static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
int read_only);
-static void btrfs_destroy_ordered_operations(struct btrfs_transaction *t,
- struct btrfs_root *root);
static void btrfs_destroy_ordered_extents(struct btrfs_root *root);
static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
struct btrfs_root *root);
@@ -3829,34 +3827,6 @@ static void btrfs_error_commit_super(struct btrfs_root *root)
btrfs_cleanup_transaction(root);
}
-static void btrfs_destroy_ordered_operations(struct btrfs_transaction *t,
- struct btrfs_root *root)
-{
- struct btrfs_inode *btrfs_inode;
- struct list_head splice;
-
- INIT_LIST_HEAD(&splice);
-
- mutex_lock(&root->fs_info->ordered_operations_mutex);
- spin_lock(&root->fs_info->ordered_root_lock);
-
- list_splice_init(&t->ordered_operations, &splice);
- while (!list_empty(&splice)) {
- btrfs_inode = list_entry(splice.next, struct btrfs_inode,
- ordered_operations);
-
- list_del_init(&btrfs_inode->ordered_operations);
- spin_unlock(&root->fs_info->ordered_root_lock);
-
- btrfs_invalidate_inodes(btrfs_inode->root);
-
- spin_lock(&root->fs_info->ordered_root_lock);
- }
-
- spin_unlock(&root->fs_info->ordered_root_lock);
- mutex_unlock(&root->fs_info->ordered_operations_mutex);
-}
-
static void btrfs_destroy_ordered_extents(struct btrfs_root *root)
{
struct btrfs_ordered_extent *ordered;
@@ -4093,8 +4063,6 @@ again:
void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans,
struct btrfs_root *root)
{
- btrfs_destroy_ordered_operations(cur_trans, root);
-
btrfs_destroy_delayed_refs(cur_trans, root);
cur_trans->state = TRANS_STATE_COMMIT_START;
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 813537f362f9..102ed3143976 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3057,7 +3057,7 @@ out:
static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct extent_buffer *buf,
- int full_backref, int inc, int no_quota)
+ int full_backref, int inc)
{
u64 bytenr;
u64 num_bytes;
@@ -3111,7 +3111,7 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
key.offset -= btrfs_file_extent_offset(buf, fi);
ret = process_func(trans, root, bytenr, num_bytes,
parent, ref_root, key.objectid,
- key.offset, no_quota);
+ key.offset, 1);
if (ret)
goto fail;
} else {
@@ -3119,7 +3119,7 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
num_bytes = btrfs_level_size(root, level - 1);
ret = process_func(trans, root, bytenr, num_bytes,
parent, ref_root, level - 1, 0,
- no_quota);
+ 1);
if (ret)
goto fail;
}
@@ -3130,15 +3130,15 @@ fail:
}
int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
- struct extent_buffer *buf, int full_backref, int no_quota)
+ struct extent_buffer *buf, int full_backref)
{
- return __btrfs_mod_ref(trans, root, buf, full_backref, 1, no_quota);
+ return __btrfs_mod_ref(trans, root, buf, full_backref, 1);
}
int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
- struct extent_buffer *buf, int full_backref, int no_quota)
+ struct extent_buffer *buf, int full_backref)
{
- return __btrfs_mod_ref(trans, root, buf, full_backref, 0, no_quota);
+ return __btrfs_mod_ref(trans, root, buf, full_backref, 0);
}
static int write_one_cache_group(struct btrfs_trans_handle *trans,
@@ -7478,6 +7478,220 @@ reada:
wc->reada_slot = slot;
}
+static int account_leaf_items(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct extent_buffer *eb)
+{
+ int nr = btrfs_header_nritems(eb);
+ int i, extent_type, ret;
+ struct btrfs_key key;
+ struct btrfs_file_extent_item *fi;
+ u64 bytenr, num_bytes;
+
+ for (i = 0; i < nr; i++) {
+ btrfs_item_key_to_cpu(eb, &key, i);
+
+ if (key.type != BTRFS_EXTENT_DATA_KEY)
+ continue;
+
+ fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
+ /* filter out non qgroup-accountable extents */
+ extent_type = btrfs_file_extent_type(eb, fi);
+
+ if (extent_type == BTRFS_FILE_EXTENT_INLINE)
+ continue;
+
+ bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
+ if (!bytenr)
+ continue;
+
+ num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
+
+ ret = btrfs_qgroup_record_ref(trans, root->fs_info,
+ root->objectid,
+ bytenr, num_bytes,
+ BTRFS_QGROUP_OPER_SUB_SUBTREE, 0);
+ if (ret)
+ return ret;
+ }
+ return 0;
+}
+
+/*
+ * Walk up the tree from the bottom, freeing leaves and any interior
+ * nodes which have had all slots visited. If a node (leaf or
+ * interior) is freed, the node above it will have its slot
+ * incremented. The root node will never be freed.
+ *
+ * At the end of this function, we should have a path which has all
+ * slots incremented to the next position for a search. If we need to
+ * read a new node it will be NULL and the node above it will have the
+ * correct slot selected for a later read.
+ *
+ * If we increment the root node's slot counter past the number of
+ * elements, 1 is returned to signal completion of the search.
+ */
+static int adjust_slots_upwards(struct btrfs_root *root,
+ struct btrfs_path *path, int root_level)
+{
+ int level = 0;
+ int nr, slot;
+ struct extent_buffer *eb;
+
+ if (root_level == 0)
+ return 1;
+
+ while (level <= root_level) {
+ eb = path->nodes[level];
+ nr = btrfs_header_nritems(eb);
+ path->slots[level]++;
+ slot = path->slots[level];
+ if (slot >= nr || level == 0) {
+ /*
+ * Don't free the root - we will detect this
+ * condition after our loop and return a
+ * positive value for caller to stop walking the tree.
+ */
+ if (level != root_level) {
+ btrfs_tree_unlock_rw(eb, path->locks[level]);
+ path->locks[level] = 0;
+
+ free_extent_buffer(eb);
+ path->nodes[level] = NULL;
+ path->slots[level] = 0;
+ }
+ } else {
+ /*
+ * We have a valid slot to walk back down
+ * from. Stop here so caller can process these
+ * new nodes.
+ */
+ break;
+ }
+
+ level++;
+ }
+
+ eb = path->nodes[root_level];
+ if (path->slots[root_level] >= btrfs_header_nritems(eb))
+ return 1;
+
+ return 0;
+}
+
+/*
+ * root_eb is the subtree root and is locked before this function is called.
+ */
+static int account_shared_subtree(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct extent_buffer *root_eb,
+ u64 root_gen,
+ int root_level)
+{
+ int ret = 0;
+ int level;
+ struct extent_buffer *eb = root_eb;
+ struct btrfs_path *path = NULL;
+
+ BUG_ON(root_level < 0 || root_level > BTRFS_MAX_LEVEL);
+ BUG_ON(root_eb == NULL);
+
+ if (!root->fs_info->quota_enabled)
+ return 0;
+
+ if (!extent_buffer_uptodate(root_eb)) {
+ ret = btrfs_read_buffer(root_eb, root_gen);
+ if (ret)
+ goto out;
+ }
+
+ if (root_level == 0) {
+ ret = account_leaf_items(trans, root, root_eb);
+ goto out;
+ }
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
+ /*
+ * Walk down the tree. Missing extent blocks are filled in as
+ * we go. Metadata is accounted every time we read a new
+ * extent block.
+ *
+ * When we reach a leaf, we account for file extent items in it,
+ * walk back up the tree (adjusting slot pointers as we go)
+ * and restart the search process.
+ */
+ extent_buffer_get(root_eb); /* For path */
+ path->nodes[root_level] = root_eb;
+ path->slots[root_level] = 0;
+ path->locks[root_level] = 0; /* so release_path doesn't try to unlock */
+walk_down:
+ level = root_level;
+ while (level >= 0) {
+ if (path->nodes[level] == NULL) {
+ int child_bsize = root->nodesize;
+ int parent_slot;
+ u64 child_gen;
+ u64 child_bytenr;
+
+ /* We need to get child blockptr/gen from
+ * parent before we can read it. */
+ eb = path->nodes[level + 1];
+ parent_slot = path->slots[level + 1];
+ child_bytenr = btrfs_node_blockptr(eb, parent_slot);
+ child_gen = btrfs_node_ptr_generation(eb, parent_slot);
+
+ eb = read_tree_block(root, child_bytenr, child_bsize,
+ child_gen);
+ if (!eb || !extent_buffer_uptodate(eb)) {
+ ret = -EIO;
+ goto out;
+ }
+
+ path->nodes[level] = eb;
+ path->slots[level] = 0;
+
+ btrfs_tree_read_lock(eb);
+ btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
+ path->locks[level] = BTRFS_READ_LOCK_BLOCKING;
+
+ ret = btrfs_qgroup_record_ref(trans, root->fs_info,
+ root->objectid,
+ child_bytenr,
+ child_bsize,
+ BTRFS_QGROUP_OPER_SUB_SUBTREE,
+ 0);
+ if (ret)
+ goto out;
+
+ }
+
+ if (level == 0) {
+ ret = account_leaf_items(trans, root, path->nodes[level]);
+ if (ret)
+ goto out;
+
+ /* Nonzero return here means we completed our search */
+ ret = adjust_slots_upwards(root, path, root_level);
+ if (ret)
+ break;
+
+ /* Restart search with new slots */
+ goto walk_down;
+ }
+
+ level--;
+ }
+
+ ret = 0;
+out:
+ btrfs_free_path(path);
+
+ return ret;
+}
+
/*
* helper to process tree block while walking down the tree.
*
@@ -7532,9 +7746,9 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
/* wc->stage == UPDATE_BACKREF */
if (!(wc->flags[level] & flag)) {
BUG_ON(!path->locks[level]);
- ret = btrfs_inc_ref(trans, root, eb, 1, wc->for_reloc);
+ ret = btrfs_inc_ref(trans, root, eb, 1);
BUG_ON(ret); /* -ENOMEM */
- ret = btrfs_dec_ref(trans, root, eb, 0, wc->for_reloc);
+ ret = btrfs_dec_ref(trans, root, eb, 0);
BUG_ON(ret); /* -ENOMEM */
ret = btrfs_set_disk_extent_flags(trans, root, eb->start,
eb->len, flag,
@@ -7581,6 +7795,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
int level = wc->level;
int reada = 0;
int ret = 0;
+ bool need_account = false;
generation = btrfs_node_ptr_generation(path->nodes[level],
path->slots[level]);
@@ -7626,6 +7841,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
if (wc->stage == DROP_REFERENCE) {
if (wc->refs[level - 1] > 1) {
+ need_account = true;
if (level == 1 &&
(wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF))
goto skip;
@@ -7689,6 +7905,16 @@ skip:
parent = 0;
}
+ if (need_account) {
+ ret = account_shared_subtree(trans, root, next,
+ generation, level - 1);
+ if (ret) {
+ printk_ratelimited(KERN_ERR "BTRFS: %s Error "
+ "%d accounting shared subtree. Quota "
+ "is out of sync, rescan required.\n",
+ root->fs_info->sb->s_id, ret);
+ }
+ }
ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent,
root->root_key.objectid, level - 1, 0, 0);
BUG_ON(ret); /* -ENOMEM */
@@ -7769,12 +7995,17 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
if (wc->refs[level] == 1) {
if (level == 0) {
if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
- ret = btrfs_dec_ref(trans, root, eb, 1,
- wc->for_reloc);
+ ret = btrfs_dec_ref(trans, root, eb, 1);
else
- ret = btrfs_dec_ref(trans, root, eb, 0,
- wc->for_reloc);
+ ret = btrfs_dec_ref(trans, root, eb, 0);
BUG_ON(ret); /* -ENOMEM */
+ ret = account_leaf_items(trans, root, eb);
+ if (ret) {
+ printk_ratelimited(KERN_ERR "BTRFS: %s Error "
+ "%d accounting leaf items. Quota "
+ "is out of sync, rescan required.\n",
+ root->fs_info->sb->s_id, ret);
+ }
}
/* make block locked assertion in clean_tree_block happy */
if (!path->locks[level] &&
@@ -7900,6 +8131,8 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
int level;
bool root_dropped = false;
+ btrfs_debug(root->fs_info, "Drop subvolume %llu", root->objectid);
+
path = btrfs_alloc_path();
if (!path) {
err = -ENOMEM;
@@ -8025,6 +8258,24 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
goto out_end_trans;
}
+ /*
+ * Qgroup update accounting is run from
+ * delayed ref handling. This usually works
+ * out because delayed refs are normally the
+ * only way qgroup updates are added. However,
+ * we may have added updates during our tree
+ * walk so run qgroups here to make sure we
+ * don't lose any updates.
+ */
+ ret = btrfs_delayed_qgroup_accounting(trans,
+ root->fs_info);
+ if (ret)
+ printk_ratelimited(KERN_ERR "BTRFS: Failure %d "
+ "running qgroup updates "
+ "during snapshot delete. "
+ "Quota is out of sync, "
+ "rescan required.\n", ret);
+
btrfs_end_transaction_throttle(trans, tree_root);
if (!for_reloc && btrfs_need_cleaner_sleep(root)) {
pr_debug("BTRFS: drop snapshot early exit\n");
@@ -8078,6 +8329,14 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
}
root_dropped = true;
out_end_trans:
+ ret = btrfs_delayed_qgroup_accounting(trans, tree_root->fs_info);
+ if (ret)
+ printk_ratelimited(KERN_ERR "BTRFS: Failure %d "
+ "running qgroup updates "
+ "during snapshot delete. "
+ "Quota is out of sync, "
+ "rescan required.\n", ret);
+
btrfs_end_transaction_throttle(trans, tree_root);
out_free:
kfree(wc);
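
The long comment above adjust_slots_upwards() describes a restartable traversal: descend to a leaf, account its items, then pop back up incrementing slots until some node still has an unvisited child, and descend again from there. A stand-alone sketch of the same control flow over a plain in-memory n-ary tree (illustrative types; the real code operates on locked extent buffers and reads children from disk):

    #include <stdio.h>

    #define MAX_LEVEL 8

    struct node {
        int nr;                          /* number of children; 0 = leaf */
        struct node *child[4];
    };

    /* Advance per-level slot counters to the next leaf position.
     * Returns 1 once the root's slots are exhausted. */
    static int adjust_up(struct node **nodes, int *slots, int root_level)
    {
        int level;

        for (level = 0; level <= root_level; level++) {
            slots[level]++;
            if (level == 0 || slots[level] >= nodes[level]->nr) {
                if (level != root_level) {
                    nodes[level] = NULL; /* refill on the next descent */
                    slots[level] = 0;
                }
            } else {
                break;                   /* valid slot to descend from */
            }
        }
        return slots[root_level] >= nodes[root_level]->nr;
    }

    /* Visit every leaf using the walk-down / adjust-slots-upward loop
     * that account_shared_subtree() uses over extent buffers. */
    static void walk_tree(struct node *root, int root_level,
                          void (*visit)(struct node *))
    {
        struct node *nodes[MAX_LEVEL] = { NULL };
        int slots[MAX_LEVEL] = { 0 };
        int level;

        if (root_level == 0) {           /* tree is a single leaf */
            visit(root);
            return;
        }
        nodes[root_level] = root;

        for (;;) {
            /* walk down, "reading" each missing child from the
             * parent's current slot */
            for (level = root_level - 1; level >= 0; level--)
                if (!nodes[level])
                    nodes[level] = nodes[level + 1]->child[slots[level + 1]];

            visit(nodes[0]);             /* the account_leaf_items() step */

            if (adjust_up(nodes, slots, root_level))
                return;                  /* search complete */
        }
    }

    static void print_leaf(struct node *leaf)
    {
        printf("leaf nr=%d\n", leaf->nr);
    }

    int main(void)
    {
        struct node leaves[3] = {{ 0 }};
        struct node root = { 3, { &leaves[0], &leaves[1], &leaves[2] } };

        walk_tree(&root, 1, print_leaf);
        return 0;
    }
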
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index f46cfe45d686..54c84daec9b5 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -756,7 +756,7 @@ again:
found_next = 1;
if (ret != 0)
goto insert;
- slot = 0;
+ slot = path->slots[0];
}
btrfs_item_key_to_cpu(path->nodes[0], &found_key, slot);
if (found_key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 1f2b99cb55ea..d3afac292d67 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1838,33 +1838,9 @@ out:
int btrfs_release_file(struct inode *inode, struct file *filp)
{
- /*
- * ordered_data_close is set by settattr when we are about to truncate
- * a file from a non-zero size to a zero size. This tries to
- * flush down new bytes that may have been written if the
- * application were using truncate to replace a file in place.
- */
- if (test_and_clear_bit(BTRFS_INODE_ORDERED_DATA_CLOSE,
- &BTRFS_I(inode)->runtime_flags)) {
- struct btrfs_trans_handle *trans;
- struct btrfs_root *root = BTRFS_I(inode)->root;
-
- /*
- * We need to block on a committing transaction to keep us from
- * throwing a ordered operation on to the list and causing
- * something like sync to deadlock trying to flush out this
- * inode.
- */
- trans = btrfs_start_transaction(root, 0);
- if (IS_ERR(trans))
- return PTR_ERR(trans);
- btrfs_add_ordered_operation(trans, BTRFS_I(inode)->root, inode);
- btrfs_end_transaction(trans, root);
- if (inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT)
- filemap_flush(inode->i_mapping);
- }
if (filp->private_data)
btrfs_ioctl_trans_end(filp);
+ filemap_flush(inode->i_mapping);
return 0;
}
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 3183742d6f0d..03708ef3deef 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -709,6 +709,18 @@ retry:
unlock_extent(io_tree, async_extent->start,
async_extent->start +
async_extent->ram_size - 1);
+
+ /*
+ * we need to redirty the pages if we decide to
+ * fallback to uncompressed IO, otherwise we
+ * will not submit these pages down to lower
+ * layers.
+ */
+ extent_range_redirty_for_io(inode,
+ async_extent->start,
+ async_extent->start +
+ async_extent->ram_size - 1);
+
goto retry;
}
goto out_free;
@@ -7939,27 +7951,6 @@ static int btrfs_truncate(struct inode *inode)
BUG_ON(ret);
/*
- * setattr is responsible for setting the ordered_data_close flag,
- * but that is only tested during the last file release. That
- * could happen well after the next commit, leaving a great big
- * window where new writes may get lost if someone chooses to write
- * to this file after truncating to zero
- *
- * The inode doesn't have any dirty data here, and so if we commit
- * this is a noop. If someone immediately starts writing to the inode
- * it is very likely we'll catch some of their writes in this
- * transaction, and the commit will find this file on the ordered
- * data list with good things to send down.
- *
- * This is a best effort solution, there is still a window where
- * using truncate to replace the contents of the file will
- * end up with a zero length file after a crash.
- */
- if (inode->i_size == 0 && test_bit(BTRFS_INODE_ORDERED_DATA_CLOSE,
- &BTRFS_I(inode)->runtime_flags))
- btrfs_add_ordered_operation(trans, root, inode);
-
- /*
* So if we truncate and then write and fsync we normally would just
* write the extents that changed, which is a problem if we need to
* first truncate that entire inode. So set this flag so we write out
@@ -8106,7 +8097,6 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
mutex_init(&ei->delalloc_mutex);
btrfs_ordered_inode_tree_init(&ei->ordered_tree);
INIT_LIST_HEAD(&ei->delalloc_inodes);
- INIT_LIST_HEAD(&ei->ordered_operations);
RB_CLEAR_NODE(&ei->rb_node);
return inode;
@@ -8146,17 +8136,6 @@ void btrfs_destroy_inode(struct inode *inode)
if (!root)
goto free;
- /*
- * Make sure we're properly removed from the ordered operation
- * lists.
- */
- smp_mb();
- if (!list_empty(&BTRFS_I(inode)->ordered_operations)) {
- spin_lock(&root->fs_info->ordered_root_lock);
- list_del_init(&BTRFS_I(inode)->ordered_operations);
- spin_unlock(&root->fs_info->ordered_root_lock);
- }
-
if (test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
&BTRFS_I(inode)->runtime_flags)) {
btrfs_info(root->fs_info, "inode %llu still on the orphan list",
@@ -8338,12 +8317,10 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
ret = 0;
/*
- * we're using rename to replace one file with another.
- * and the replacement file is large. Start IO on it now so
- * we don't add too much work to the end of the transaction
+ * we're using rename to replace one file with another. Start IO on it
+ * now so we don't add too much work to the end of the transaction
*/
- if (new_inode && S_ISREG(old_inode->i_mode) && new_inode->i_size &&
- old_inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT)
+ if (new_inode && S_ISREG(old_inode->i_mode) && new_inode->i_size)
filemap_flush(old_inode->i_mapping);
/* close the racy window with snapshot create/destroy ioctl */
@@ -8391,12 +8368,6 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
*/
btrfs_pin_log_trans(root);
}
- /*
- * make sure the inode gets flushed if it is replacing
- * something.
- */
- if (new_inode && new_inode->i_size && S_ISREG(old_inode->i_mode))
- btrfs_add_ordered_operation(trans, root, old_inode);
inode_inc_iversion(old_dir);
inode_inc_iversion(new_dir);
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 7187b14faa6c..963895c1f801 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -571,18 +571,6 @@ void btrfs_remove_ordered_extent(struct inode *inode,
trace_btrfs_ordered_extent_remove(inode, entry);
- /*
- * we have no more ordered extents for this inode and
- * no dirty pages. We can safely remove it from the
- * list of ordered extents
- */
- if (RB_EMPTY_ROOT(&tree->tree) &&
- !mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY)) {
- spin_lock(&root->fs_info->ordered_root_lock);
- list_del_init(&BTRFS_I(inode)->ordered_operations);
- spin_unlock(&root->fs_info->ordered_root_lock);
- }
-
if (!root->nr_ordered_extents) {
spin_lock(&root->fs_info->ordered_root_lock);
BUG_ON(list_empty(&root->ordered_root));
@@ -687,81 +675,6 @@ void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr)
}
/*
- * this is used during transaction commit to write all the inodes
- * added to the ordered operation list. These files must be fully on
- * disk before the transaction commits.
- *
- * we have two modes here, one is to just start the IO via filemap_flush
- * and the other is to wait for all the io. When we wait, we have an
- * extra check to make sure the ordered operation list really is empty
- * before we return
- */
-int btrfs_run_ordered_operations(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, int wait)
-{
- struct btrfs_inode *btrfs_inode;
- struct inode *inode;
- struct btrfs_transaction *cur_trans = trans->transaction;
- struct list_head splice;
- struct list_head works;
- struct btrfs_delalloc_work *work, *next;
- int ret = 0;
-
- INIT_LIST_HEAD(&splice);
- INIT_LIST_HEAD(&works);
-
- mutex_lock(&root->fs_info->ordered_extent_flush_mutex);
- spin_lock(&root->fs_info->ordered_root_lock);
- list_splice_init(&cur_trans->ordered_operations, &splice);
- while (!list_empty(&splice)) {
- btrfs_inode = list_entry(splice.next, struct btrfs_inode,
- ordered_operations);
- inode = &btrfs_inode->vfs_inode;
-
- list_del_init(&btrfs_inode->ordered_operations);
-
- /*
- * the inode may be getting freed (in sys_unlink path).
- */
- inode = igrab(inode);
- if (!inode)
- continue;
-
- if (!wait)
- list_add_tail(&BTRFS_I(inode)->ordered_operations,
- &cur_trans->ordered_operations);
- spin_unlock(&root->fs_info->ordered_root_lock);
-
- work = btrfs_alloc_delalloc_work(inode, wait, 1);
- if (!work) {
- spin_lock(&root->fs_info->ordered_root_lock);
- if (list_empty(&BTRFS_I(inode)->ordered_operations))
- list_add_tail(&btrfs_inode->ordered_operations,
- &splice);
- list_splice_tail(&splice,
- &cur_trans->ordered_operations);
- spin_unlock(&root->fs_info->ordered_root_lock);
- ret = -ENOMEM;
- goto out;
- }
- list_add_tail(&work->list, &works);
- btrfs_queue_work(root->fs_info->flush_workers,
- &work->work);
-
- cond_resched();
- spin_lock(&root->fs_info->ordered_root_lock);
- }
- spin_unlock(&root->fs_info->ordered_root_lock);
-out:
- list_for_each_entry_safe(work, next, &works, list) {
- list_del_init(&work->list);
- btrfs_wait_and_free_delalloc_work(work);
- }
- mutex_unlock(&root->fs_info->ordered_extent_flush_mutex);
- return ret;
-}
-
-/*
* Used to start IO or wait for a given ordered extent to finish.
*
* If wait is one, this effectively waits on page writeback for all the pages
@@ -1120,42 +1033,6 @@ out:
return index;
}
-
-/*
- * add a given inode to the list of inodes that must be fully on
- * disk before a transaction commit finishes.
- *
- * This basically gives us the ext3 style data=ordered mode, and it is mostly
- * used to make sure renamed files are fully on disk.
- *
- * It is a noop if the inode is already fully on disk.
- *
- * If trans is not null, we'll do a friendly check for a transaction that
- * is already flushing things and force the IO down ourselves.
- */
-void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct inode *inode)
-{
- struct btrfs_transaction *cur_trans = trans->transaction;
- u64 last_mod;
-
- last_mod = max(BTRFS_I(inode)->generation, BTRFS_I(inode)->last_trans);
-
- /*
- * if this file hasn't been changed since the last transaction
- * commit, we can safely return without doing anything
- */
- if (last_mod <= root->fs_info->last_trans_committed)
- return;
-
- spin_lock(&root->fs_info->ordered_root_lock);
- if (list_empty(&BTRFS_I(inode)->ordered_operations)) {
- list_add_tail(&BTRFS_I(inode)->ordered_operations,
- &cur_trans->ordered_operations);
- }
- spin_unlock(&root->fs_info->ordered_root_lock);
-}
-
int __init ordered_data_init(void)
{
btrfs_ordered_extent_cache = kmem_cache_create("btrfs_ordered_extent",
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index 246897058efb..d81a274d621e 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -190,11 +190,6 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
struct btrfs_ordered_extent *ordered);
int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr,
u32 *sum, int len);
-int btrfs_run_ordered_operations(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, int wait);
-void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct inode *inode);
int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr);
void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr);
void btrfs_get_logged_extents(struct inode *inode,
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 98cb6b2630f9..b497498484be 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1201,6 +1201,50 @@ out:
mutex_unlock(&fs_info->qgroup_ioctl_lock);
return ret;
}
+
+static int comp_oper_exist(struct btrfs_qgroup_operation *oper1,
+ struct btrfs_qgroup_operation *oper2)
+{
+ /*
+ * Ignore seq and type here, we're looking for any operation
+ * at all related to this extent on that root.
+ */
+ if (oper1->bytenr < oper2->bytenr)
+ return -1;
+ if (oper1->bytenr > oper2->bytenr)
+ return 1;
+ if (oper1->ref_root < oper2->ref_root)
+ return -1;
+ if (oper1->ref_root > oper2->ref_root)
+ return 1;
+ return 0;
+}
+
+static int qgroup_oper_exists(struct btrfs_fs_info *fs_info,
+ struct btrfs_qgroup_operation *oper)
+{
+ struct rb_node *n;
+ struct btrfs_qgroup_operation *cur;
+ int cmp;
+
+ spin_lock(&fs_info->qgroup_op_lock);
+ n = fs_info->qgroup_op_tree.rb_node;
+ while (n) {
+ cur = rb_entry(n, struct btrfs_qgroup_operation, n);
+ cmp = comp_oper_exist(cur, oper);
+ if (cmp < 0) {
+ n = n->rb_right;
+ } else if (cmp) {
+ n = n->rb_left;
+ } else {
+ spin_unlock(&fs_info->qgroup_op_lock);
+ return -EEXIST;
+ }
+ }
+ spin_unlock(&fs_info->qgroup_op_lock);
+ return 0;
+}
+
static int comp_oper(struct btrfs_qgroup_operation *oper1,
struct btrfs_qgroup_operation *oper2)
{
@@ -1290,6 +1334,23 @@ int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans,
oper->seq = atomic_inc_return(&fs_info->qgroup_op_seq);
INIT_LIST_HEAD(&oper->elem.list);
oper->elem.seq = 0;
+
+ if (type == BTRFS_QGROUP_OPER_SUB_SUBTREE) {
+ /*
+ * If any operation for this bytenr/ref_root combo
+ * exists, then we know it's not exclusively owned and
+ * shouldn't be queued up.
+ *
+ * This also catches the case where we have a cloned
+ * extent that gets queued up multiple times during
+ * drop snapshot.
+ */
+ if (qgroup_oper_exists(fs_info, oper)) {
+ kfree(oper);
+ return 0;
+ }
+ }
+
ret = insert_qgroup_oper(fs_info, oper);
if (ret) {
/* Shouldn't happen so have an assert for developers */
@@ -1884,6 +1945,111 @@ out:
}
/*
+ * Process a reference to a shared subtree. This type of operation is
+ * queued during snapshot removal when we encounter extents which are
+ * shared between more than one root.
+ */
+static int qgroup_subtree_accounting(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info,
+ struct btrfs_qgroup_operation *oper)
+{
+ struct ulist *roots = NULL;
+ struct ulist_node *unode;
+ struct ulist_iterator uiter;
+ struct btrfs_qgroup_list *glist;
+ struct ulist *parents;
+ int ret = 0;
+ int err;
+ struct btrfs_qgroup *qg;
+ u64 root_obj = 0;
+ struct seq_list elem = {};
+
+ parents = ulist_alloc(GFP_NOFS);
+ if (!parents)
+ return -ENOMEM;
+
+ btrfs_get_tree_mod_seq(fs_info, &elem);
+ ret = btrfs_find_all_roots(trans, fs_info, oper->bytenr,
+ elem.seq, &roots);
+ btrfs_put_tree_mod_seq(fs_info, &elem);
+ if (ret < 0)
+ return ret;
+
+ if (roots->nnodes != 1)
+ goto out;
+
+ ULIST_ITER_INIT(&uiter);
+ unode = ulist_next(roots, &uiter); /* Only want 1 so no need to loop */
+ /*
+ * If we find our ref root then that means all refs
+ * this extent has to the root have not yet been
+ * deleted. In that case, we do nothing and let the
+ * last ref for this bytenr drive our update.
+ *
+ * This can happen for example if an extent is
+ * referenced multiple times in a snapshot (clone,
+ * etc). If we are in the middle of snapshot removal,
+ * queued updates for such an extent will find the
+ * root if we have not yet finished removing the
+ * snapshot.
+ */
+ if (unode->val == oper->ref_root)
+ goto out;
+
+ root_obj = unode->val;
+ BUG_ON(!root_obj);
+
+ spin_lock(&fs_info->qgroup_lock);
+ qg = find_qgroup_rb(fs_info, root_obj);
+ if (!qg)
+ goto out_unlock;
+
+ qg->excl += oper->num_bytes;
+ qg->excl_cmpr += oper->num_bytes;
+ qgroup_dirty(fs_info, qg);
+
+ /*
+ * Adjust counts for parent groups. First we find all
+ * parents, then in the 2nd loop we do the adjustment
+ * while adding parents of the parents to our ulist.
+ */
+ list_for_each_entry(glist, &qg->groups, next_group) {
+ err = ulist_add(parents, glist->group->qgroupid,
+ ptr_to_u64(glist->group), GFP_ATOMIC);
+ if (err < 0) {
+ ret = err;
+ goto out_unlock;
+ }
+ }
+
+ ULIST_ITER_INIT(&uiter);
+ while ((unode = ulist_next(parents, &uiter))) {
+ qg = u64_to_ptr(unode->aux);
+ qg->excl += oper->num_bytes;
+ qg->excl_cmpr += oper->num_bytes;
+ qgroup_dirty(fs_info, qg);
+
+ /* Add any parents of the parents */
+ list_for_each_entry(glist, &qg->groups, next_group) {
+ err = ulist_add(parents, glist->group->qgroupid,
+ ptr_to_u64(glist->group), GFP_ATOMIC);
+ if (err < 0) {
+ ret = err;
+ goto out_unlock;
+ }
+ }
+ }
+
+out_unlock:
+ spin_unlock(&fs_info->qgroup_lock);
+
+out:
+ ulist_free(roots);
+ ulist_free(parents);
+ return ret;
+}
+
+/*
* btrfs_qgroup_account_ref is called for every ref that is added to or deleted
* from the fs. First, all roots referencing the extent are searched, and
* then the space is accounted accordingly to the different roots. The
@@ -1920,6 +2086,9 @@ static int btrfs_qgroup_account(struct btrfs_trans_handle *trans,
case BTRFS_QGROUP_OPER_SUB_SHARED:
ret = qgroup_shared_accounting(trans, fs_info, oper);
break;
+ case BTRFS_QGROUP_OPER_SUB_SUBTREE:
+ ret = qgroup_subtree_accounting(trans, fs_info, oper);
+ break;
default:
ASSERT(0);
}
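
qgroup_subtree_accounting() above bumps the counters on the one remaining owner, then walks the transitive closure of its parent qgroups, using a ulist so each group is adjusted exactly once even when parent chains converge on a shared ancestor. A minimal user-space sketch of that closure walk (a growable array plus a linear 'seen' check standing in for the kernel's ulist; all names are illustrative):

    #include <stdio.h>
    #include <stdlib.h>

    struct qgroup {
        unsigned long long id;
        unsigned long long excl;         /* exclusively-owned bytes */
        int nr_parents;
        struct qgroup **parents;
    };

    static int seen(struct qgroup **v, size_t n, struct qgroup *g)
    {
        size_t i;

        for (i = 0; i < n; i++)
            if (v[i] == g)
                return 1;
        return 0;
    }

    /* Add 'bytes' to qg and to the transitive closure of its parents,
     * counting each group once. Returns -1 on allocation failure. */
    static int propagate_excl(struct qgroup *qg, unsigned long long bytes)
    {
        struct qgroup **work;
        size_t cap = 16, n = 0, i = 0;
        int j;

        work = malloc(cap * sizeof(*work));
        if (!work)
            return -1;
        work[n++] = qg;                  /* seed with the group itself */

        while (i < n) {                  /* ULIST_ITER-style sweep */
            struct qgroup *g = work[i++];

            g->excl += bytes;            /* also excl_cmpr in the kernel */
            for (j = 0; j < g->nr_parents; j++) {
                struct qgroup *p = g->parents[j];

                if (seen(work, n, p))
                    continue;            /* shared ancestor: count once */
                if (n == cap) {
                    struct qgroup **t = realloc(work,
                                                2 * cap * sizeof(*work));
                    if (!t) {
                        free(work);
                        return -1;
                    }
                    work = t;
                    cap *= 2;
                }
                work[n++] = p;
            }
        }
        free(work);
        return 0;
    }

    int main(void)
    {
        struct qgroup top = { .id = 1 };
        struct qgroup *p[] = { &top };
        struct qgroup a = { .id = 100, .nr_parents = 1, .parents = p };
        struct qgroup b = { .id = 101, .nr_parents = 1, .parents = p };
        struct qgroup *ab[] = { &a, &b };
        struct qgroup leaf = { .id = 200, .nr_parents = 2, .parents = ab };

        propagate_excl(&leaf, 4096);
        printf("top.excl = %llu\n", top.excl);   /* 4096, not 8192 */
        return 0;
    }
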
diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h
index 5952ff1fbd7a..18cc68ca3090 100644
--- a/fs/btrfs/qgroup.h
+++ b/fs/btrfs/qgroup.h
@@ -44,6 +44,7 @@ enum btrfs_qgroup_operation_type {
BTRFS_QGROUP_OPER_ADD_SHARED,
BTRFS_QGROUP_OPER_SUB_EXCL,
BTRFS_QGROUP_OPER_SUB_SHARED,
+ BTRFS_QGROUP_OPER_SUB_SUBTREE,
};
struct btrfs_qgroup_operation {
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 67b48b9a03e0..c4124de4435b 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -1665,6 +1665,21 @@ static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes)
return 0;
}
+/*
+ * Calculate numbers for 'df', pessimistic in case of mixed raid profiles.
+ *
+ * If there's a redundant raid level at DATA block groups, use the respective
+ * multiplier to scale the sizes.
+ *
+ * Unused device space usage is based on simulating the chunk allocator
+ * algorithm that respects the device sizes, order of allocations and the
+ * 'alloc_start' value, this is a close approximation of the actual use but
+ * there are other factors that may change the result (like a new metadata
+ * chunk).
+ *
+ * FIXME: not accurate for mixed block groups, total and free/used are ok,
+ * available appears slightly larger.
+ */
static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
{
struct btrfs_fs_info *fs_info = btrfs_sb(dentry->d_sb);
@@ -1675,6 +1690,8 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
u64 total_free_data = 0;
int bits = dentry->d_sb->s_blocksize_bits;
__be32 *fsid = (__be32 *)fs_info->fsid;
+ unsigned factor = 1;
+ struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv;
int ret;
/* holding chunk_mutex to avoid allocating new chunks */
@@ -1682,30 +1699,52 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
rcu_read_lock();
list_for_each_entry_rcu(found, head, list) {
if (found->flags & BTRFS_BLOCK_GROUP_DATA) {
+ int i;
+
total_free_data += found->disk_total - found->disk_used;
total_free_data -=
btrfs_account_ro_block_groups_free_space(found);
+
+ for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) {
+ if (!list_empty(&found->block_groups[i])) {
+ switch (i) {
+ case BTRFS_RAID_DUP:
+ case BTRFS_RAID_RAID1:
+ case BTRFS_RAID_RAID10:
+ factor = 2;
+ }
+ }
+ }
}
total_used += found->disk_used;
}
+
rcu_read_unlock();
- buf->f_namelen = BTRFS_NAME_LEN;
- buf->f_blocks = btrfs_super_total_bytes(disk_super) >> bits;
- buf->f_bfree = buf->f_blocks - (total_used >> bits);
- buf->f_bsize = dentry->d_sb->s_blocksize;
- buf->f_type = BTRFS_SUPER_MAGIC;
+ buf->f_blocks = div_u64(btrfs_super_total_bytes(disk_super), factor);
+ buf->f_blocks >>= bits;
+ buf->f_bfree = buf->f_blocks - (div_u64(total_used, factor) >> bits);
+
+ /* Account global block reserve as used, it's in logical size already */
+ spin_lock(&block_rsv->lock);
+ buf->f_bfree -= block_rsv->size >> bits;
+ spin_unlock(&block_rsv->lock);
+
buf->f_bavail = total_free_data;
ret = btrfs_calc_avail_data_space(fs_info->tree_root, &total_free_data);
if (ret) {
mutex_unlock(&fs_info->chunk_mutex);
return ret;
}
- buf->f_bavail += total_free_data;
+ buf->f_bavail += div_u64(total_free_data, factor);
buf->f_bavail = buf->f_bavail >> bits;
mutex_unlock(&fs_info->chunk_mutex);
+ buf->f_type = BTRFS_SUPER_MAGIC;
+ buf->f_bsize = dentry->d_sb->s_blocksize;
+ buf->f_namelen = BTRFS_NAME_LEN;
+
/* We treat it as constant endianness (it doesn't matter _which_)
because we want the fsid to come out the same whether mounted
on a big-endian or little-endian host */
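
The statfs rework above scales the raw byte counters by a redundancy factor (2 when any DATA block group uses DUP, RAID1, or RAID10) so 'df' reports logical rather than doubled on-disk sizes, and it subtracts the global block reserve, which is already a logical size, from f_bfree without scaling. A small worked example of that arithmetic with made-up numbers:

    #include <stdio.h>

    int main(void)
    {
        unsigned long long total_bytes = 2ULL << 40;   /* 2 TiB raw, RAID1 */
        unsigned long long total_used  = 512ULL << 30; /* 512 GiB raw used */
        unsigned long long rsv_size    = 512ULL << 20; /* global reserve */
        unsigned factor = 2;                           /* DUP/RAID1/RAID10 */
        int bits = 12;                                 /* 4 KiB blocks */

        unsigned long long f_blocks = (total_bytes / factor) >> bits;
        unsigned long long f_bfree  =
                f_blocks - ((total_used / factor) >> bits);

        /* the reserve is already logical, so it is not scaled */
        f_bfree -= rsv_size >> bits;

        printf("f_blocks=%llu f_bfree=%llu\n", f_blocks, f_bfree);
        return 0;
    }
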
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 5f379affdf23..d89c6d3542ca 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -218,7 +218,6 @@ loop:
spin_lock_init(&cur_trans->delayed_refs.lock);
INIT_LIST_HEAD(&cur_trans->pending_snapshots);
- INIT_LIST_HEAD(&cur_trans->ordered_operations);
INIT_LIST_HEAD(&cur_trans->pending_chunks);
INIT_LIST_HEAD(&cur_trans->switch_commits);
list_add_tail(&cur_trans->list, &fs_info->trans_list);
@@ -1612,27 +1611,6 @@ static void cleanup_transaction(struct btrfs_trans_handle *trans,
kmem_cache_free(btrfs_trans_handle_cachep, trans);
}
-static int btrfs_flush_all_pending_stuffs(struct btrfs_trans_handle *trans,
- struct btrfs_root *root)
-{
- int ret;
-
- ret = btrfs_run_delayed_items(trans, root);
- if (ret)
- return ret;
-
- /*
- * rename don't use btrfs_join_transaction, so, once we
- * set the transaction to blocked above, we aren't going
- * to get any new ordered operations. We can safely run
- * it here and no for sure that nothing new will be added
- * to the list
- */
- ret = btrfs_run_ordered_operations(trans, root, 1);
-
- return ret;
-}
-
static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info)
{
if (btrfs_test_opt(fs_info->tree_root, FLUSHONCOMMIT))
@@ -1653,13 +1631,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
struct btrfs_transaction *prev_trans = NULL;
int ret;
- ret = btrfs_run_ordered_operations(trans, root, 0);
- if (ret) {
- btrfs_abort_transaction(trans, root, ret);
- btrfs_end_transaction(trans, root);
- return ret;
- }
-
/* Stop the commit early if ->aborted is set */
if (unlikely(ACCESS_ONCE(cur_trans->aborted))) {
ret = cur_trans->aborted;
@@ -1740,7 +1711,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
if (ret)
goto cleanup_transaction;
- ret = btrfs_flush_all_pending_stuffs(trans, root);
+ ret = btrfs_run_delayed_items(trans, root);
if (ret)
goto cleanup_transaction;
@@ -1748,7 +1719,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
extwriter_counter_read(cur_trans) == 0);
/* some pending stuff might be added after the previous flush. */
- ret = btrfs_flush_all_pending_stuffs(trans, root);
+ ret = btrfs_run_delayed_items(trans, root);
if (ret)
goto cleanup_transaction;
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index 7dd558ed0716..579be51b27e5 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -55,7 +55,6 @@ struct btrfs_transaction {
wait_queue_head_t writer_wait;
wait_queue_head_t commit_wait;
struct list_head pending_snapshots;
- struct list_head ordered_operations;
struct list_head pending_chunks;
struct list_head switch_commits;
struct btrfs_delayed_ref_root delayed_refs;
diff --git a/fs/btrfs/ulist.h b/fs/btrfs/ulist.h
index 7f78cbf5cf41..4c29db604bbe 100644
--- a/fs/btrfs/ulist.h
+++ b/fs/btrfs/ulist.h
@@ -57,6 +57,21 @@ void ulist_free(struct ulist *ulist);
int ulist_add(struct ulist *ulist, u64 val, u64 aux, gfp_t gfp_mask);
int ulist_add_merge(struct ulist *ulist, u64 val, u64 aux,
u64 *old_aux, gfp_t gfp_mask);
+
+/* just like ulist_add_merge() but take a pointer for the aux data */
+static inline int ulist_add_merge_ptr(struct ulist *ulist, u64 val, void *aux,
+ void **old_aux, gfp_t gfp_mask)
+{
+#if BITS_PER_LONG == 32
+ u64 old64 = (uintptr_t)*old_aux;
+ int ret = ulist_add_merge(ulist, val, (uintptr_t)aux, &old64, gfp_mask);
+ *old_aux = (void *)((uintptr_t)old64);
+ return ret;
+#else
+ return ulist_add_merge(ulist, val, (u64)aux, (u64 *)old_aux, gfp_mask);
+#endif
+}
+
struct ulist_node *ulist_next(struct ulist *ulist,
struct ulist_iterator *uiter);
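
The new ulist_add_merge_ptr() wrapper above exists because callers such as backref.c were casting a pointer lvalue to u64 * before handing it to ulist_add_merge(); on a 32-bit build that reads and writes eight bytes through a four-byte object. The wrapper instead goes through a properly sized u64 temporary. A stand-alone sketch of the widening pattern (fake_add_merge is a hypothetical stand-in for the ulist call):

    #include <stdint.h>
    #include <stdio.h>

    /* Stand-in for ulist_add_merge(): reports the aux of an existing
     * entry through *old_aux. */
    static int fake_add_merge(uint64_t val, uint64_t aux, uint64_t *old_aux)
    {
        (void)val;
        *old_aux = aux;      /* pretend 'val' was already in the set */
        return 0;
    }

    /* The safe pointer wrapper: widen through a u64 temporary rather
     * than casting a pointer lvalue to u64 * (mis-sized on 32-bit). */
    static int add_merge_ptr(uint64_t val, void *aux, void **old_aux)
    {
        uint64_t old64 = (uintptr_t)*old_aux;
        int ret = fake_add_merge(val, (uintptr_t)aux, &old64);

        *old_aux = (void *)(uintptr_t)old64;
        return ret;
    }

    int main(void)
    {
        int x = 42;
        void *old = NULL;

        add_merge_ptr(1, &x, &old);
        printf("%d\n", *(int *)old);    /* 42 */
        return 0;
    }
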
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index ac4f260155c8..889b98455750 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -207,6 +207,19 @@ cifs_statfs(struct dentry *dentry, struct kstatfs *buf)
return 0;
}
+static long cifs_fallocate(struct file *file, int mode, loff_t off, loff_t len)
+{
+ struct super_block *sb = file->f_path.dentry->d_sb;
+ struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
+ struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
+ struct TCP_Server_Info *server = tcon->ses->server;
+
+ if (server->ops->fallocate)
+ return server->ops->fallocate(file, tcon, mode, off, len);
+
+ return -EOPNOTSUPP;
+}
+
static int cifs_permission(struct inode *inode, int mask)
{
struct cifs_sb_info *cifs_sb;
@@ -812,8 +825,9 @@ static int cifs_setlease(struct file *file, long arg, struct file_lock **lease)
if (!(S_ISREG(inode->i_mode)))
return -EINVAL;
- /* check if file is oplocked */
- if (((arg == F_RDLCK) && CIFS_CACHE_READ(CIFS_I(inode))) ||
+ /* Check if file is oplocked if this is request for new lease */
+ if (arg == F_UNLCK ||
+ ((arg == F_RDLCK) && CIFS_CACHE_READ(CIFS_I(inode))) ||
((arg == F_WRLCK) && CIFS_CACHE_WRITE(CIFS_I(inode))))
return generic_setlease(file, arg, lease);
else if (tlink_tcon(cfile->tlink)->local_lease &&
@@ -908,6 +922,7 @@ const struct file_operations cifs_file_ops = {
.unlocked_ioctl = cifs_ioctl,
#endif /* CONFIG_CIFS_POSIX */
.setlease = cifs_setlease,
+ .fallocate = cifs_fallocate,
};
const struct file_operations cifs_file_strict_ops = {
@@ -927,6 +942,7 @@ const struct file_operations cifs_file_strict_ops = {
.unlocked_ioctl = cifs_ioctl,
#endif /* CONFIG_CIFS_POSIX */
.setlease = cifs_setlease,
+ .fallocate = cifs_fallocate,
};
const struct file_operations cifs_file_direct_ops = {
@@ -947,6 +963,7 @@ const struct file_operations cifs_file_direct_ops = {
#endif /* CONFIG_CIFS_POSIX */
.llseek = cifs_llseek,
.setlease = cifs_setlease,
+ .fallocate = cifs_fallocate,
};
const struct file_operations cifs_file_nobrl_ops = {
@@ -965,6 +982,7 @@ const struct file_operations cifs_file_nobrl_ops = {
.unlocked_ioctl = cifs_ioctl,
#endif /* CONFIG_CIFS_POSIX */
.setlease = cifs_setlease,
+ .fallocate = cifs_fallocate,
};
const struct file_operations cifs_file_strict_nobrl_ops = {
@@ -983,6 +1001,7 @@ const struct file_operations cifs_file_strict_nobrl_ops = {
.unlocked_ioctl = cifs_ioctl,
#endif /* CONFIG_CIFS_POSIX */
.setlease = cifs_setlease,
+ .fallocate = cifs_fallocate,
};
const struct file_operations cifs_file_direct_nobrl_ops = {
@@ -1002,6 +1021,7 @@ const struct file_operations cifs_file_direct_nobrl_ops = {
#endif /* CONFIG_CIFS_POSIX */
.llseek = cifs_llseek,
.setlease = cifs_setlease,
+ .fallocate = cifs_fallocate,
};
const struct file_operations cifs_dir_ops = {
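
The cifs_fallocate() added above is a thin trampoline: it looks up the protocol-specific handler in the server's operations table and returns -EOPNOTSUPP when the dialect provides none, letting SMB1 and SMB2/3 register the hook independently. A generic sketch of that optional-operation dispatch pattern (illustrative types, not the CIFS structures):

    #include <stdio.h>
    #include <errno.h>

    struct server_ops {
        /* optional: NULL when the protocol version lacks the feature */
        long (*fallocate)(long offset, long len);
    };

    static long do_fallocate(const struct server_ops *ops, long off, long len)
    {
        if (ops->fallocate)
            return ops->fallocate(off, len);
        return -EOPNOTSUPP;              /* feature not supported here */
    }

    static long smb3_fallocate(long off, long len)
    {
        printf("fallocate off=%ld len=%ld\n", off, len);
        return 0;
    }

    int main(void)
    {
        struct server_ops smb1 = { 0 };                 /* no handler */
        struct server_ops smb3 = { .fallocate = smb3_fallocate };

        printf("%ld\n", do_fallocate(&smb1, 0, 4096));  /* -EOPNOTSUPP */
        return (int)do_fallocate(&smb3, 0, 4096);
    }
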
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 0012e1e291d4..dfc731b02aa9 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -409,6 +409,10 @@ struct smb_version_operations {
/* get mtu credits */
int (*wait_mtu_credits)(struct TCP_Server_Info *, unsigned int,
unsigned int *, unsigned int *);
+ /* check if we need to issue closedir */
+ bool (*dir_needs_close)(struct cifsFileInfo *);
+ long (*fallocate)(struct file *, struct cifs_tcon *, int, loff_t,
+ loff_t);
};
struct smb_version_values {
@@ -883,6 +887,7 @@ struct cifs_tcon {
for this mount even if server would support */
bool local_lease:1; /* check leases (only) on local system not remote */
bool broken_posix_open; /* e.g. Samba server versions < 3.3.2, 3.2.9 */
+ bool broken_sparse_sup; /* if server or share does not support sparse */
bool need_reconnect:1; /* connection reset, tid now invalid */
#ifdef CONFIG_CIFS_SMB2
bool print:1; /* set if connection to printer share */
diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h
index 33df36ef9d52..5f9822ac0245 100644
--- a/fs/cifs/cifspdu.h
+++ b/fs/cifs/cifspdu.h
@@ -2253,6 +2253,29 @@ typedef struct {
/* minimum includes first three fields, and empty FS Name */
#define MIN_FS_ATTR_INFO_SIZE 12
+
+/* List of FileSystemAttributes - see 2.5.1 of MS-FSCC */
+#define FILE_SUPPORT_INTEGRITY_STREAMS 0x04000000
+#define FILE_SUPPORTS_USN_JOURNAL 0x02000000
+#define FILE_SUPPORTS_OPEN_BY_FILE_ID 0x01000000
+#define FILE_SUPPORTS_EXTENDED_ATTRIBUTES 0x00800000
+#define FILE_SUPPORTS_HARD_LINKS 0x00400000
+#define FILE_SUPPORTS_TRANSACTIONS 0x00200000
+#define FILE_SEQUENTIAL_WRITE_ONCE 0x00100000
+#define FILE_READ_ONLY_VOLUME 0x00080000
+#define FILE_NAMED_STREAMS 0x00040000
+#define FILE_SUPPORTS_ENCRYPTION 0x00020000
+#define FILE_SUPPORTS_OBJECT_IDS 0x00010000
+#define FILE_VOLUME_IS_COMPRESSED 0x00008000
+#define FILE_SUPPORTS_REMOTE_STORAGE 0x00000100
+#define FILE_SUPPORTS_REPARSE_POINTS 0x00000080
+#define FILE_SUPPORTS_SPARSE_FILES 0x00000040
+#define FILE_VOLUME_QUOTAS 0x00000020
+#define FILE_FILE_COMPRESSION 0x00000010
+#define FILE_PERSISTENT_ACLS 0x00000008
+#define FILE_UNICODE_ON_DISK 0x00000004
+#define FILE_CASE_PRESERVED_NAMES 0x00000002
+#define FILE_CASE_SENSITIVE_SEARCH 0x00000001
typedef struct {
__le32 Attributes;
__le32 MaxPathNameComponentLength;
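
The FILE_* constants added above mirror the FileSystemAttributes bit field from section 2.5.1 of MS-FSCC; a client tests individual capability bits in the attribute mask returned by the server. A small sketch of such a check (the mask value is illustrative):

    #include <stdio.h>

    #define FILE_SUPPORTS_SPARSE_FILES 0x00000040
    #define FILE_UNICODE_ON_DISK       0x00000004

    int main(void)
    {
        unsigned int attrs = 0x0000004f;   /* example mask from a server */

        if (attrs & FILE_SUPPORTS_SPARSE_FILES)
            printf("share supports sparse files\n");
        if (attrs & FILE_UNICODE_ON_DISK)
            printf("names stored as Unicode\n");
        return 0;
    }
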
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 4ab2f79ffa7a..d5fec92e0360 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -762,7 +762,7 @@ int cifs_closedir(struct inode *inode, struct file *file)
cifs_dbg(FYI, "Freeing private data in close dir\n");
spin_lock(&cifs_file_list_lock);
- if (!cfile->srch_inf.endOfSearch && !cfile->invalidHandle) {
+ if (server->ops->dir_needs_close(cfile)) {
cfile->invalidHandle = true;
spin_unlock(&cifs_file_list_lock);
if (server->ops->close_dir)
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 426d6c6ad8bf..949ec909ec9a 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1727,6 +1727,12 @@ unlink_target:
target_dentry, to_name);
}
+ /* force revalidate to go get info when needed */
+ CIFS_I(source_dir)->time = CIFS_I(target_dir)->time = 0;
+
+ source_dir->i_ctime = source_dir->i_mtime = target_dir->i_ctime =
+ target_dir->i_mtime = current_fs_time(source_dir->i_sb);
+
cifs_rename_exit:
kfree(info_buf_source);
kfree(from_name);
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 81340c6253eb..b7415d596dbd 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -574,13 +574,6 @@ void cifs_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock)
cinode->oplock = 0;
}
-static int
-cifs_oplock_break_wait(void *unused)
-{
- schedule();
- return signal_pending(current) ? -ERESTARTSYS : 0;
-}
-
/*
* We wait for oplock breaks to be processed before we attempt to perform
* writes.
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index b15862e0f68c..798c80a41c88 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -593,7 +593,7 @@ find_cifs_entry(const unsigned int xid, struct cifs_tcon *tcon, loff_t pos,
/* close and restart search */
cifs_dbg(FYI, "search backing up - close and restart search\n");
spin_lock(&cifs_file_list_lock);
- if (!cfile->srch_inf.endOfSearch && !cfile->invalidHandle) {
+ if (server->ops->dir_needs_close(cfile)) {
cfile->invalidHandle = true;
spin_unlock(&cifs_file_list_lock);
if (server->ops->close)
diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c
index 5e8c22d6c7b9..1a6df4b03f67 100644
--- a/fs/cifs/smb1ops.c
+++ b/fs/cifs/smb1ops.c
@@ -1015,6 +1015,12 @@ cifs_wp_retry_size(struct inode *inode)
return CIFS_SB(inode->i_sb)->wsize;
}
+static bool
+cifs_dir_needs_close(struct cifsFileInfo *cfile)
+{
+ return !cfile->srch_inf.endOfSearch && !cfile->invalidHandle;
+}
+
struct smb_version_operations smb1_operations = {
.send_cancel = send_nt_cancel,
.compare_fids = cifs_compare_fids,
@@ -1086,6 +1092,7 @@ struct smb_version_operations smb1_operations = {
.create_mf_symlink = cifs_create_mf_symlink,
.is_read_op = cifs_is_read_op,
.wp_retry_size = cifs_wp_retry_size,
+ .dir_needs_close = cifs_dir_needs_close,
#ifdef CONFIG_CIFS_XATTR
.query_all_EAs = CIFSSMBQAllEAs,
.set_EA = CIFSSMBSetEA,
diff --git a/fs/cifs/smb2maperror.c b/fs/cifs/smb2maperror.c
index e31a9dfdcd39..af59d03db492 100644
--- a/fs/cifs/smb2maperror.c
+++ b/fs/cifs/smb2maperror.c
@@ -214,7 +214,7 @@ static const struct status_to_posix_error smb2_error_map_table[] = {
{STATUS_BREAKPOINT, -EIO, "STATUS_BREAKPOINT"},
{STATUS_SINGLE_STEP, -EIO, "STATUS_SINGLE_STEP"},
{STATUS_BUFFER_OVERFLOW, -EIO, "STATUS_BUFFER_OVERFLOW"},
- {STATUS_NO_MORE_FILES, -EIO, "STATUS_NO_MORE_FILES"},
+ {STATUS_NO_MORE_FILES, -ENODATA, "STATUS_NO_MORE_FILES"},
{STATUS_WAKE_SYSTEM_DEBUGGER, -EIO, "STATUS_WAKE_SYSTEM_DEBUGGER"},
{STATUS_HANDLES_CLOSED, -EIO, "STATUS_HANDLES_CLOSED"},
{STATUS_NO_INHERITANCE, -EIO, "STATUS_NO_INHERITANCE"},
@@ -298,7 +298,7 @@ static const struct status_to_posix_error smb2_error_map_table[] = {
{STATUS_INVALID_PARAMETER, -EINVAL, "STATUS_INVALID_PARAMETER"},
{STATUS_NO_SUCH_DEVICE, -ENODEV, "STATUS_NO_SUCH_DEVICE"},
{STATUS_NO_SUCH_FILE, -ENOENT, "STATUS_NO_SUCH_FILE"},
- {STATUS_INVALID_DEVICE_REQUEST, -EIO, "STATUS_INVALID_DEVICE_REQUEST"},
+ {STATUS_INVALID_DEVICE_REQUEST, -EOPNOTSUPP, "STATUS_INVALID_DEVICE_REQUEST"},
{STATUS_END_OF_FILE, -ENODATA, "STATUS_END_OF_FILE"},
{STATUS_WRONG_VOLUME, -EIO, "STATUS_WRONG_VOLUME"},
{STATUS_NO_MEDIA_IN_DEVICE, -EIO, "STATUS_NO_MEDIA_IN_DEVICE"},
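These remappings matter because callers branch on the POSIX value: SMB2_query_directory (later in this diff) recognizes -ENODATA as the normal end of a listing, and probes of optional operations now see -EOPNOTSUPP instead of a generic I/O failure. A reduced sketch of the table-driven translation, with field names inferred from the initializers (the real lookup lives in map_smb2_to_linux_errors):

	/* Sketch: translate an NT status code to its POSIX errno. */
	static int map_status(__le32 status)
	{
		int i;

		for (i = 0; i < ARRAY_SIZE(smb2_error_map_table); i++)
			if (smb2_error_map_table[i].smb2_status == status)
				return smb2_error_map_table[i].posix_error;
		return -EIO;	/* unknown status codes degrade to -EIO */
	}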
diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c
index f2e6ac29a8d6..4aa7a0f07d6e 100644
--- a/fs/cifs/smb2misc.c
+++ b/fs/cifs/smb2misc.c
@@ -178,9 +178,24 @@ smb2_check_message(char *buf, unsigned int length)
/* Windows 7 server returns 24 bytes more */
if (clc_len + 20 == len && command == SMB2_OPLOCK_BREAK_HE)
return 0;
- /* server can return one byte more */
+ /* server can return one byte more due to implied bcc[0] */
if (clc_len == 4 + len + 1)
return 0;
+
+ /*
+	 * MacOS servers pad the SMB2.1 write response with 3 bytes of
+	 * junk. Other servers match the RFC1001 length to the actual
+	 * SMB2/SMB3 frame length (header + response-specific data).
+	 * Log the server error (once), but tolerate it and continue,
+	 * since the frame is parseable.
+ */
+ if (clc_len < 4 /* RFC1001 header size */ + len) {
+ printk_once(KERN_WARNING
+ "SMB2 server sent bad RFC1001 len %d not %d\n",
+ len, clc_len - 4);
+ return 0;
+ }
+
return 1;
}
return 0;
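After this hunk the validator tolerates two benign mismatches between the calculated frame length and the received one: a single extra byte from the implied bcc[0], and an over-long RFC1001 length such as the MacOS write-response padding. A self-contained restatement of the acceptance rules, with simplified locals (rfc_len and calc_len stand in for len and clc_len in smb2_check_message):

	/* Sketch: which length mismatches are still treated as parseable. */
	static bool smb2_frame_len_ok(unsigned int rfc_len,
				      unsigned int calc_len)
	{
		if (calc_len == 4 + rfc_len)		/* exact match */
			return true;
		if (calc_len == 4 + rfc_len + 1)	/* implied bcc[0] */
			return true;
		if (calc_len < 4 + rfc_len)		/* padded frame */
			return true;	/* logged once, then accepted */
		return false;
	}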
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 77f8aeb9c2fc..5a48aa290dfe 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -731,11 +731,72 @@ smb2_sync_write(const unsigned int xid, struct cifsFileInfo *cfile,
return SMB2_write(xid, parms, written, iov, nr_segs);
}
+/* Set or clear the SPARSE_FILE attribute based on value passed in setsparse */
+static bool smb2_set_sparse(const unsigned int xid, struct cifs_tcon *tcon,
+ struct cifsFileInfo *cfile, struct inode *inode, __u8 setsparse)
+{
+ struct cifsInodeInfo *cifsi;
+ int rc;
+
+ cifsi = CIFS_I(inode);
+
+ /* if file already sparse don't bother setting sparse again */
+ if ((cifsi->cifsAttrs & FILE_ATTRIBUTE_SPARSE_FILE) && setsparse)
+ return true; /* already sparse */
+
+ if (!(cifsi->cifsAttrs & FILE_ATTRIBUTE_SPARSE_FILE) && !setsparse)
+ return true; /* already not sparse */
+
+ /*
+	 * We can't check for sparse support on the share the usual way,
+	 * via the FS attribute info (FILE_SUPPORTS_SPARSE_FILES), since
+	 * the Samba server doesn't set that flag on the share yet still
+	 * supports the set-sparse FSCTL and returns sparse correctly in
+	 * the file attributes. If setting sparse fails, mark the share
+	 * as not supporting sparse files, to avoid repeatedly sending
+	 * the unsupported fsctl to the server each time the file is
+	 * extended.
+ */
+ if (tcon->broken_sparse_sup)
+ return false;
+
+ rc = SMB2_ioctl(xid, tcon, cfile->fid.persistent_fid,
+ cfile->fid.volatile_fid, FSCTL_SET_SPARSE,
+ true /* is_fctl */, &setsparse, 1, NULL, NULL);
+ if (rc) {
+ tcon->broken_sparse_sup = true;
+ cifs_dbg(FYI, "set sparse rc = %d\n", rc);
+ return false;
+ }
+
+ if (setsparse)
+ cifsi->cifsAttrs |= FILE_ATTRIBUTE_SPARSE_FILE;
+ else
+ cifsi->cifsAttrs &= (~FILE_ATTRIBUTE_SPARSE_FILE);
+
+ return true;
+}
+
static int
smb2_set_file_size(const unsigned int xid, struct cifs_tcon *tcon,
struct cifsFileInfo *cfile, __u64 size, bool set_alloc)
{
__le64 eof = cpu_to_le64(size);
+ struct inode *inode;
+
+	/*
+	 * If extending the file by more than 8K (two pages), mark it
+	 * sparse; many Linux filesystems make files sparse by default
+	 * when extending via ftruncate.
+	 */
+ inode = cfile->dentry->d_inode;
+
+ if (!set_alloc && (size > inode->i_size + 8192)) {
+ __u8 set_sparse = 1;
+
+ /* whether set sparse succeeds or not, extend the file */
+ smb2_set_sparse(xid, tcon, cfile, inode, set_sparse);
+ }
+
return SMB2_set_eof(xid, tcon, cfile->fid.persistent_fid,
cfile->fid.volatile_fid, cfile->pid, &eof, false);
}
@@ -954,6 +1015,105 @@ smb2_query_symlink(const unsigned int xid, struct cifs_tcon *tcon,
return rc;
}
+static long smb3_zero_range(struct file *file, struct cifs_tcon *tcon,
+ loff_t offset, loff_t len, bool keep_size)
+{
+ struct inode *inode;
+ struct cifsInodeInfo *cifsi;
+ struct cifsFileInfo *cfile = file->private_data;
+ struct file_zero_data_information fsctl_buf;
+ long rc;
+ unsigned int xid;
+
+ inode = cfile->dentry->d_inode;
+ cifsi = CIFS_I(inode);
+
+	/*
+	 * If the file is not oplocked we can't be sure whether the caller
+	 * is asking to extend the size.
+	 */
+	if (!CIFS_CACHE_READ(cifsi) && keep_size == false)
+		return -EOPNOTSUPP;
+
+	/*
+	 * Must check whether the file is sparse, since fallocate -z (zero
+	 * range) assumes a non-sparse allocation; on a sparse file the
+	 * fsctl may deallocate the range instead of zeroing it in place.
+	 */
+	if (cifsi->cifsAttrs & FILE_ATTRIBUTE_SPARSE_FILE)
+		return -EOPNOTSUPP;
+
+	/*
+	 * Need to make sure we are not asked to extend the file, since the
+	 * SMB3 fsctl does not change the file size. In the future we could
+	 * zero the first part of the range and then set the file size,
+	 * which for a non-sparse file would zero the newly extended range.
+	 */
+	if (keep_size == false && i_size_read(inode) < offset + len)
+		return -EOPNOTSUPP;
+
+	xid = get_xid();
+
+	cifs_dbg(FYI, "offset %lld len %lld\n", offset, len);
+
+ fsctl_buf.FileOffset = cpu_to_le64(offset);
+ fsctl_buf.BeyondFinalZero = cpu_to_le64(offset + len);
+
+ rc = SMB2_ioctl(xid, tcon, cfile->fid.persistent_fid,
+ cfile->fid.volatile_fid, FSCTL_SET_ZERO_DATA,
+ true /* is_fctl */, (char *)&fsctl_buf,
+ sizeof(struct file_zero_data_information), NULL, NULL);
+ free_xid(xid);
+ return rc;
+}
+
+static long smb3_punch_hole(struct file *file, struct cifs_tcon *tcon,
+ loff_t offset, loff_t len)
+{
+ struct inode *inode;
+ struct cifsInodeInfo *cifsi;
+ struct cifsFileInfo *cfile = file->private_data;
+ struct file_zero_data_information fsctl_buf;
+ long rc;
+ unsigned int xid;
+ __u8 set_sparse = 1;
+
+ xid = get_xid();
+
+ inode = cfile->dentry->d_inode;
+ cifsi = CIFS_I(inode);
+
+ /* Need to make file sparse, if not already, before freeing range. */
+ /* Consider adding equivalent for compressed since it could also work */
+	if (!smb2_set_sparse(xid, tcon, cfile, inode, set_sparse)) {
+		free_xid(xid);
+		return -EOPNOTSUPP;
+	}
+
+	cifs_dbg(FYI, "offset %lld len %lld\n", offset, len);
+
+ fsctl_buf.FileOffset = cpu_to_le64(offset);
+ fsctl_buf.BeyondFinalZero = cpu_to_le64(offset + len);
+
+ rc = SMB2_ioctl(xid, tcon, cfile->fid.persistent_fid,
+ cfile->fid.volatile_fid, FSCTL_SET_ZERO_DATA,
+ true /* is_fctl */, (char *)&fsctl_buf,
+ sizeof(struct file_zero_data_information), NULL, NULL);
+ free_xid(xid);
+ return rc;
+}
+
+static long smb3_fallocate(struct file *file, struct cifs_tcon *tcon, int mode,
+ loff_t off, loff_t len)
+{
+ /* KEEP_SIZE already checked for by do_fallocate */
+ if (mode & FALLOC_FL_PUNCH_HOLE)
+ return smb3_punch_hole(file, tcon, off, len);
+ else if (mode & FALLOC_FL_ZERO_RANGE) {
+ if (mode & FALLOC_FL_KEEP_SIZE)
+ return smb3_zero_range(file, tcon, off, len, true);
+ return smb3_zero_range(file, tcon, off, len, false);
+ }
+
+ return -EOPNOTSUPP;
+}
+
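smb3_fallocate() implements exactly two modes, so SMB3 mounts gain hole punching and zero range while every other fallocate flag still returns -EOPNOTSUPP. From userspace the supported calls look like this (standard fallocate(2) flags; fd, off and len are placeholders):

	#include <fcntl.h>
	#include <linux/falloc.h>

	/* Punch a hole: fallocate(2) requires KEEP_SIZE with PUNCH_HOLE. */
	fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, off, len);

	/* Zero a range without changing the file size; omitting KEEP_SIZE
	 * can imply extending the file, which this code rejects. */
	fallocate(fd, FALLOC_FL_ZERO_RANGE | FALLOC_FL_KEEP_SIZE, off, len);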
static void
smb2_downgrade_oplock(struct TCP_Server_Info *server,
struct cifsInodeInfo *cinode, bool set_level2)
@@ -1161,6 +1321,12 @@ smb2_wp_retry_size(struct inode *inode)
SMB2_MAX_BUFFER_SIZE);
}
+static bool
+smb2_dir_needs_close(struct cifsFileInfo *cfile)
+{
+ return !cfile->invalidHandle;
+}
+
struct smb_version_operations smb20_operations = {
.compare_fids = smb2_compare_fids,
.setup_request = smb2_setup_request,
@@ -1236,6 +1402,7 @@ struct smb_version_operations smb20_operations = {
.parse_lease_buf = smb2_parse_lease_buf,
.clone_range = smb2_clone_range,
.wp_retry_size = smb2_wp_retry_size,
+ .dir_needs_close = smb2_dir_needs_close,
};
struct smb_version_operations smb21_operations = {
@@ -1313,6 +1480,7 @@ struct smb_version_operations smb21_operations = {
.parse_lease_buf = smb2_parse_lease_buf,
.clone_range = smb2_clone_range,
.wp_retry_size = smb2_wp_retry_size,
+ .dir_needs_close = smb2_dir_needs_close,
};
struct smb_version_operations smb30_operations = {
@@ -1393,6 +1561,8 @@ struct smb_version_operations smb30_operations = {
.clone_range = smb2_clone_range,
.validate_negotiate = smb3_validate_negotiate,
.wp_retry_size = smb2_wp_retry_size,
+ .dir_needs_close = smb2_dir_needs_close,
+ .fallocate = smb3_fallocate,
};
struct smb_version_values smb20_values = {
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 42ebc1a8be6c..fa0dd044213b 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -907,7 +907,8 @@ tcon_exit:
tcon_error_exit:
if (rsp->hdr.Status == STATUS_BAD_NETWORK_NAME) {
cifs_dbg(VFS, "BAD_NETWORK_NAME: %s\n", tree);
- tcon->bad_network_name = true;
+ if (tcon)
+ tcon->bad_network_name = true;
}
goto tcon_exit;
}
@@ -1224,7 +1225,9 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
cifs_dbg(FYI, "SMB2 IOCTL\n");
- *out_data = NULL;
+ if (out_data != NULL)
+ *out_data = NULL;
+
/* zero out returned data len, in case of error */
if (plen)
*plen = 0;
@@ -2177,6 +2180,10 @@ SMB2_query_directory(const unsigned int xid, struct cifs_tcon *tcon,
rsp = (struct smb2_query_directory_rsp *)iov[0].iov_base;
if (rc) {
+ if (rc == -ENODATA && rsp->hdr.Status == STATUS_NO_MORE_FILES) {
+ srch_inf->endOfSearch = true;
+ rc = 0;
+ }
cifs_stats_fail_inc(tcon, SMB2_QUERY_DIRECTORY_HE);
goto qdir_exit;
}
@@ -2214,11 +2221,6 @@ SMB2_query_directory(const unsigned int xid, struct cifs_tcon *tcon,
else
cifs_dbg(VFS, "illegal search buffer type\n");
- if (rsp->hdr.Status == STATUS_NO_MORE_FILES)
- srch_inf->endOfSearch = 1;
- else
- srch_inf->endOfSearch = 0;
-
return rc;
qdir_exit:
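Folding the end-of-search test into the error path works because STATUS_NO_MORE_FILES now maps to -ENODATA (see the smb2maperror.c hunk above): the status is recognized, endOfSearch is latched, and rc is cleared so the listing terminates cleanly instead of surfacing EIO. The resulting caller pattern, sketched with a hypothetical consumer:

	/* Sketch: iterate a directory until the server reports done. */
	do {
		rc = SMB2_query_directory(xid, tcon, persistent_fid,
					  volatile_fid, index, srch_inf);
		if (rc)
			break;			/* hard error */
		process_entries(srch_inf);	/* hypothetical consumer */
	} while (!srch_inf->endOfSearch);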
diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h
index 69f3595d3952..fbe486c285a9 100644
--- a/fs/cifs/smb2pdu.h
+++ b/fs/cifs/smb2pdu.h
@@ -573,6 +573,12 @@ struct copychunk_ioctl {
__u32 Reserved2;
} __packed;
+/* this goes in the ioctl buffer when doing FSCTL_SET_ZERO_DATA */
+struct file_zero_data_information {
+ __le64 FileOffset;
+ __le64 BeyondFinalZero;
+} __packed;
+
struct copychunk_ioctl_rsp {
__le32 ChunksWritten;
__le32 ChunkBytesWritten;
diff --git a/fs/cifs/smbfsctl.h b/fs/cifs/smbfsctl.h
index 0e538b5c9622..83efa59535be 100644
--- a/fs/cifs/smbfsctl.h
+++ b/fs/cifs/smbfsctl.h
@@ -63,7 +63,7 @@
#define FSCTL_SET_OBJECT_ID_EXTENDED 0x000900BC /* BB add struct */
#define FSCTL_CREATE_OR_GET_OBJECT_ID 0x000900C0 /* BB add struct */
#define FSCTL_SET_SPARSE 0x000900C4 /* BB add struct */
-#define FSCTL_SET_ZERO_DATA 0x000900C8 /* BB add struct */
+#define FSCTL_SET_ZERO_DATA 0x000980C8
#define FSCTL_SET_ENCRYPTION 0x000900D7 /* BB add struct */
#define FSCTL_ENCRYPTION_FSCTL_IO 0x000900DB /* BB add struct */
#define FSCTL_WRITE_RAW_ENCRYPTED 0x000900DF /* BB add struct */
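The old value 0x000900C8 carried no access bits; the corrected constant matches the Windows definition CTL_CODE(FILE_DEVICE_FILE_SYSTEM, 50, METHOD_BUFFERED, FILE_WRITE_ACCESS), and a server validating the full code treats the old one as an unknown fsctl. The arithmetic, with the CTL_CODE macro reproduced for illustration (it is not defined in this header):

	#define CTL_CODE(DeviceType, Function, Method, Access)	\
		(((DeviceType) << 16) | ((Access) << 14) |	\
		 ((Function) << 2) | (Method))

	/*
	 * 0x0009 << 16 = 0x00090000  FILE_DEVICE_FILE_SYSTEM
	 * 0x0002 << 14 = 0x00008000  FILE_WRITE_ACCESS
	 *     50 <<  2 = 0x000000C8  function 50
	 *            0 = 0x00000000  METHOD_BUFFERED
	 *        total = 0x000980C8
	 */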
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 08cdfe5461e3..622e88249024 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -2828,8 +2828,9 @@ static int ext3_statfs (struct dentry * dentry, struct kstatfs * buf)
*/
overhead += ngroups * (2 + sbi->s_itb_per_group);
- /* Add the journal blocks as well */
- overhead += sbi->s_journal->j_maxlen;
+ /* Add the internal journal blocks as well */
+ if (sbi->s_journal && !sbi->journal_bdev)
+ overhead += sbi->s_journal->j_maxlen;
sbi->s_overhead_last = overhead;
smp_wmb();
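An ext3 journal may live on an external device (sbi->journal_bdev set), in which case its blocks belong to that device and must not be counted against this filesystem; the added s_journal test also appears to guard the case where no journal was loaded at all (for example a noload mount), where the old code would dereference NULL. The guarded accounting in isolation:

	/* Count the journal as overhead only when internal and loaded;
	 * an external journal consumes another device's blocks. */
	if (sbi->s_journal && !sbi->journal_bdev)
		overhead += sbi->s_journal->j_maxlen;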
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index 4556ce1af5b0..5ddaf8625d3b 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -61,7 +61,7 @@ static void isofs_put_super(struct super_block *sb)
return;
}
-static int isofs_read_inode(struct inode *);
+static int isofs_read_inode(struct inode *, int relocated);
static int isofs_statfs (struct dentry *, struct kstatfs *);
static struct kmem_cache *isofs_inode_cachep;
@@ -1259,7 +1259,7 @@ out_toomany:
goto out;
}
-static int isofs_read_inode(struct inode *inode)
+static int isofs_read_inode(struct inode *inode, int relocated)
{
struct super_block *sb = inode->i_sb;
struct isofs_sb_info *sbi = ISOFS_SB(sb);
@@ -1404,7 +1404,7 @@ static int isofs_read_inode(struct inode *inode)
*/
if (!high_sierra) {
- parse_rock_ridge_inode(de, inode);
+ parse_rock_ridge_inode(de, inode, relocated);
/* if we want uid/gid set, override the rock ridge setting */
if (sbi->s_uid_set)
inode->i_uid = sbi->s_uid;
@@ -1483,9 +1483,10 @@ static int isofs_iget5_set(struct inode *ino, void *data)
* offset that point to the underlying meta-data for the inode. The
* code below is otherwise similar to the iget() code in
* include/linux/fs.h */
-struct inode *isofs_iget(struct super_block *sb,
- unsigned long block,
- unsigned long offset)
+struct inode *__isofs_iget(struct super_block *sb,
+ unsigned long block,
+ unsigned long offset,
+ int relocated)
{
unsigned long hashval;
struct inode *inode;
@@ -1507,7 +1508,7 @@ struct inode *isofs_iget(struct super_block *sb,
return ERR_PTR(-ENOMEM);
if (inode->i_state & I_NEW) {
- ret = isofs_read_inode(inode);
+ ret = isofs_read_inode(inode, relocated);
if (ret < 0) {
iget_failed(inode);
inode = ERR_PTR(ret);
diff --git a/fs/isofs/isofs.h b/fs/isofs/isofs.h
index 99167238518d..0ac4c1f73fbd 100644
--- a/fs/isofs/isofs.h
+++ b/fs/isofs/isofs.h
@@ -107,7 +107,7 @@ extern int iso_date(char *, int);
struct inode; /* To make gcc happy */
-extern int parse_rock_ridge_inode(struct iso_directory_record *, struct inode *);
+extern int parse_rock_ridge_inode(struct iso_directory_record *, struct inode *, int relocated);
extern int get_rock_ridge_filename(struct iso_directory_record *, char *, struct inode *);
extern int isofs_name_translate(struct iso_directory_record *, char *, struct inode *);
@@ -118,9 +118,24 @@ extern struct dentry *isofs_lookup(struct inode *, struct dentry *, unsigned int
extern struct buffer_head *isofs_bread(struct inode *, sector_t);
extern int isofs_get_blocks(struct inode *, sector_t, struct buffer_head **, unsigned long);
-extern struct inode *isofs_iget(struct super_block *sb,
- unsigned long block,
- unsigned long offset);
+struct inode *__isofs_iget(struct super_block *sb,
+ unsigned long block,
+ unsigned long offset,
+ int relocated);
+
+static inline struct inode *isofs_iget(struct super_block *sb,
+ unsigned long block,
+ unsigned long offset)
+{
+ return __isofs_iget(sb, block, offset, 0);
+}
+
+static inline struct inode *isofs_iget_reloc(struct super_block *sb,
+ unsigned long block,
+ unsigned long offset)
+{
+ return __isofs_iget(sb, block, offset, 1);
+}
/* Because the inode number is no longer relevant to finding the
* underlying meta-data for an inode, we are free to choose a more
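The header now routes both lookup flavors through __isofs_iget(), with static inline wrappers preserving the old isofs_iget() calling convention; only the Rock Ridge relocation path uses the second flavor. Call-site sketch (the rock.c hunk below is the real relocated caller):

	/* Ordinary lookup: not a relocated directory. */
	inode = isofs_iget(sb, block, offset);

	/* Lookup through a CL (child link) entry: isofs_read_inode()
	 * passes RR_RELOC_DE down to the Rock Ridge parser. */
	reloc = isofs_iget_reloc(sb, reloc_block, 0);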
diff --git a/fs/isofs/rock.c b/fs/isofs/rock.c
index c0bf42472e40..f488bbae541a 100644
--- a/fs/isofs/rock.c
+++ b/fs/isofs/rock.c
@@ -288,12 +288,16 @@ eio:
goto out;
}
+#define RR_REGARD_XA 1
+#define RR_RELOC_DE 2
+
static int
parse_rock_ridge_inode_internal(struct iso_directory_record *de,
- struct inode *inode, int regard_xa)
+ struct inode *inode, int flags)
{
int symlink_len = 0;
int cnt, sig;
+ unsigned int reloc_block;
struct inode *reloc;
struct rock_ridge *rr;
int rootflag;
@@ -305,7 +309,7 @@ parse_rock_ridge_inode_internal(struct iso_directory_record *de,
init_rock_state(&rs, inode);
setup_rock_ridge(de, inode, &rs);
- if (regard_xa) {
+ if (flags & RR_REGARD_XA) {
rs.chr += 14;
rs.len -= 14;
if (rs.len < 0)
@@ -485,12 +489,22 @@ repeat:
"relocated directory\n");
goto out;
case SIG('C', 'L'):
- ISOFS_I(inode)->i_first_extent =
- isonum_733(rr->u.CL.location);
- reloc =
- isofs_iget(inode->i_sb,
- ISOFS_I(inode)->i_first_extent,
- 0);
+ if (flags & RR_RELOC_DE) {
+ printk(KERN_ERR
+ "ISOFS: Recursive directory relocation "
+ "is not supported\n");
+ goto eio;
+ }
+ reloc_block = isonum_733(rr->u.CL.location);
+ if (reloc_block == ISOFS_I(inode)->i_iget5_block &&
+ ISOFS_I(inode)->i_iget5_offset == 0) {
+ printk(KERN_ERR
+ "ISOFS: Directory relocation points to "
+ "itself\n");
+ goto eio;
+ }
+ ISOFS_I(inode)->i_first_extent = reloc_block;
+ reloc = isofs_iget_reloc(inode->i_sb, reloc_block, 0);
if (IS_ERR(reloc)) {
ret = PTR_ERR(reloc);
goto out;
@@ -637,9 +651,11 @@ static char *get_symlink_chunk(char *rpnt, struct rock_ridge *rr, char *plimit)
return rpnt;
}
-int parse_rock_ridge_inode(struct iso_directory_record *de, struct inode *inode)
+int parse_rock_ridge_inode(struct iso_directory_record *de, struct inode *inode,
+ int relocated)
{
- int result = parse_rock_ridge_inode_internal(de, inode, 0);
+ int flags = relocated ? RR_RELOC_DE : 0;
+ int result = parse_rock_ridge_inode_internal(de, inode, flags);
/*
* if rockridge flag was reset and we didn't look for attributes
@@ -647,7 +663,8 @@ int parse_rock_ridge_inode(struct iso_directory_record *de, struct inode *inode)
*/
if ((ISOFS_SB(inode->i_sb)->s_rock_offset == -1)
&& (ISOFS_SB(inode->i_sb)->s_rock == 2)) {
- result = parse_rock_ridge_inode_internal(de, inode, 14);
+ result = parse_rock_ridge_inode_internal(de, inode,
+ flags | RR_REGARD_XA);
}
return result;
}
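Two new guards bound the relocation walk: a CL entry found while already parsing a relocated directory fails (RR_RELOC_DE set), and a CL entry whose target is the directory's own extent fails. Together they cap the isofs_iget_reloc() -> isofs_read_inode() -> parse_rock_ridge_inode() chain at one level of relocation, which is what closes the crafted-image stack overflow. The essential shape:

	/* Sketch: allow at most one level of directory relocation. */
	if (flags & RR_RELOC_DE)
		goto eio;		/* CL inside a relocated dir */
	reloc_block = isonum_733(rr->u.CL.location);
	if (reloc_block == ISOFS_I(inode)->i_iget5_block &&
	    ISOFS_I(inode)->i_iget5_offset == 0)
		goto eio;		/* CL pointing at itself */
	reloc = isofs_iget_reloc(inode->i_sb, reloc_block, 0);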
diff --git a/fs/locks.c b/fs/locks.c
index a6f54802d277..cb66fb05ad4a 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -247,6 +247,18 @@ void locks_free_lock(struct file_lock *fl)
}
EXPORT_SYMBOL(locks_free_lock);
+static void
+locks_dispose_list(struct list_head *dispose)
+{
+ struct file_lock *fl;
+
+ while (!list_empty(dispose)) {
+ fl = list_first_entry(dispose, struct file_lock, fl_block);
+ list_del_init(&fl->fl_block);
+ locks_free_lock(fl);
+ }
+}
+
void locks_init_lock(struct file_lock *fl)
{
memset(fl, 0, sizeof(struct file_lock));
@@ -285,7 +297,8 @@ EXPORT_SYMBOL(__locks_copy_lock);
void locks_copy_lock(struct file_lock *new, struct file_lock *fl)
{
- locks_release_private(new);
+ /* "new" must be a freshly-initialized lock */
+ WARN_ON_ONCE(new->fl_ops);
__locks_copy_lock(new, fl);
new->fl_file = fl->fl_file;
@@ -650,12 +663,16 @@ static void locks_unlink_lock(struct file_lock **thisfl_p)
*
* Must be called with i_lock held!
*/
-static void locks_delete_lock(struct file_lock **thisfl_p)
+static void locks_delete_lock(struct file_lock **thisfl_p,
+ struct list_head *dispose)
{
struct file_lock *fl = *thisfl_p;
locks_unlink_lock(thisfl_p);
- locks_free_lock(fl);
+ if (dispose)
+ list_add(&fl->fl_block, dispose);
+ else
+ locks_free_lock(fl);
}
/* Determine if lock sys_fl blocks lock caller_fl. Common functionality
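locks_delete_lock() can now run under the i_lock spinlock while deferring the actual locks_free_lock(), whose fl_ops->fl_release_private callback may block, by queueing the lock on a caller-supplied dispose list; passing NULL keeps the old immediate free, as the lease path still does in this patch. Every converted call site follows the same shape:

	LIST_HEAD(dispose);

	spin_lock(&inode->i_lock);
	/* ... unlink, queueing instead of freeing under the spinlock: */
	locks_delete_lock(before, &dispose);
	spin_unlock(&inode->i_lock);
	locks_dispose_list(&dispose);	/* free with no spinlock held */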
@@ -811,6 +828,7 @@ static int flock_lock_file(struct file *filp, struct file_lock *request)
struct inode * inode = file_inode(filp);
int error = 0;
int found = 0;
+ LIST_HEAD(dispose);
if (!(request->fl_flags & FL_ACCESS) && (request->fl_type != F_UNLCK)) {
new_fl = locks_alloc_lock();
@@ -833,7 +851,7 @@ static int flock_lock_file(struct file *filp, struct file_lock *request)
if (request->fl_type == fl->fl_type)
goto out;
found = 1;
- locks_delete_lock(before);
+ locks_delete_lock(before, &dispose);
break;
}
@@ -880,6 +898,7 @@ out:
spin_unlock(&inode->i_lock);
if (new_fl)
locks_free_lock(new_fl);
+ locks_dispose_list(&dispose);
return error;
}
@@ -893,6 +912,7 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str
struct file_lock **before;
int error;
bool added = false;
+ LIST_HEAD(dispose);
/*
* We may need two file_lock structures for this operation,
@@ -988,7 +1008,7 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str
else
request->fl_end = fl->fl_end;
if (added) {
- locks_delete_lock(before);
+ locks_delete_lock(before, &dispose);
continue;
}
request = fl;
@@ -1018,21 +1038,24 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str
* one (This may happen several times).
*/
if (added) {
- locks_delete_lock(before);
+ locks_delete_lock(before, &dispose);
continue;
}
- /* Replace the old lock with the new one.
- * Wake up anybody waiting for the old one,
- * as the change in lock type might satisfy
- * their needs.
+ /*
+ * Replace the old lock with new_fl, and
+ * remove the old one. It's safe to do the
+ * insert here since we know that we won't be
+ * using new_fl later, and that the lock is
+ * just replacing an existing lock.
*/
- locks_wake_up_blocks(fl);
- fl->fl_start = request->fl_start;
- fl->fl_end = request->fl_end;
- fl->fl_type = request->fl_type;
- locks_release_private(fl);
- locks_copy_private(fl, request);
- request = fl;
+ error = -ENOLCK;
+ if (!new_fl)
+ goto out;
+ locks_copy_lock(new_fl, request);
+ request = new_fl;
+ new_fl = NULL;
+ locks_delete_lock(before, &dispose);
+ locks_insert_lock(before, request);
added = true;
}
}
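Rather than mutating the existing lock in place and juggling private data with locks_release_private()/locks_copy_private(), the rewritten branch consumes new_fl: copy the request into it, insert it, and retire the old lock through the dispose list. new_fl was allocated before i_lock was taken precisely so this step cannot sleep; -ENOLCK surfaces only when that earlier allocation failed. The allocate-outside, consume-inside pattern in brief:

	new_fl = locks_alloc_lock();	/* may sleep: no spinlock held yet */
	spin_lock(&inode->i_lock);
	/* ... on the replace path: */
	if (!new_fl) {
		error = -ENOLCK;	/* the up-front allocation failed */
		goto out;
	}
	locks_copy_lock(new_fl, request);	/* new_fl freshly initialized */
	request = new_fl;
	new_fl = NULL;				/* ownership handed off */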
@@ -1093,6 +1116,7 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str
locks_free_lock(new_fl);
if (new_fl2)
locks_free_lock(new_fl2);
+ locks_dispose_list(&dispose);
return error;
}
@@ -1268,7 +1292,7 @@ int lease_modify(struct file_lock **before, int arg)
printk(KERN_ERR "locks_delete_lock: fasync == %p\n", fl->fl_fasync);
fl->fl_fasync = NULL;
}
- locks_delete_lock(before);
+ locks_delete_lock(before, NULL);
}
return 0;
}
@@ -1737,13 +1761,10 @@ static int do_fcntl_add_lease(unsigned int fd, struct file *filp, long arg)
ret = fl;
spin_lock(&inode->i_lock);
error = __vfs_setlease(filp, arg, &ret);
- if (error) {
- spin_unlock(&inode->i_lock);
- locks_free_lock(fl);
- goto out_free_fasync;
- }
- if (ret != fl)
- locks_free_lock(fl);
+ if (error)
+ goto out_unlock;
+ if (ret == fl)
+ fl = NULL;
/*
* fasync_insert_entry() returns the old entry if any.
@@ -1755,9 +1776,10 @@ static int do_fcntl_add_lease(unsigned int fd, struct file *filp, long arg)
new = NULL;
error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0);
+out_unlock:
spin_unlock(&inode->i_lock);
-
-out_free_fasync:
+ if (fl)
+ locks_free_lock(fl);
if (new)
fasync_free(new);
return error;
@@ -2320,6 +2342,7 @@ void locks_remove_file(struct file *filp)
struct inode * inode = file_inode(filp);
struct file_lock *fl;
struct file_lock **before;
+ LIST_HEAD(dispose);
if (!inode->i_flock)
return;
@@ -2365,12 +2388,13 @@ void locks_remove_file(struct file *filp)
fl->fl_type, fl->fl_flags,
fl->fl_start, fl->fl_end);
- locks_delete_lock(before);
+ locks_delete_lock(before, &dispose);
continue;
}
before = &fl->fl_next;
}
spin_unlock(&inode->i_lock);
+ locks_dispose_list(&dispose);
}
/**
@@ -2452,7 +2476,11 @@ static void lock_get_status(struct seq_file *f, struct file_lock *fl,
seq_puts(f, "FLOCK ADVISORY ");
}
} else if (IS_LEASE(fl)) {
- seq_puts(f, "LEASE ");
+ if (fl->fl_flags & FL_DELEG)
+ seq_puts(f, "DELEG ");
+ else
+ seq_puts(f, "LEASE ");
+
if (lease_breaking(fl))
seq_puts(f, "BREAKING ");
else if (fl->fl_file)
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index 9737cba1357d..83a06001742b 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -1014,7 +1014,7 @@ static int udf_symlink(struct inode *dir, struct dentry *dentry,
fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err);
if (!fi)
- goto out_no_entry;
+ goto out_fail;
cfi.icb.extLength = cpu_to_le32(sb->s_blocksize);
cfi.icb.extLocation = cpu_to_lelb(iinfo->i_location);
if (UDF_SB(inode->i_sb)->s_lvid_bh) {
@@ -1036,6 +1036,7 @@ out:
out_no_entry:
up_write(&iinfo->i_data_sem);
+out_fail:
inode_dec_link_count(inode);
iput(inode);
goto out;
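The udf_symlink() fix addresses a double unlock: i_data_sem is dropped before udf_add_entry() is called, so jumping to out_no_entry on that failure ran up_write() on a semaphore no longer held. The new label ordering gives the late failure the refcount cleanup without the extra unlock:

	out_no_entry:	/* failures that still hold i_data_sem */
		up_write(&iinfo->i_data_sem);
	out_fail:	/* udf_add_entry() failed: sem already released */
		inode_dec_link_count(inode);
		iput(inode);
		goto out;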