summaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/affs/Changes2
-rw-r--r--fs/aio.c115
-rw-r--r--fs/btrfs/check-integrity.c32
-rw-r--r--fs/btrfs/check-integrity.h2
-rw-r--r--fs/btrfs/extent-tree.c22
-rw-r--r--fs/btrfs/extent_io.c12
-rw-r--r--fs/btrfs/ioctl.c3
-rw-r--r--fs/btrfs/relocation.c81
-rw-r--r--fs/btrfs/scrub.c33
-rw-r--r--fs/btrfs/send.c4
-rw-r--r--fs/btrfs/super.c5
-rw-r--r--fs/ceph/addr.c10
-rw-r--r--fs/ceph/cache.c3
-rw-r--r--fs/ceph/caps.c27
-rw-r--r--fs/ceph/dir.c11
-rw-r--r--fs/ceph/inode.c183
-rw-r--r--fs/ceph/mds_client.c61
-rw-r--r--fs/ceph/mds_client.h1
-rw-r--r--fs/ceph/super.h8
-rw-r--r--fs/cifs/cifsglob.h1
-rw-r--r--fs/cifs/cifsproto.h7
-rw-r--r--fs/cifs/cifssmb.c6
-rw-r--r--fs/cifs/dir.c11
-rw-r--r--fs/cifs/inode.c6
-rw-r--r--fs/cifs/ioctl.c6
-rw-r--r--fs/cifs/link.c26
-rw-r--r--fs/cifs/smb2ops.c99
-rw-r--r--fs/cifs/smb2pdu.c92
-rw-r--r--fs/cifs/smb2pdu.h12
-rw-r--r--fs/cifs/smb2proto.h1
-rw-r--r--fs/cifs/smbfsctl.h2
-rw-r--r--fs/dcache.c2
-rw-r--r--fs/eventpoll.c7
-rw-r--r--fs/ext2/super.c1
-rw-r--r--fs/ext4/ext4.h10
-rw-r--r--fs/ext4/ext4_jbd2.c9
-rw-r--r--fs/ext4/extents.c45
-rw-r--r--fs/ext4/inode.c12
-rw-r--r--fs/ext4/mballoc.c17
-rw-r--r--fs/ext4/super.c21
-rw-r--r--fs/gfs2/aops.c30
-rw-r--r--fs/gfs2/glock.c2
-rw-r--r--fs/gfs2/glops.c10
-rw-r--r--fs/gfs2/log.c4
-rw-r--r--fs/gfs2/meta_io.c5
-rw-r--r--fs/gfs2/ops_fstype.c12
-rw-r--r--fs/hfsplus/wrapper.c17
-rw-r--r--fs/jbd2/journal.c18
-rw-r--r--fs/jbd2/recovery.c2
-rw-r--r--fs/jbd2/transaction.c16
-rw-r--r--fs/logfs/dev_bdev.c13
-rw-r--r--fs/namei.c10
-rw-r--r--fs/nfs/blocklayout/blocklayout.h1
-rw-r--r--fs/nfs/blocklayout/extents.c2
-rw-r--r--fs/nfs/dns_resolve.c2
-rw-r--r--fs/nfs/inode.c2
-rw-r--r--fs/nfs/internal.h15
-rw-r--r--fs/nfs/nfs4_fs.h8
-rw-r--r--fs/nfs/nfs4proc.c30
-rw-r--r--fs/nfsd/nfscache.c9
-rw-r--r--fs/pipe.c39
-rw-r--r--fs/proc/inode.c14
-rw-r--r--fs/pstore/platform.c7
-rw-r--r--fs/squashfs/file_direct.c5
-rw-r--r--fs/sysfs/file.c18
-rw-r--r--fs/xfs/xfs_bmap.c32
-rw-r--r--fs/xfs/xfs_bmap_util.c14
-rw-r--r--fs/xfs/xfs_buf.c37
-rw-r--r--fs/xfs/xfs_buf.h11
-rw-r--r--fs/xfs/xfs_buf_item.c21
-rw-r--r--fs/xfs/xfs_dir2_node.c26
-rw-r--r--fs/xfs/xfs_discard.c5
-rw-r--r--fs/xfs/xfs_fsops.c6
-rw-r--r--fs/xfs/xfs_ioctl.c3
-rw-r--r--fs/xfs/xfs_ioctl32.c3
-rw-r--r--fs/xfs/xfs_iops.c3
-rw-r--r--fs/xfs/xfs_log_recover.c13
-rw-r--r--fs/xfs/xfs_qm.c80
-rw-r--r--fs/xfs/xfs_trans_buf.c13
79 files changed, 1017 insertions, 549 deletions
diff --git a/fs/affs/Changes b/fs/affs/Changes
index a29409c1ffe0..b41c2c9792ff 100644
--- a/fs/affs/Changes
+++ b/fs/affs/Changes
@@ -91,7 +91,7 @@ more 2.4 fixes: [Roman Zippel]
Version 3.11
------------
-- Converted to use 2.3.x page cache [Dave Jones <dave@powertweak.com>]
+- Converted to use 2.3.x page cache [Dave Jones]
- Corruption in truncate() bugfix [Ken Tyler <kent@werple.net.au>]
Version 3.10
diff --git a/fs/aio.c b/fs/aio.c
index 08159ed13649..062a5f6a1448 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -244,9 +244,14 @@ static void aio_free_ring(struct kioctx *ctx)
int i;
for (i = 0; i < ctx->nr_pages; i++) {
+ struct page *page;
pr_debug("pid(%d) [%d] page->count=%d\n", current->pid, i,
page_count(ctx->ring_pages[i]));
- put_page(ctx->ring_pages[i]);
+ page = ctx->ring_pages[i];
+ if (!page)
+ continue;
+ ctx->ring_pages[i] = NULL;
+ put_page(page);
}
put_aio_ring_file(ctx);
@@ -280,18 +285,38 @@ static int aio_migratepage(struct address_space *mapping, struct page *new,
unsigned long flags;
int rc;
+ rc = 0;
+
+ /* Make sure the old page hasn't already been changed */
+ spin_lock(&mapping->private_lock);
+ ctx = mapping->private_data;
+ if (ctx) {
+ pgoff_t idx;
+ spin_lock_irqsave(&ctx->completion_lock, flags);
+ idx = old->index;
+ if (idx < (pgoff_t)ctx->nr_pages) {
+ if (ctx->ring_pages[idx] != old)
+ rc = -EAGAIN;
+ } else
+ rc = -EINVAL;
+ spin_unlock_irqrestore(&ctx->completion_lock, flags);
+ } else
+ rc = -EINVAL;
+ spin_unlock(&mapping->private_lock);
+
+ if (rc != 0)
+ return rc;
+
/* Writeback must be complete */
BUG_ON(PageWriteback(old));
- put_page(old);
+ get_page(new);
- rc = migrate_page_move_mapping(mapping, new, old, NULL, mode);
+ rc = migrate_page_move_mapping(mapping, new, old, NULL, mode, 1);
if (rc != MIGRATEPAGE_SUCCESS) {
- get_page(old);
+ put_page(new);
return rc;
}
- get_page(new);
-
/* We can potentially race against kioctx teardown here. Use the
* address_space's private data lock to protect the mapping's
* private_data.
@@ -303,13 +328,24 @@ static int aio_migratepage(struct address_space *mapping, struct page *new,
spin_lock_irqsave(&ctx->completion_lock, flags);
migrate_page_copy(new, old);
idx = old->index;
- if (idx < (pgoff_t)ctx->nr_pages)
- ctx->ring_pages[idx] = new;
+ if (idx < (pgoff_t)ctx->nr_pages) {
+ /* And only do the move if things haven't changed */
+ if (ctx->ring_pages[idx] == old)
+ ctx->ring_pages[idx] = new;
+ else
+ rc = -EAGAIN;
+ } else
+ rc = -EINVAL;
spin_unlock_irqrestore(&ctx->completion_lock, flags);
} else
rc = -EBUSY;
spin_unlock(&mapping->private_lock);
+ if (rc == MIGRATEPAGE_SUCCESS)
+ put_page(old);
+ else
+ put_page(new);
+
return rc;
}
#endif
@@ -326,7 +362,7 @@ static int aio_setup_ring(struct kioctx *ctx)
struct aio_ring *ring;
unsigned nr_events = ctx->max_reqs;
struct mm_struct *mm = current->mm;
- unsigned long size, populate;
+ unsigned long size, unused;
int nr_pages;
int i;
struct file *file;
@@ -347,6 +383,20 @@ static int aio_setup_ring(struct kioctx *ctx)
return -EAGAIN;
}
+ ctx->aio_ring_file = file;
+ nr_events = (PAGE_SIZE * nr_pages - sizeof(struct aio_ring))
+ / sizeof(struct io_event);
+
+ ctx->ring_pages = ctx->internal_pages;
+ if (nr_pages > AIO_RING_PAGES) {
+ ctx->ring_pages = kcalloc(nr_pages, sizeof(struct page *),
+ GFP_KERNEL);
+ if (!ctx->ring_pages) {
+ put_aio_ring_file(ctx);
+ return -ENOMEM;
+ }
+ }
+
for (i = 0; i < nr_pages; i++) {
struct page *page;
page = find_or_create_page(file->f_inode->i_mapping,
@@ -358,17 +408,14 @@ static int aio_setup_ring(struct kioctx *ctx)
SetPageUptodate(page);
SetPageDirty(page);
unlock_page(page);
+
+ ctx->ring_pages[i] = page;
}
- ctx->aio_ring_file = file;
- nr_events = (PAGE_SIZE * nr_pages - sizeof(struct aio_ring))
- / sizeof(struct io_event);
+ ctx->nr_pages = i;
- ctx->ring_pages = ctx->internal_pages;
- if (nr_pages > AIO_RING_PAGES) {
- ctx->ring_pages = kcalloc(nr_pages, sizeof(struct page *),
- GFP_KERNEL);
- if (!ctx->ring_pages)
- return -ENOMEM;
+ if (unlikely(i != nr_pages)) {
+ aio_free_ring(ctx);
+ return -EAGAIN;
}
ctx->mmap_size = nr_pages * PAGE_SIZE;
@@ -377,9 +424,9 @@ static int aio_setup_ring(struct kioctx *ctx)
down_write(&mm->mmap_sem);
ctx->mmap_base = do_mmap_pgoff(ctx->aio_ring_file, 0, ctx->mmap_size,
PROT_READ | PROT_WRITE,
- MAP_SHARED | MAP_POPULATE, 0, &populate);
+ MAP_SHARED, 0, &unused);
+ up_write(&mm->mmap_sem);
if (IS_ERR((void *)ctx->mmap_base)) {
- up_write(&mm->mmap_sem);
ctx->mmap_size = 0;
aio_free_ring(ctx);
return -EAGAIN;
@@ -387,27 +434,6 @@ static int aio_setup_ring(struct kioctx *ctx)
pr_debug("mmap address: 0x%08lx\n", ctx->mmap_base);
- /* We must do this while still holding mmap_sem for write, as we
- * need to be protected against userspace attempting to mremap()
- * or munmap() the ring buffer.
- */
- ctx->nr_pages = get_user_pages(current, mm, ctx->mmap_base, nr_pages,
- 1, 0, ctx->ring_pages, NULL);
-
- /* Dropping the reference here is safe as the page cache will hold
- * onto the pages for us. It is also required so that page migration
- * can unmap the pages and get the right reference count.
- */
- for (i = 0; i < ctx->nr_pages; i++)
- put_page(ctx->ring_pages[i]);
-
- up_write(&mm->mmap_sem);
-
- if (unlikely(ctx->nr_pages != nr_pages)) {
- aio_free_ring(ctx);
- return -EAGAIN;
- }
-
ctx->user_id = ctx->mmap_base;
ctx->nr_events = nr_events; /* trusted copy */
@@ -645,12 +671,13 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
aio_nr + nr_events < aio_nr) {
spin_unlock(&aio_nr_lock);
err = -EAGAIN;
- goto err;
+ goto err_ctx;
}
aio_nr += ctx->max_reqs;
spin_unlock(&aio_nr_lock);
- percpu_ref_get(&ctx->users); /* io_setup() will drop this ref */
+ percpu_ref_get(&ctx->users); /* io_setup() will drop this ref */
+ percpu_ref_get(&ctx->reqs); /* free_ioctx_users() will drop this */
err = ioctx_add_table(ctx, mm);
if (err)
@@ -662,6 +689,8 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
err_cleanup:
aio_nr_sub(ctx->max_reqs);
+err_ctx:
+ aio_free_ring(ctx);
err:
free_percpu(ctx->cpu);
free_percpu(ctx->reqs.pcpu_count);
diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c
index b50764bef141..131d82800b3a 100644
--- a/fs/btrfs/check-integrity.c
+++ b/fs/btrfs/check-integrity.c
@@ -333,7 +333,6 @@ static void btrfsic_release_block_ctx(struct btrfsic_block_data_ctx *block_ctx);
static int btrfsic_read_block(struct btrfsic_state *state,
struct btrfsic_block_data_ctx *block_ctx);
static void btrfsic_dump_database(struct btrfsic_state *state);
-static void btrfsic_complete_bio_end_io(struct bio *bio, int err);
static int btrfsic_test_for_metadata(struct btrfsic_state *state,
char **datav, unsigned int num_pages);
static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state,
@@ -1687,7 +1686,6 @@ static int btrfsic_read_block(struct btrfsic_state *state,
for (i = 0; i < num_pages;) {
struct bio *bio;
unsigned int j;
- DECLARE_COMPLETION_ONSTACK(complete);
bio = btrfs_io_bio_alloc(GFP_NOFS, num_pages - i);
if (!bio) {
@@ -1698,8 +1696,6 @@ static int btrfsic_read_block(struct btrfsic_state *state,
}
bio->bi_bdev = block_ctx->dev->bdev;
bio->bi_sector = dev_bytenr >> 9;
- bio->bi_end_io = btrfsic_complete_bio_end_io;
- bio->bi_private = &complete;
for (j = i; j < num_pages; j++) {
ret = bio_add_page(bio, block_ctx->pagev[j],
@@ -1712,12 +1708,7 @@ static int btrfsic_read_block(struct btrfsic_state *state,
"btrfsic: error, failed to add a single page!\n");
return -1;
}
- submit_bio(READ, bio);
-
- /* this will also unplug the queue */
- wait_for_completion(&complete);
-
- if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) {
+ if (submit_bio_wait(READ, bio)) {
printk(KERN_INFO
"btrfsic: read error at logical %llu dev %s!\n",
block_ctx->start, block_ctx->dev->name);
@@ -1740,11 +1731,6 @@ static int btrfsic_read_block(struct btrfsic_state *state,
return block_ctx->len;
}
-static void btrfsic_complete_bio_end_io(struct bio *bio, int err)
-{
- complete((struct completion *)bio->bi_private);
-}
-
static void btrfsic_dump_database(struct btrfsic_state *state)
{
struct list_head *elem_all;
@@ -3008,14 +2994,12 @@ int btrfsic_submit_bh(int rw, struct buffer_head *bh)
return submit_bh(rw, bh);
}
-void btrfsic_submit_bio(int rw, struct bio *bio)
+static void __btrfsic_submit_bio(int rw, struct bio *bio)
{
struct btrfsic_dev_state *dev_state;
- if (!btrfsic_is_initialized) {
- submit_bio(rw, bio);
+ if (!btrfsic_is_initialized)
return;
- }
mutex_lock(&btrfsic_mutex);
/* since btrfsic_submit_bio() is also called before
@@ -3106,10 +3090,20 @@ void btrfsic_submit_bio(int rw, struct bio *bio)
}
leave:
mutex_unlock(&btrfsic_mutex);
+}
+void btrfsic_submit_bio(int rw, struct bio *bio)
+{
+ __btrfsic_submit_bio(rw, bio);
submit_bio(rw, bio);
}
+int btrfsic_submit_bio_wait(int rw, struct bio *bio)
+{
+ __btrfsic_submit_bio(rw, bio);
+ return submit_bio_wait(rw, bio);
+}
+
int btrfsic_mount(struct btrfs_root *root,
struct btrfs_fs_devices *fs_devices,
int including_extent_data, u32 print_mask)
diff --git a/fs/btrfs/check-integrity.h b/fs/btrfs/check-integrity.h
index 8b59175cc502..13b8566c97ab 100644
--- a/fs/btrfs/check-integrity.h
+++ b/fs/btrfs/check-integrity.h
@@ -22,9 +22,11 @@
#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
int btrfsic_submit_bh(int rw, struct buffer_head *bh);
void btrfsic_submit_bio(int rw, struct bio *bio);
+int btrfsic_submit_bio_wait(int rw, struct bio *bio);
#else
#define btrfsic_submit_bh submit_bh
#define btrfsic_submit_bio submit_bio
+#define btrfsic_submit_bio_wait submit_bio_wait
#endif
int btrfsic_mount(struct btrfs_root *root,
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 45d98d01028f..9c01509dd8ab 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -767,20 +767,19 @@ int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
if (!path)
return -ENOMEM;
- if (metadata) {
- key.objectid = bytenr;
- key.type = BTRFS_METADATA_ITEM_KEY;
- key.offset = offset;
- } else {
- key.objectid = bytenr;
- key.type = BTRFS_EXTENT_ITEM_KEY;
- key.offset = offset;
- }
-
if (!trans) {
path->skip_locking = 1;
path->search_commit_root = 1;
}
+
+search_again:
+ key.objectid = bytenr;
+ key.offset = offset;
+ if (metadata)
+ key.type = BTRFS_METADATA_ITEM_KEY;
+ else
+ key.type = BTRFS_EXTENT_ITEM_KEY;
+
again:
ret = btrfs_search_slot(trans, root->fs_info->extent_root,
&key, path, 0, 0);
@@ -788,7 +787,6 @@ again:
goto out_free;
if (ret > 0 && metadata && key.type == BTRFS_METADATA_ITEM_KEY) {
- metadata = 0;
if (path->slots[0]) {
path->slots[0]--;
btrfs_item_key_to_cpu(path->nodes[0], &key,
@@ -855,7 +853,7 @@ again:
mutex_lock(&head->mutex);
mutex_unlock(&head->mutex);
btrfs_put_delayed_ref(&head->node);
- goto again;
+ goto search_again;
}
if (head->extent_op && head->extent_op->update_flags)
extent_flags |= head->extent_op->flags_to_set;
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 8e457fca0a0b..ff43802a7c88 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -1952,11 +1952,6 @@ static int free_io_failure(struct inode *inode, struct io_failure_record *rec,
return err;
}
-static void repair_io_failure_callback(struct bio *bio, int err)
-{
- complete(bio->bi_private);
-}
-
/*
* this bypasses the standard btrfs submit functions deliberately, as
* the standard behavior is to write all copies in a raid setup. here we only
@@ -1973,7 +1968,6 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 start,
{
struct bio *bio;
struct btrfs_device *dev;
- DECLARE_COMPLETION_ONSTACK(compl);
u64 map_length = 0;
u64 sector;
struct btrfs_bio *bbio = NULL;
@@ -1990,8 +1984,6 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 start,
bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
if (!bio)
return -EIO;
- bio->bi_private = &compl;
- bio->bi_end_io = repair_io_failure_callback;
bio->bi_size = 0;
map_length = length;
@@ -2012,10 +2004,8 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 start,
}
bio->bi_bdev = dev->bdev;
bio_add_page(bio, page, length, start - page_offset(page));
- btrfsic_submit_bio(WRITE_SYNC, bio);
- wait_for_completion(&compl);
- if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) {
+ if (btrfsic_submit_bio_wait(WRITE_SYNC, bio)) {
/* try to remap that extent elsewhere? */
bio_put(bio);
btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS);
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index a111622598b0..21da5762b0b1 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -2121,7 +2121,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
err = mutex_lock_killable_nested(&dir->i_mutex, I_MUTEX_PARENT);
if (err == -EINTR)
- goto out;
+ goto out_drop_write;
dentry = lookup_one_len(vol_args->name, parent, namelen);
if (IS_ERR(dentry)) {
err = PTR_ERR(dentry);
@@ -2284,6 +2284,7 @@ out_dput:
dput(dentry);
out_unlock_dir:
mutex_unlock(&dir->i_mutex);
+out_drop_write:
mnt_drop_write_file(file);
out:
kfree(vol_args);
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index ce459a7cb16d..429c73c374b8 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -571,7 +571,9 @@ static int is_cowonly_root(u64 root_objectid)
root_objectid == BTRFS_CHUNK_TREE_OBJECTID ||
root_objectid == BTRFS_DEV_TREE_OBJECTID ||
root_objectid == BTRFS_TREE_LOG_OBJECTID ||
- root_objectid == BTRFS_CSUM_TREE_OBJECTID)
+ root_objectid == BTRFS_CSUM_TREE_OBJECTID ||
+ root_objectid == BTRFS_UUID_TREE_OBJECTID ||
+ root_objectid == BTRFS_QUOTA_TREE_OBJECTID)
return 1;
return 0;
}
@@ -1264,10 +1266,10 @@ static int __must_check __add_reloc_root(struct btrfs_root *root)
}
/*
- * helper to update/delete the 'address of tree root -> reloc tree'
+ * helper to delete the 'address of tree root -> reloc tree'
* mapping
*/
-static int __update_reloc_root(struct btrfs_root *root, int del)
+static void __del_reloc_root(struct btrfs_root *root)
{
struct rb_node *rb_node;
struct mapping_node *node = NULL;
@@ -1275,7 +1277,7 @@ static int __update_reloc_root(struct btrfs_root *root, int del)
spin_lock(&rc->reloc_root_tree.lock);
rb_node = tree_search(&rc->reloc_root_tree.rb_root,
- root->commit_root->start);
+ root->node->start);
if (rb_node) {
node = rb_entry(rb_node, struct mapping_node, rb_node);
rb_erase(&node->rb_node, &rc->reloc_root_tree.rb_root);
@@ -1283,23 +1285,45 @@ static int __update_reloc_root(struct btrfs_root *root, int del)
spin_unlock(&rc->reloc_root_tree.lock);
if (!node)
- return 0;
+ return;
BUG_ON((struct btrfs_root *)node->data != root);
- if (!del) {
- spin_lock(&rc->reloc_root_tree.lock);
- node->bytenr = root->node->start;
- rb_node = tree_insert(&rc->reloc_root_tree.rb_root,
- node->bytenr, &node->rb_node);
- spin_unlock(&rc->reloc_root_tree.lock);
- if (rb_node)
- backref_tree_panic(rb_node, -EEXIST, node->bytenr);
- } else {
- spin_lock(&root->fs_info->trans_lock);
- list_del_init(&root->root_list);
- spin_unlock(&root->fs_info->trans_lock);
- kfree(node);
+ spin_lock(&root->fs_info->trans_lock);
+ list_del_init(&root->root_list);
+ spin_unlock(&root->fs_info->trans_lock);
+ kfree(node);
+}
+
+/*
+ * helper to update the 'address of tree root -> reloc tree'
+ * mapping
+ */
+static int __update_reloc_root(struct btrfs_root *root, u64 new_bytenr)
+{
+ struct rb_node *rb_node;
+ struct mapping_node *node = NULL;
+ struct reloc_control *rc = root->fs_info->reloc_ctl;
+
+ spin_lock(&rc->reloc_root_tree.lock);
+ rb_node = tree_search(&rc->reloc_root_tree.rb_root,
+ root->node->start);
+ if (rb_node) {
+ node = rb_entry(rb_node, struct mapping_node, rb_node);
+ rb_erase(&node->rb_node, &rc->reloc_root_tree.rb_root);
}
+ spin_unlock(&rc->reloc_root_tree.lock);
+
+ if (!node)
+ return 0;
+ BUG_ON((struct btrfs_root *)node->data != root);
+
+ spin_lock(&rc->reloc_root_tree.lock);
+ node->bytenr = new_bytenr;
+ rb_node = tree_insert(&rc->reloc_root_tree.rb_root,
+ node->bytenr, &node->rb_node);
+ spin_unlock(&rc->reloc_root_tree.lock);
+ if (rb_node)
+ backref_tree_panic(rb_node, -EEXIST, node->bytenr);
return 0;
}
@@ -1420,7 +1444,6 @@ int btrfs_update_reloc_root(struct btrfs_trans_handle *trans,
{
struct btrfs_root *reloc_root;
struct btrfs_root_item *root_item;
- int del = 0;
int ret;
if (!root->reloc_root)
@@ -1432,11 +1455,9 @@ int btrfs_update_reloc_root(struct btrfs_trans_handle *trans,
if (root->fs_info->reloc_ctl->merge_reloc_tree &&
btrfs_root_refs(root_item) == 0) {
root->reloc_root = NULL;
- del = 1;
+ __del_reloc_root(reloc_root);
}
- __update_reloc_root(reloc_root, del);
-
if (reloc_root->commit_root != reloc_root->node) {
btrfs_set_root_node(root_item, reloc_root->node);
free_extent_buffer(reloc_root->commit_root);
@@ -2287,7 +2308,7 @@ void free_reloc_roots(struct list_head *list)
while (!list_empty(list)) {
reloc_root = list_entry(list->next, struct btrfs_root,
root_list);
- __update_reloc_root(reloc_root, 1);
+ __del_reloc_root(reloc_root);
free_extent_buffer(reloc_root->node);
free_extent_buffer(reloc_root->commit_root);
kfree(reloc_root);
@@ -2332,7 +2353,7 @@ again:
ret = merge_reloc_root(rc, root);
if (ret) {
- __update_reloc_root(reloc_root, 1);
+ __del_reloc_root(reloc_root);
free_extent_buffer(reloc_root->node);
free_extent_buffer(reloc_root->commit_root);
kfree(reloc_root);
@@ -2388,6 +2409,13 @@ out:
btrfs_std_error(root->fs_info, ret);
if (!list_empty(&reloc_roots))
free_reloc_roots(&reloc_roots);
+
+ /* new reloc root may be added */
+ mutex_lock(&root->fs_info->reloc_mutex);
+ list_splice_init(&rc->reloc_roots, &reloc_roots);
+ mutex_unlock(&root->fs_info->reloc_mutex);
+ if (!list_empty(&reloc_roots))
+ free_reloc_roots(&reloc_roots);
}
BUG_ON(!RB_EMPTY_ROOT(&rc->reloc_root_tree.rb_root));
@@ -4522,6 +4550,11 @@ int btrfs_reloc_cow_block(struct btrfs_trans_handle *trans,
BUG_ON(rc->stage == UPDATE_DATA_PTRS &&
root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID);
+ if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
+ if (buf == root->node)
+ __update_reloc_root(root, cow->start);
+ }
+
level = btrfs_header_level(buf);
if (btrfs_header_generation(buf) <=
btrfs_root_last_snapshot(&root->root_item))
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 561e2f16ba3e..1fd3f33c330a 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -208,7 +208,6 @@ static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info,
int is_metadata, int have_csum,
const u8 *csum, u64 generation,
u16 csum_size);
-static void scrub_complete_bio_end_io(struct bio *bio, int err);
static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad,
struct scrub_block *sblock_good,
int force_write);
@@ -1294,7 +1293,6 @@ static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
for (page_num = 0; page_num < sblock->page_count; page_num++) {
struct bio *bio;
struct scrub_page *page = sblock->pagev[page_num];
- DECLARE_COMPLETION_ONSTACK(complete);
if (page->dev->bdev == NULL) {
page->io_error = 1;
@@ -1311,18 +1309,11 @@ static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
}
bio->bi_bdev = page->dev->bdev;
bio->bi_sector = page->physical >> 9;
- bio->bi_end_io = scrub_complete_bio_end_io;
- bio->bi_private = &complete;
bio_add_page(bio, page->page, PAGE_SIZE, 0);
- btrfsic_submit_bio(READ, bio);
-
- /* this will also unplug the queue */
- wait_for_completion(&complete);
-
- page->io_error = !test_bit(BIO_UPTODATE, &bio->bi_flags);
- if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
+ if (btrfsic_submit_bio_wait(READ, bio))
sblock->no_io_error_seen = 0;
+
bio_put(bio);
}
@@ -1391,11 +1382,6 @@ static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info,
sblock->checksum_error = 1;
}
-static void scrub_complete_bio_end_io(struct bio *bio, int err)
-{
- complete((struct completion *)bio->bi_private);
-}
-
static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad,
struct scrub_block *sblock_good,
int force_write)
@@ -1430,7 +1416,6 @@ static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
sblock_bad->checksum_error || page_bad->io_error) {
struct bio *bio;
int ret;
- DECLARE_COMPLETION_ONSTACK(complete);
if (!page_bad->dev->bdev) {
printk_ratelimited(KERN_WARNING
@@ -1443,19 +1428,14 @@ static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
return -EIO;
bio->bi_bdev = page_bad->dev->bdev;
bio->bi_sector = page_bad->physical >> 9;
- bio->bi_end_io = scrub_complete_bio_end_io;
- bio->bi_private = &complete;
ret = bio_add_page(bio, page_good->page, PAGE_SIZE, 0);
if (PAGE_SIZE != ret) {
bio_put(bio);
return -EIO;
}
- btrfsic_submit_bio(WRITE, bio);
- /* this will also unplug the queue */
- wait_for_completion(&complete);
- if (!bio_flagged(bio, BIO_UPTODATE)) {
+ if (btrfsic_submit_bio_wait(WRITE, bio)) {
btrfs_dev_stat_inc_and_print(page_bad->dev,
BTRFS_DEV_STAT_WRITE_ERRS);
btrfs_dev_replace_stats_inc(
@@ -3375,7 +3355,6 @@ static int write_page_nocow(struct scrub_ctx *sctx,
struct bio *bio;
struct btrfs_device *dev;
int ret;
- DECLARE_COMPLETION_ONSTACK(compl);
dev = sctx->wr_ctx.tgtdev;
if (!dev)
@@ -3392,8 +3371,6 @@ static int write_page_nocow(struct scrub_ctx *sctx,
spin_unlock(&sctx->stat_lock);
return -ENOMEM;
}
- bio->bi_private = &compl;
- bio->bi_end_io = scrub_complete_bio_end_io;
bio->bi_size = 0;
bio->bi_sector = physical_for_dev_replace >> 9;
bio->bi_bdev = dev->bdev;
@@ -3404,10 +3381,8 @@ leave_with_eio:
btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS);
return -EIO;
}
- btrfsic_submit_bio(WRITE_SYNC, bio);
- wait_for_completion(&compl);
- if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
+ if (btrfsic_submit_bio_wait(WRITE_SYNC, bio))
goto leave_with_eio;
bio_put(bio);
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 6837fe87f3a6..945d1db98f26 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -4723,8 +4723,8 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
}
if (!access_ok(VERIFY_READ, arg->clone_sources,
- sizeof(*arg->clone_sources *
- arg->clone_sources_count))) {
+ sizeof(*arg->clone_sources) *
+ arg->clone_sources_count)) {
ret = -EFAULT;
goto out;
}
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 2d8ac1bf0cf9..d71a11d13dfa 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -432,7 +432,6 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
} else {
printk(KERN_INFO "btrfs: setting nodatacow\n");
}
- info->compress_type = BTRFS_COMPRESS_NONE;
btrfs_clear_opt(info->mount_opt, COMPRESS);
btrfs_clear_opt(info->mount_opt, FORCE_COMPRESS);
btrfs_set_opt(info->mount_opt, NODATACOW);
@@ -461,7 +460,6 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
btrfs_set_fs_incompat(info, COMPRESS_LZO);
} else if (strncmp(args[0].from, "no", 2) == 0) {
compress_type = "no";
- info->compress_type = BTRFS_COMPRESS_NONE;
btrfs_clear_opt(info->mount_opt, COMPRESS);
btrfs_clear_opt(info->mount_opt, FORCE_COMPRESS);
compress_force = false;
@@ -474,9 +472,10 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
btrfs_set_opt(info->mount_opt, FORCE_COMPRESS);
pr_info("btrfs: force %s compression\n",
compress_type);
- } else
+ } else if (btrfs_test_opt(root, COMPRESS)) {
pr_info("btrfs: use %s compression\n",
compress_type);
+ }
break;
case Opt_ssd:
printk(KERN_INFO "btrfs: use ssd allocation scheme\n");
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 6df8bd481425..ec3ba43b9faa 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -210,13 +210,17 @@ static int readpage_nounlock(struct file *filp, struct page *page)
if (err < 0) {
SetPageError(page);
goto out;
- } else if (err < PAGE_CACHE_SIZE) {
+ } else {
+ if (err < PAGE_CACHE_SIZE) {
/* zero fill remainder of page */
- zero_user_segment(page, err, PAGE_CACHE_SIZE);
+ zero_user_segment(page, err, PAGE_CACHE_SIZE);
+ } else {
+ flush_dcache_page(page);
+ }
}
SetPageUptodate(page);
- if (err == 0)
+ if (err >= 0)
ceph_readpage_to_fscache(inode, page);
out:
diff --git a/fs/ceph/cache.c b/fs/ceph/cache.c
index 7db2e6ca4b8f..8c44fdd4e1c3 100644
--- a/fs/ceph/cache.c
+++ b/fs/ceph/cache.c
@@ -324,6 +324,9 @@ void ceph_invalidate_fscache_page(struct inode* inode, struct page *page)
{
struct ceph_inode_info *ci = ceph_inode(inode);
+ if (!PageFsCache(page))
+ return;
+
fscache_wait_on_page_write(ci->fscache, page);
fscache_uncache_page(ci->fscache, page);
}
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 13976c33332e..3c0a4bd74996 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -897,7 +897,7 @@ static int __ceph_is_any_caps(struct ceph_inode_info *ci)
* caller should hold i_ceph_lock.
* caller will not hold session s_mutex if called from destroy_inode.
*/
-void __ceph_remove_cap(struct ceph_cap *cap)
+void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release)
{
struct ceph_mds_session *session = cap->session;
struct ceph_inode_info *ci = cap->ci;
@@ -909,6 +909,16 @@ void __ceph_remove_cap(struct ceph_cap *cap)
/* remove from session list */
spin_lock(&session->s_cap_lock);
+ /*
+ * s_cap_reconnect is protected by s_cap_lock. no one changes
+ * s_cap_gen while session is in the reconnect state.
+ */
+ if (queue_release &&
+ (!session->s_cap_reconnect ||
+ cap->cap_gen == session->s_cap_gen))
+ __queue_cap_release(session, ci->i_vino.ino, cap->cap_id,
+ cap->mseq, cap->issue_seq);
+
if (session->s_cap_iterator == cap) {
/* not yet, we are iterating over this very cap */
dout("__ceph_remove_cap delaying %p removal from session %p\n",
@@ -1023,7 +1033,6 @@ void __queue_cap_release(struct ceph_mds_session *session,
struct ceph_mds_cap_release *head;
struct ceph_mds_cap_item *item;
- spin_lock(&session->s_cap_lock);
BUG_ON(!session->s_num_cap_releases);
msg = list_first_entry(&session->s_cap_releases,
struct ceph_msg, list_head);
@@ -1052,7 +1061,6 @@ void __queue_cap_release(struct ceph_mds_session *session,
(int)CEPH_CAPS_PER_RELEASE,
(int)msg->front.iov_len);
}
- spin_unlock(&session->s_cap_lock);
}
/*
@@ -1067,12 +1075,8 @@ void ceph_queue_caps_release(struct inode *inode)
p = rb_first(&ci->i_caps);
while (p) {
struct ceph_cap *cap = rb_entry(p, struct ceph_cap, ci_node);
- struct ceph_mds_session *session = cap->session;
-
- __queue_cap_release(session, ceph_ino(inode), cap->cap_id,
- cap->mseq, cap->issue_seq);
p = rb_next(p);
- __ceph_remove_cap(cap);
+ __ceph_remove_cap(cap, true);
}
}
@@ -2791,7 +2795,7 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,
}
spin_unlock(&mdsc->cap_dirty_lock);
}
- __ceph_remove_cap(cap);
+ __ceph_remove_cap(cap, false);
}
/* else, we already released it */
@@ -2931,9 +2935,12 @@ void ceph_handle_caps(struct ceph_mds_session *session,
if (!inode) {
dout(" i don't have ino %llx\n", vino.ino);
- if (op == CEPH_CAP_OP_IMPORT)
+ if (op == CEPH_CAP_OP_IMPORT) {
+ spin_lock(&session->s_cap_lock);
__queue_cap_release(session, vino.ino, cap_id,
mseq, seq);
+ spin_unlock(&session->s_cap_lock);
+ }
goto flush_cap_releases;
}
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 868b61d56cac..2a0bcaeb189a 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -352,8 +352,18 @@ more:
}
/* note next offset and last dentry name */
+ rinfo = &req->r_reply_info;
+ if (le32_to_cpu(rinfo->dir_dir->frag) != frag) {
+ frag = le32_to_cpu(rinfo->dir_dir->frag);
+ if (ceph_frag_is_leftmost(frag))
+ fi->next_offset = 2;
+ else
+ fi->next_offset = 0;
+ off = fi->next_offset;
+ }
fi->offset = fi->next_offset;
fi->last_readdir = req;
+ fi->frag = frag;
if (req->r_reply_info.dir_end) {
kfree(fi->last_name);
@@ -363,7 +373,6 @@ more:
else
fi->next_offset = 0;
} else {
- rinfo = &req->r_reply_info;
err = note_last_dentry(fi,
rinfo->dir_dname[rinfo->dir_nr-1],
rinfo->dir_dname_len[rinfo->dir_nr-1]);
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 8549a48115f7..278fd2891288 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -577,6 +577,8 @@ static int fill_inode(struct inode *inode,
int issued = 0, implemented;
struct timespec mtime, atime, ctime;
u32 nsplits;
+ struct ceph_inode_frag *frag;
+ struct rb_node *rb_node;
struct ceph_buffer *xattr_blob = NULL;
int err = 0;
int queue_trunc = 0;
@@ -751,15 +753,38 @@ no_change:
/* FIXME: move me up, if/when version reflects fragtree changes */
nsplits = le32_to_cpu(info->fragtree.nsplits);
mutex_lock(&ci->i_fragtree_mutex);
+ rb_node = rb_first(&ci->i_fragtree);
for (i = 0; i < nsplits; i++) {
u32 id = le32_to_cpu(info->fragtree.splits[i].frag);
- struct ceph_inode_frag *frag = __get_or_create_frag(ci, id);
-
- if (IS_ERR(frag))
- continue;
+ frag = NULL;
+ while (rb_node) {
+ frag = rb_entry(rb_node, struct ceph_inode_frag, node);
+ if (ceph_frag_compare(frag->frag, id) >= 0) {
+ if (frag->frag != id)
+ frag = NULL;
+ else
+ rb_node = rb_next(rb_node);
+ break;
+ }
+ rb_node = rb_next(rb_node);
+ rb_erase(&frag->node, &ci->i_fragtree);
+ kfree(frag);
+ frag = NULL;
+ }
+ if (!frag) {
+ frag = __get_or_create_frag(ci, id);
+ if (IS_ERR(frag))
+ continue;
+ }
frag->split_by = le32_to_cpu(info->fragtree.splits[i].by);
dout(" frag %x split by %d\n", frag->frag, frag->split_by);
}
+ while (rb_node) {
+ frag = rb_entry(rb_node, struct ceph_inode_frag, node);
+ rb_node = rb_next(rb_node);
+ rb_erase(&frag->node, &ci->i_fragtree);
+ kfree(frag);
+ }
mutex_unlock(&ci->i_fragtree_mutex);
/* were we issued a capability? */
@@ -953,7 +978,6 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
struct ceph_mds_reply_inode *ininfo;
struct ceph_vino vino;
struct ceph_fs_client *fsc = ceph_sb_to_client(sb);
- int i = 0;
int err = 0;
dout("fill_trace %p is_dentry %d is_target %d\n", req,
@@ -1014,6 +1038,29 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
}
}
+ if (rinfo->head->is_target) {
+ vino.ino = le64_to_cpu(rinfo->targeti.in->ino);
+ vino.snap = le64_to_cpu(rinfo->targeti.in->snapid);
+
+ in = ceph_get_inode(sb, vino);
+ if (IS_ERR(in)) {
+ err = PTR_ERR(in);
+ goto done;
+ }
+ req->r_target_inode = in;
+
+ err = fill_inode(in, &rinfo->targeti, NULL,
+ session, req->r_request_started,
+ (le32_to_cpu(rinfo->head->result) == 0) ?
+ req->r_fmode : -1,
+ &req->r_caps_reservation);
+ if (err < 0) {
+ pr_err("fill_inode badness %p %llx.%llx\n",
+ in, ceph_vinop(in));
+ goto done;
+ }
+ }
+
/*
* ignore null lease/binding on snapdir ENOENT, or else we
* will have trouble splicing in the virtual snapdir later
@@ -1083,7 +1130,6 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
ceph_dentry(req->r_old_dentry)->offset);
dn = req->r_old_dentry; /* use old_dentry */
- in = dn->d_inode;
}
/* null dentry? */
@@ -1105,44 +1151,28 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
}
/* attach proper inode */
- ininfo = rinfo->targeti.in;
- vino.ino = le64_to_cpu(ininfo->ino);
- vino.snap = le64_to_cpu(ininfo->snapid);
- in = dn->d_inode;
- if (!in) {
- in = ceph_get_inode(sb, vino);
- if (IS_ERR(in)) {
- pr_err("fill_trace bad get_inode "
- "%llx.%llx\n", vino.ino, vino.snap);
- err = PTR_ERR(in);
- d_drop(dn);
- goto done;
- }
+ if (!dn->d_inode) {
+ ihold(in);
dn = splice_dentry(dn, in, &have_lease, true);
if (IS_ERR(dn)) {
err = PTR_ERR(dn);
goto done;
}
req->r_dentry = dn; /* may have spliced */
- ihold(in);
- } else if (ceph_ino(in) == vino.ino &&
- ceph_snap(in) == vino.snap) {
- ihold(in);
- } else {
+ } else if (dn->d_inode && dn->d_inode != in) {
dout(" %p links to %p %llx.%llx, not %llx.%llx\n",
- dn, in, ceph_ino(in), ceph_snap(in),
- vino.ino, vino.snap);
+ dn, dn->d_inode, ceph_vinop(dn->d_inode),
+ ceph_vinop(in));
have_lease = false;
- in = NULL;
}
if (have_lease)
update_dentry_lease(dn, rinfo->dlease, session,
req->r_request_started);
dout(" final dn %p\n", dn);
- i++;
- } else if ((req->r_op == CEPH_MDS_OP_LOOKUPSNAP ||
- req->r_op == CEPH_MDS_OP_MKSNAP) && !req->r_aborted) {
+ } else if (!req->r_aborted &&
+ (req->r_op == CEPH_MDS_OP_LOOKUPSNAP ||
+ req->r_op == CEPH_MDS_OP_MKSNAP)) {
struct dentry *dn = req->r_dentry;
/* fill out a snapdir LOOKUPSNAP dentry */
@@ -1152,52 +1182,15 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
ininfo = rinfo->targeti.in;
vino.ino = le64_to_cpu(ininfo->ino);
vino.snap = le64_to_cpu(ininfo->snapid);
- in = ceph_get_inode(sb, vino);
- if (IS_ERR(in)) {
- pr_err("fill_inode get_inode badness %llx.%llx\n",
- vino.ino, vino.snap);
- err = PTR_ERR(in);
- d_delete(dn);
- goto done;
- }
dout(" linking snapped dir %p to dn %p\n", in, dn);
+ ihold(in);
dn = splice_dentry(dn, in, NULL, true);
if (IS_ERR(dn)) {
err = PTR_ERR(dn);
goto done;
}
req->r_dentry = dn; /* may have spliced */
- ihold(in);
- rinfo->head->is_dentry = 1; /* fool notrace handlers */
- }
-
- if (rinfo->head->is_target) {
- vino.ino = le64_to_cpu(rinfo->targeti.in->ino);
- vino.snap = le64_to_cpu(rinfo->targeti.in->snapid);
-
- if (in == NULL || ceph_ino(in) != vino.ino ||
- ceph_snap(in) != vino.snap) {
- in = ceph_get_inode(sb, vino);
- if (IS_ERR(in)) {
- err = PTR_ERR(in);
- goto done;
- }
- }
- req->r_target_inode = in;
-
- err = fill_inode(in,
- &rinfo->targeti, NULL,
- session, req->r_request_started,
- (le32_to_cpu(rinfo->head->result) == 0) ?
- req->r_fmode : -1,
- &req->r_caps_reservation);
- if (err < 0) {
- pr_err("fill_inode badness %p %llx.%llx\n",
- in, ceph_vinop(in));
- goto done;
- }
}
-
done:
dout("fill_trace done err=%d\n", err);
return err;
@@ -1247,11 +1240,23 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
struct qstr dname;
struct dentry *dn;
struct inode *in;
- int err = 0, i;
+ int err = 0, ret, i;
struct inode *snapdir = NULL;
struct ceph_mds_request_head *rhead = req->r_request->front.iov_base;
- u64 frag = le32_to_cpu(rhead->args.readdir.frag);
struct ceph_dentry_info *di;
+ u64 r_readdir_offset = req->r_readdir_offset;
+ u32 frag = le32_to_cpu(rhead->args.readdir.frag);
+
+ if (rinfo->dir_dir &&
+ le32_to_cpu(rinfo->dir_dir->frag) != frag) {
+ dout("readdir_prepopulate got new frag %x -> %x\n",
+ frag, le32_to_cpu(rinfo->dir_dir->frag));
+ frag = le32_to_cpu(rinfo->dir_dir->frag);
+ if (ceph_frag_is_leftmost(frag))
+ r_readdir_offset = 2;
+ else
+ r_readdir_offset = 0;
+ }
if (req->r_aborted)
return readdir_prepopulate_inodes_only(req, session);
@@ -1268,6 +1273,7 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
ceph_fill_dirfrag(parent->d_inode, rinfo->dir_dir);
}
+ /* FIXME: release caps/leases if error occurs */
for (i = 0; i < rinfo->dir_nr; i++) {
struct ceph_vino vino;
@@ -1292,9 +1298,10 @@ retry_lookup:
err = -ENOMEM;
goto out;
}
- err = ceph_init_dentry(dn);
- if (err < 0) {
+ ret = ceph_init_dentry(dn);
+ if (ret < 0) {
dput(dn);
+ err = ret;
goto out;
}
} else if (dn->d_inode &&
@@ -1314,9 +1321,6 @@ retry_lookup:
spin_unlock(&parent->d_lock);
}
- di = dn->d_fsdata;
- di->offset = ceph_make_fpos(frag, i + req->r_readdir_offset);
-
/* inode */
if (dn->d_inode) {
in = dn->d_inode;
@@ -1329,26 +1333,39 @@ retry_lookup:
err = PTR_ERR(in);
goto out;
}
- dn = splice_dentry(dn, in, NULL, false);
- if (IS_ERR(dn))
- dn = NULL;
}
if (fill_inode(in, &rinfo->dir_in[i], NULL, session,
req->r_request_started, -1,
&req->r_caps_reservation) < 0) {
pr_err("fill_inode badness on %p\n", in);
+ if (!dn->d_inode)
+ iput(in);
+ d_drop(dn);
goto next_item;
}
- if (dn)
- update_dentry_lease(dn, rinfo->dir_dlease[i],
- req->r_session,
- req->r_request_started);
+
+ if (!dn->d_inode) {
+ dn = splice_dentry(dn, in, NULL, false);
+ if (IS_ERR(dn)) {
+ err = PTR_ERR(dn);
+ dn = NULL;
+ goto next_item;
+ }
+ }
+
+ di = dn->d_fsdata;
+ di->offset = ceph_make_fpos(frag, i + r_readdir_offset);
+
+ update_dentry_lease(dn, rinfo->dir_dlease[i],
+ req->r_session,
+ req->r_request_started);
next_item:
if (dn)
dput(dn);
}
- req->r_did_prepopulate = true;
+ if (err == 0)
+ req->r_did_prepopulate = true;
out:
if (snapdir) {
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index b7bda5d9611d..d90861f45210 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -43,6 +43,7 @@
*/
struct ceph_reconnect_state {
+ int nr_caps;
struct ceph_pagelist *pagelist;
bool flock;
};
@@ -443,6 +444,7 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
INIT_LIST_HEAD(&s->s_waiting);
INIT_LIST_HEAD(&s->s_unsafe);
s->s_num_cap_releases = 0;
+ s->s_cap_reconnect = 0;
s->s_cap_iterator = NULL;
INIT_LIST_HEAD(&s->s_cap_releases);
INIT_LIST_HEAD(&s->s_cap_releases_done);
@@ -642,6 +644,8 @@ static void __unregister_request(struct ceph_mds_client *mdsc,
req->r_unsafe_dir = NULL;
}
+ complete_all(&req->r_safe_completion);
+
ceph_mdsc_put_request(req);
}
@@ -986,7 +990,7 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
dout("removing cap %p, ci is %p, inode is %p\n",
cap, ci, &ci->vfs_inode);
spin_lock(&ci->i_ceph_lock);
- __ceph_remove_cap(cap);
+ __ceph_remove_cap(cap, false);
if (!__ceph_is_any_real_caps(ci)) {
struct ceph_mds_client *mdsc =
ceph_sb_to_client(inode->i_sb)->mdsc;
@@ -1231,9 +1235,7 @@ static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg)
session->s_trim_caps--;
if (oissued) {
/* we aren't the only cap.. just remove us */
- __queue_cap_release(session, ceph_ino(inode), cap->cap_id,
- cap->mseq, cap->issue_seq);
- __ceph_remove_cap(cap);
+ __ceph_remove_cap(cap, true);
} else {
/* try to drop referring dentries */
spin_unlock(&ci->i_ceph_lock);
@@ -1416,7 +1418,6 @@ static void discard_cap_releases(struct ceph_mds_client *mdsc,
unsigned num;
dout("discard_cap_releases mds%d\n", session->s_mds);
- spin_lock(&session->s_cap_lock);
/* zero out the in-progress message */
msg = list_first_entry(&session->s_cap_releases,
@@ -1443,8 +1444,6 @@ static void discard_cap_releases(struct ceph_mds_client *mdsc,
msg->front.iov_len = sizeof(*head);
list_add(&msg->list_head, &session->s_cap_releases);
}
-
- spin_unlock(&session->s_cap_lock);
}
/*
@@ -1875,8 +1874,11 @@ static int __do_request(struct ceph_mds_client *mdsc,
int mds = -1;
int err = -EAGAIN;
- if (req->r_err || req->r_got_result)
+ if (req->r_err || req->r_got_result) {
+ if (req->r_aborted)
+ __unregister_request(mdsc, req);
goto out;
+ }
if (req->r_timeout &&
time_after_eq(jiffies, req->r_started + req->r_timeout)) {
@@ -2186,7 +2188,6 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
if (head->safe) {
req->r_got_safe = true;
__unregister_request(mdsc, req);
- complete_all(&req->r_safe_completion);
if (req->r_got_unsafe) {
/*
@@ -2238,8 +2239,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
err = ceph_fill_trace(mdsc->fsc->sb, req, req->r_session);
if (err == 0) {
if (result == 0 && (req->r_op == CEPH_MDS_OP_READDIR ||
- req->r_op == CEPH_MDS_OP_LSSNAP) &&
- rinfo->dir_nr)
+ req->r_op == CEPH_MDS_OP_LSSNAP))
ceph_readdir_prepopulate(req, req->r_session);
ceph_unreserve_caps(mdsc, &req->r_caps_reservation);
}
@@ -2490,6 +2490,7 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
cap->seq = 0; /* reset cap seq */
cap->issue_seq = 0; /* and issue_seq */
cap->mseq = 0; /* and migrate_seq */
+ cap->cap_gen = cap->session->s_cap_gen;
if (recon_state->flock) {
rec.v2.cap_id = cpu_to_le64(cap->cap_id);
@@ -2552,6 +2553,8 @@ encode_again:
} else {
err = ceph_pagelist_append(pagelist, &rec, reclen);
}
+
+ recon_state->nr_caps++;
out_free:
kfree(path);
out_dput:
@@ -2579,6 +2582,7 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc,
struct rb_node *p;
int mds = session->s_mds;
int err = -ENOMEM;
+ int s_nr_caps;
struct ceph_pagelist *pagelist;
struct ceph_reconnect_state recon_state;
@@ -2610,20 +2614,38 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc,
dout("session %p state %s\n", session,
session_state_name(session->s_state));
+ spin_lock(&session->s_gen_ttl_lock);
+ session->s_cap_gen++;
+ spin_unlock(&session->s_gen_ttl_lock);
+
+ spin_lock(&session->s_cap_lock);
+ /*
+ * notify __ceph_remove_cap() that we are composing cap reconnect.
+ * If a cap get released before being added to the cap reconnect,
+ * __ceph_remove_cap() should skip queuing cap release.
+ */
+ session->s_cap_reconnect = 1;
/* drop old cap expires; we're about to reestablish that state */
discard_cap_releases(mdsc, session);
+ spin_unlock(&session->s_cap_lock);
/* traverse this session's caps */
- err = ceph_pagelist_encode_32(pagelist, session->s_nr_caps);
+ s_nr_caps = session->s_nr_caps;
+ err = ceph_pagelist_encode_32(pagelist, s_nr_caps);
if (err)
goto fail;
+ recon_state.nr_caps = 0;
recon_state.pagelist = pagelist;
recon_state.flock = session->s_con.peer_features & CEPH_FEATURE_FLOCK;
err = iterate_session_caps(session, encode_caps_cb, &recon_state);
if (err < 0)
goto fail;
+ spin_lock(&session->s_cap_lock);
+ session->s_cap_reconnect = 0;
+ spin_unlock(&session->s_cap_lock);
+
/*
* snaprealms. we provide mds with the ino, seq (version), and
* parent for all of our realms. If the mds has any newer info,
@@ -2646,11 +2668,18 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc,
if (recon_state.flock)
reply->hdr.version = cpu_to_le16(2);
- if (pagelist->length) {
- /* set up outbound data if we have any */
- reply->hdr.data_len = cpu_to_le32(pagelist->length);
- ceph_msg_data_add_pagelist(reply, pagelist);
+
+ /* raced with cap release? */
+ if (s_nr_caps != recon_state.nr_caps) {
+ struct page *page = list_first_entry(&pagelist->head,
+ struct page, lru);
+ __le32 *addr = kmap_atomic(page);
+ *addr = cpu_to_le32(recon_state.nr_caps);
+ kunmap_atomic(addr);
}
+
+ reply->hdr.data_len = cpu_to_le32(pagelist->length);
+ ceph_msg_data_add_pagelist(reply, pagelist);
ceph_con_send(&session->s_con, reply);
mutex_unlock(&session->s_mutex);
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index c2a19fbbe517..4c053d099ae4 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -132,6 +132,7 @@ struct ceph_mds_session {
struct list_head s_caps; /* all caps issued by this session */
int s_nr_caps, s_trim_caps;
int s_num_cap_releases;
+ int s_cap_reconnect;
struct list_head s_cap_releases; /* waiting cap_release messages */
struct list_head s_cap_releases_done; /* ready to send */
struct ceph_cap *s_cap_iterator;
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 6014b0a3c405..ef4ac38bb614 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -741,13 +741,7 @@ extern int ceph_add_cap(struct inode *inode,
int fmode, unsigned issued, unsigned wanted,
unsigned cap, unsigned seq, u64 realmino, int flags,
struct ceph_cap_reservation *caps_reservation);
-extern void __ceph_remove_cap(struct ceph_cap *cap);
-static inline void ceph_remove_cap(struct ceph_cap *cap)
-{
- spin_lock(&cap->ci->i_ceph_lock);
- __ceph_remove_cap(cap);
- spin_unlock(&cap->ci->i_ceph_lock);
-}
+extern void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release);
extern void ceph_put_cap(struct ceph_mds_client *mdsc,
struct ceph_cap *cap);
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index d9ea7ada1378..f918a998a087 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -384,6 +384,7 @@ struct smb_version_operations {
int (*clone_range)(const unsigned int, struct cifsFileInfo *src_file,
struct cifsFileInfo *target_file, u64 src_off, u64 len,
u64 dest_off);
+ int (*validate_negotiate)(const unsigned int, struct cifs_tcon *);
};
struct smb_version_values {
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index aa3397620342..2c29db6a247e 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -477,9 +477,10 @@ extern int CIFSGetExtAttr(const unsigned int xid, struct cifs_tcon *tcon,
const int netfid, __u64 *pExtAttrBits, __u64 *pMask);
extern void cifs_autodisable_serverino(struct cifs_sb_info *cifs_sb);
extern bool CIFSCouldBeMFSymlink(const struct cifs_fattr *fattr);
-extern int CIFSCheckMFSymlink(struct cifs_fattr *fattr,
- const unsigned char *path,
- struct cifs_sb_info *cifs_sb, unsigned int xid);
+extern int CIFSCheckMFSymlink(unsigned int xid, struct cifs_tcon *tcon,
+ struct cifs_sb_info *cifs_sb,
+ struct cifs_fattr *fattr,
+ const unsigned char *path);
extern int mdfour(unsigned char *, unsigned char *, int);
extern int E_md4hash(const unsigned char *passwd, unsigned char *p16,
const struct nls_table *codepage);
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 124aa0230c1b..d707edb6b852 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -4010,7 +4010,7 @@ QFileInfoRetry:
rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
(struct smb_hdr *) pSMBr, &bytes_returned, 0);
if (rc) {
- cifs_dbg(FYI, "Send error in QPathInfo = %d\n", rc);
+ cifs_dbg(FYI, "Send error in QFileInfo = %d", rc);
} else { /* decode response */
rc = validate_t2((struct smb_t2_rsp *)pSMBr);
@@ -4179,7 +4179,7 @@ UnixQFileInfoRetry:
rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
(struct smb_hdr *) pSMBr, &bytes_returned, 0);
if (rc) {
- cifs_dbg(FYI, "Send error in QPathInfo = %d\n", rc);
+ cifs_dbg(FYI, "Send error in UnixQFileInfo = %d", rc);
} else { /* decode response */
rc = validate_t2((struct smb_t2_rsp *)pSMBr);
@@ -4263,7 +4263,7 @@ UnixQPathInfoRetry:
rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
(struct smb_hdr *) pSMBr, &bytes_returned, 0);
if (rc) {
- cifs_dbg(FYI, "Send error in QPathInfo = %d\n", rc);
+ cifs_dbg(FYI, "Send error in UnixQPathInfo = %d", rc);
} else { /* decode response */
rc = validate_t2((struct smb_t2_rsp *)pSMBr);
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 11ff5f116b20..a514e0a65f69 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -193,7 +193,7 @@ check_name(struct dentry *direntry)
static int
cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned int xid,
struct tcon_link *tlink, unsigned oflags, umode_t mode,
- __u32 *oplock, struct cifs_fid *fid, int *created)
+ __u32 *oplock, struct cifs_fid *fid)
{
int rc = -ENOENT;
int create_options = CREATE_NOT_DIR;
@@ -349,7 +349,6 @@ cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned int xid,
.device = 0,
};
- *created |= FILE_CREATED;
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) {
args.uid = current_fsuid();
if (inode->i_mode & S_ISGID)
@@ -480,13 +479,16 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry,
cifs_add_pending_open(&fid, tlink, &open);
rc = cifs_do_create(inode, direntry, xid, tlink, oflags, mode,
- &oplock, &fid, opened);
+ &oplock, &fid);
if (rc) {
cifs_del_pending_open(&open);
goto out;
}
+ if ((oflags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
+ *opened |= FILE_CREATED;
+
rc = finish_open(file, direntry, generic_file_open, opened);
if (rc) {
if (server->ops->close)
@@ -529,7 +531,6 @@ int cifs_create(struct inode *inode, struct dentry *direntry, umode_t mode,
struct TCP_Server_Info *server;
struct cifs_fid fid;
__u32 oplock;
- int created = FILE_CREATED;
cifs_dbg(FYI, "cifs_create parent inode = 0x%p name is: %s and dentry = 0x%p\n",
inode, direntry->d_name.name, direntry);
@@ -546,7 +547,7 @@ int cifs_create(struct inode *inode, struct dentry *direntry, umode_t mode,
server->ops->new_lease_key(&fid);
rc = cifs_do_create(inode, direntry, xid, tlink, oflags, mode,
- &oplock, &fid, &created);
+ &oplock, &fid);
if (!rc && server->ops->close)
server->ops->close(xid, tcon, &fid);
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 36f9ebb93ceb..49719b8228e5 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -383,7 +383,8 @@ int cifs_get_inode_info_unix(struct inode **pinode,
/* check for Minshall+French symlinks */
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MF_SYMLINKS) {
- int tmprc = CIFSCheckMFSymlink(&fattr, full_path, cifs_sb, xid);
+ int tmprc = CIFSCheckMFSymlink(xid, tcon, cifs_sb, &fattr,
+ full_path);
if (tmprc)
cifs_dbg(FYI, "CIFSCheckMFSymlink: %d\n", tmprc);
}
@@ -799,7 +800,8 @@ cifs_get_inode_info(struct inode **inode, const char *full_path,
/* check for Minshall+French symlinks */
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MF_SYMLINKS) {
- tmprc = CIFSCheckMFSymlink(&fattr, full_path, cifs_sb, xid);
+ tmprc = CIFSCheckMFSymlink(xid, tcon, cifs_sb, &fattr,
+ full_path);
if (tmprc)
cifs_dbg(FYI, "CIFSCheckMFSymlink: %d\n", tmprc);
}
diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c
index 409b45eefe70..77492301cc2b 100644
--- a/fs/cifs/ioctl.c
+++ b/fs/cifs/ioctl.c
@@ -26,13 +26,15 @@
#include <linux/mount.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
-#include <linux/btrfs.h>
#include "cifspdu.h"
#include "cifsglob.h"
#include "cifsproto.h"
#include "cifs_debug.h"
#include "cifsfs.h"
+#define CIFS_IOCTL_MAGIC 0xCF
+#define CIFS_IOC_COPYCHUNK_FILE _IOW(CIFS_IOCTL_MAGIC, 3, int)
+
static long cifs_ioctl_clone(unsigned int xid, struct file *dst_file,
unsigned long srcfd, u64 off, u64 len, u64 destoff)
{
@@ -213,7 +215,7 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
cifs_dbg(FYI, "set compress flag rc %d\n", rc);
}
break;
- case BTRFS_IOC_CLONE:
+ case CIFS_IOC_COPYCHUNK_FILE:
rc = cifs_ioctl_clone(xid, filep, arg, 0, 0, 0);
break;
default:
diff --git a/fs/cifs/link.c b/fs/cifs/link.c
index cc0234710ddb..92aee08483a5 100644
--- a/fs/cifs/link.c
+++ b/fs/cifs/link.c
@@ -354,34 +354,30 @@ open_query_close_cifs_symlink(const unsigned char *path, char *pbuf,
int
-CIFSCheckMFSymlink(struct cifs_fattr *fattr,
- const unsigned char *path,
- struct cifs_sb_info *cifs_sb, unsigned int xid)
+CIFSCheckMFSymlink(unsigned int xid, struct cifs_tcon *tcon,
+ struct cifs_sb_info *cifs_sb, struct cifs_fattr *fattr,
+ const unsigned char *path)
{
- int rc = 0;
+ int rc;
u8 *buf = NULL;
unsigned int link_len = 0;
unsigned int bytes_read = 0;
- struct cifs_tcon *ptcon;
if (!CIFSCouldBeMFSymlink(fattr))
/* it's not a symlink */
return 0;
buf = kmalloc(CIFS_MF_SYMLINK_FILE_SIZE, GFP_KERNEL);
- if (!buf) {
- rc = -ENOMEM;
- goto out;
- }
+ if (!buf)
+ return -ENOMEM;
- ptcon = tlink_tcon(cifs_sb_tlink(cifs_sb));
- if ((ptcon->ses) && (ptcon->ses->server->ops->query_mf_symlink))
- rc = ptcon->ses->server->ops->query_mf_symlink(path, buf,
- &bytes_read, cifs_sb, xid);
+ if (tcon->ses->server->ops->query_mf_symlink)
+ rc = tcon->ses->server->ops->query_mf_symlink(path, buf,
+ &bytes_read, cifs_sb, xid);
else
- goto out;
+ rc = -ENOSYS;
- if (rc != 0)
+ if (rc)
goto out;
if (bytes_read == 0) /* not a symlink */
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 11dde4b24f8a..757da3e54d3d 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -532,7 +532,10 @@ smb2_clone_range(const unsigned int xid,
int rc;
unsigned int ret_data_len;
struct copychunk_ioctl *pcchunk;
- char *retbuf = NULL;
+ struct copychunk_ioctl_rsp *retbuf = NULL;
+ struct cifs_tcon *tcon;
+ int chunks_copied = 0;
+ bool chunk_sizes_updated = false;
pcchunk = kmalloc(sizeof(struct copychunk_ioctl), GFP_KERNEL);
@@ -547,27 +550,96 @@ smb2_clone_range(const unsigned int xid,
/* Note: request_res_key sets res_key null only if rc !=0 */
if (rc)
- return rc;
+ goto cchunk_out;
/* For now array only one chunk long, will make more flexible later */
pcchunk->ChunkCount = __constant_cpu_to_le32(1);
pcchunk->Reserved = 0;
- pcchunk->SourceOffset = cpu_to_le64(src_off);
- pcchunk->TargetOffset = cpu_to_le64(dest_off);
- pcchunk->Length = cpu_to_le32(len);
pcchunk->Reserved2 = 0;
- /* Request that server copy to target from src file identified by key */
- rc = SMB2_ioctl(xid, tlink_tcon(trgtfile->tlink),
- trgtfile->fid.persistent_fid,
- trgtfile->fid.volatile_fid, FSCTL_SRV_COPYCHUNK_WRITE,
- true /* is_fsctl */, (char *)pcchunk,
- sizeof(struct copychunk_ioctl), &retbuf, &ret_data_len);
+ tcon = tlink_tcon(trgtfile->tlink);
- /* BB need to special case rc = EINVAL to alter chunk size */
+ while (len > 0) {
+ pcchunk->SourceOffset = cpu_to_le64(src_off);
+ pcchunk->TargetOffset = cpu_to_le64(dest_off);
+ pcchunk->Length =
+ cpu_to_le32(min_t(u32, len, tcon->max_bytes_chunk));
- cifs_dbg(FYI, "rc %d data length out %d\n", rc, ret_data_len);
+ /* Request server copy to target from src identified by key */
+ rc = SMB2_ioctl(xid, tcon, trgtfile->fid.persistent_fid,
+ trgtfile->fid.volatile_fid, FSCTL_SRV_COPYCHUNK_WRITE,
+ true /* is_fsctl */, (char *)pcchunk,
+ sizeof(struct copychunk_ioctl), (char **)&retbuf,
+ &ret_data_len);
+ if (rc == 0) {
+ if (ret_data_len !=
+ sizeof(struct copychunk_ioctl_rsp)) {
+ cifs_dbg(VFS, "invalid cchunk response size\n");
+ rc = -EIO;
+ goto cchunk_out;
+ }
+ if (retbuf->TotalBytesWritten == 0) {
+ cifs_dbg(FYI, "no bytes copied\n");
+ rc = -EIO;
+ goto cchunk_out;
+ }
+ /*
+ * Check if server claimed to write more than we asked
+ */
+ if (le32_to_cpu(retbuf->TotalBytesWritten) >
+ le32_to_cpu(pcchunk->Length)) {
+ cifs_dbg(VFS, "invalid copy chunk response\n");
+ rc = -EIO;
+ goto cchunk_out;
+ }
+ if (le32_to_cpu(retbuf->ChunksWritten) != 1) {
+ cifs_dbg(VFS, "invalid num chunks written\n");
+ rc = -EIO;
+ goto cchunk_out;
+ }
+ chunks_copied++;
+
+ src_off += le32_to_cpu(retbuf->TotalBytesWritten);
+ dest_off += le32_to_cpu(retbuf->TotalBytesWritten);
+ len -= le32_to_cpu(retbuf->TotalBytesWritten);
+
+ cifs_dbg(FYI, "Chunks %d PartialChunk %d Total %d\n",
+ le32_to_cpu(retbuf->ChunksWritten),
+ le32_to_cpu(retbuf->ChunkBytesWritten),
+ le32_to_cpu(retbuf->TotalBytesWritten));
+ } else if (rc == -EINVAL) {
+ if (ret_data_len != sizeof(struct copychunk_ioctl_rsp))
+ goto cchunk_out;
+
+ cifs_dbg(FYI, "MaxChunks %d BytesChunk %d MaxCopy %d\n",
+ le32_to_cpu(retbuf->ChunksWritten),
+ le32_to_cpu(retbuf->ChunkBytesWritten),
+ le32_to_cpu(retbuf->TotalBytesWritten));
+
+ /*
+ * Check if this is the first request using these sizes,
+ * (ie check if copy succeed once with original sizes
+ * and check if the server gave us different sizes after
+ * we already updated max sizes on previous request).
+ * if not then why is the server returning an error now
+ */
+ if ((chunks_copied != 0) || chunk_sizes_updated)
+ goto cchunk_out;
+
+ /* Check that server is not asking us to grow size */
+ if (le32_to_cpu(retbuf->ChunkBytesWritten) <
+ tcon->max_bytes_chunk)
+ tcon->max_bytes_chunk =
+ le32_to_cpu(retbuf->ChunkBytesWritten);
+ else
+ goto cchunk_out; /* server gave us bogus size */
+
+ /* No need to change MaxChunks since already set to 1 */
+ chunk_sizes_updated = true;
+ }
+ }
+cchunk_out:
kfree(pcchunk);
return rc;
}
@@ -1247,6 +1319,7 @@ struct smb_version_operations smb30_operations = {
.create_lease_buf = smb3_create_lease_buf,
.parse_lease_buf = smb3_parse_lease_buf,
.clone_range = smb2_clone_range,
+ .validate_negotiate = smb3_validate_negotiate,
};
struct smb_version_values smb20_values = {
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index d65270c290a1..2013234b73ad 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -454,6 +454,81 @@ neg_exit:
return rc;
}
+int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon)
+{
+ int rc = 0;
+ struct validate_negotiate_info_req vneg_inbuf;
+ struct validate_negotiate_info_rsp *pneg_rsp;
+ u32 rsplen;
+
+ cifs_dbg(FYI, "validate negotiate\n");
+
+ /*
+ * validation ioctl must be signed, so no point sending this if we
+ * can not sign it. We could eventually change this to selectively
+ * sign just this, the first and only signed request on a connection.
+ * This is good enough for now since a user who wants better security
+ * would also enable signing on the mount. Having validation of
+ * negotiate info for signed connections helps reduce attack vectors
+ */
+ if (tcon->ses->server->sign == false)
+ return 0; /* validation requires signing */
+
+ vneg_inbuf.Capabilities =
+ cpu_to_le32(tcon->ses->server->vals->req_capabilities);
+ memcpy(vneg_inbuf.Guid, cifs_client_guid, SMB2_CLIENT_GUID_SIZE);
+
+ if (tcon->ses->sign)
+ vneg_inbuf.SecurityMode =
+ cpu_to_le16(SMB2_NEGOTIATE_SIGNING_REQUIRED);
+ else if (global_secflags & CIFSSEC_MAY_SIGN)
+ vneg_inbuf.SecurityMode =
+ cpu_to_le16(SMB2_NEGOTIATE_SIGNING_ENABLED);
+ else
+ vneg_inbuf.SecurityMode = 0;
+
+ vneg_inbuf.DialectCount = cpu_to_le16(1);
+ vneg_inbuf.Dialects[0] =
+ cpu_to_le16(tcon->ses->server->vals->protocol_id);
+
+ rc = SMB2_ioctl(xid, tcon, NO_FILE_ID, NO_FILE_ID,
+ FSCTL_VALIDATE_NEGOTIATE_INFO, true /* is_fsctl */,
+ (char *)&vneg_inbuf, sizeof(struct validate_negotiate_info_req),
+ (char **)&pneg_rsp, &rsplen);
+
+ if (rc != 0) {
+ cifs_dbg(VFS, "validate protocol negotiate failed: %d\n", rc);
+ return -EIO;
+ }
+
+ if (rsplen != sizeof(struct validate_negotiate_info_rsp)) {
+ cifs_dbg(VFS, "invalid size of protocol negotiate response\n");
+ return -EIO;
+ }
+
+ /* check validate negotiate info response matches what we got earlier */
+ if (pneg_rsp->Dialect !=
+ cpu_to_le16(tcon->ses->server->vals->protocol_id))
+ goto vneg_out;
+
+ if (pneg_rsp->SecurityMode != cpu_to_le16(tcon->ses->server->sec_mode))
+ goto vneg_out;
+
+ /* do not validate server guid because not saved at negprot time yet */
+
+ if ((le32_to_cpu(pneg_rsp->Capabilities) | SMB2_NT_FIND |
+ SMB2_LARGE_FILES) != tcon->ses->server->capabilities)
+ goto vneg_out;
+
+ /* validate negotiate successful */
+ cifs_dbg(FYI, "validate negotiate info successful\n");
+ return 0;
+
+vneg_out:
+ cifs_dbg(VFS, "protocol revalidation - security settings mismatch\n");
+ return -EIO;
+}
+
int
SMB2_sess_setup(const unsigned int xid, struct cifs_ses *ses,
const struct nls_table *nls_cp)
@@ -829,6 +904,8 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree,
((tcon->share_flags & SHI1005_FLAGS_DFS) == 0))
cifs_dbg(VFS, "DFS capability contradicts DFS flag\n");
init_copy_chunk_defaults(tcon);
+ if (tcon->ses->server->ops->validate_negotiate)
+ rc = tcon->ses->server->ops->validate_negotiate(xid, tcon);
tcon_exit:
free_rsp_buf(resp_buftype, rsp);
kfree(unc_path);
@@ -1214,10 +1291,17 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
rc = SendReceive2(xid, ses, iov, num_iovecs, &resp_buftype, 0);
rsp = (struct smb2_ioctl_rsp *)iov[0].iov_base;
- if (rc != 0) {
+ if ((rc != 0) && (rc != -EINVAL)) {
if (tcon)
cifs_stats_fail_inc(tcon, SMB2_IOCTL_HE);
goto ioctl_exit;
+ } else if (rc == -EINVAL) {
+ if ((opcode != FSCTL_SRV_COPYCHUNK_WRITE) &&
+ (opcode != FSCTL_SRV_COPYCHUNK)) {
+ if (tcon)
+ cifs_stats_fail_inc(tcon, SMB2_IOCTL_HE);
+ goto ioctl_exit;
+ }
}
/* check if caller wants to look at return data or just return rc */
@@ -2154,11 +2238,9 @@ send_set_info(const unsigned int xid, struct cifs_tcon *tcon,
rc = SendReceive2(xid, ses, iov, num, &resp_buftype, 0);
rsp = (struct smb2_set_info_rsp *)iov[0].iov_base;
- if (rc != 0) {
+ if (rc != 0)
cifs_stats_fail_inc(tcon, SMB2_SET_INFO_HE);
- goto out;
- }
-out:
+
free_rsp_buf(resp_buftype, rsp);
kfree(iov);
return rc;
diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h
index f88320bbb477..2022c542ea3a 100644
--- a/fs/cifs/smb2pdu.h
+++ b/fs/cifs/smb2pdu.h
@@ -577,13 +577,19 @@ struct copychunk_ioctl_rsp {
__le32 TotalBytesWritten;
} __packed;
-/* Response and Request are the same format */
-struct validate_negotiate_info {
+struct validate_negotiate_info_req {
__le32 Capabilities;
__u8 Guid[SMB2_CLIENT_GUID_SIZE];
__le16 SecurityMode;
__le16 DialectCount;
- __le16 Dialect[1];
+ __le16 Dialects[1]; /* dialect (someday maybe list) client asked for */
+} __packed;
+
+struct validate_negotiate_info_rsp {
+ __le32 Capabilities;
+ __u8 Guid[SMB2_CLIENT_GUID_SIZE];
+ __le16 SecurityMode;
+ __le16 Dialect; /* Dialect in use for the connection */
} __packed;
#define RSS_CAPABLE 0x00000001
diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h
index b4eea105b08c..93adc64666f3 100644
--- a/fs/cifs/smb2proto.h
+++ b/fs/cifs/smb2proto.h
@@ -162,5 +162,6 @@ extern int smb2_lockv(const unsigned int xid, struct cifs_tcon *tcon,
struct smb2_lock_element *buf);
extern int SMB2_lease_break(const unsigned int xid, struct cifs_tcon *tcon,
__u8 *lease_key, const __le32 lease_state);
+extern int smb3_validate_negotiate(const unsigned int, struct cifs_tcon *);
#endif /* _SMB2PROTO_H */
diff --git a/fs/cifs/smbfsctl.h b/fs/cifs/smbfsctl.h
index a4b2391fe66e..0e538b5c9622 100644
--- a/fs/cifs/smbfsctl.h
+++ b/fs/cifs/smbfsctl.h
@@ -90,7 +90,7 @@
#define FSCTL_LMR_REQUEST_RESILIENCY 0x001401D4 /* BB add struct */
#define FSCTL_LMR_GET_LINK_TRACK_INF 0x001400E8 /* BB add struct */
#define FSCTL_LMR_SET_LINK_TRACK_INF 0x001400EC /* BB add struct */
-#define FSCTL_VALIDATE_NEGOTIATE_INFO 0x00140204 /* BB add struct */
+#define FSCTL_VALIDATE_NEGOTIATE_INFO 0x00140204
/* Perform server-side data movement */
#define FSCTL_SRV_COPYCHUNK 0x001440F2
#define FSCTL_SRV_COPYCHUNK_WRITE 0x001480F2
diff --git a/fs/dcache.c b/fs/dcache.c
index 4bdb300b16e2..6055d61811d3 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -192,7 +192,7 @@ static inline int dentry_string_cmp(const unsigned char *cs, const unsigned char
if (!tcount)
return 0;
}
- mask = ~(~0ul << tcount*8);
+ mask = bytemask_from_count(tcount);
return unlikely(!!((a ^ b) & mask));
}
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 79b65c3b9e87..af903128891c 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1852,8 +1852,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
goto error_tgt_fput;
/* Check if EPOLLWAKEUP is allowed */
- if ((epds.events & EPOLLWAKEUP) && !capable(CAP_BLOCK_SUSPEND))
- epds.events &= ~EPOLLWAKEUP;
+ ep_take_care_of_epollwakeup(&epds);
/*
* We have to check that the file structure underneath the file descriptor
@@ -1908,10 +1907,6 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
}
}
}
- if (op == EPOLL_CTL_DEL && is_file_epoll(tf.file)) {
- tep = tf.file->private_data;
- mutex_lock_nested(&tep->mtx, 1);
- }
/*
* Try to lookup the file inside our RB tree, Since we grabbed "mtx"
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 288534920fe5..20d6697bd638 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -1493,6 +1493,7 @@ static ssize_t ext2_quota_write(struct super_block *sb, int type,
sb->s_blocksize - offset : towrite;
tmp_bh.b_state = 0;
+ tmp_bh.b_size = sb->s_blocksize;
err = ext2_get_block(inode, blk, &tmp_bh, 1);
if (err < 0)
goto out;
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index e6185031c1cc..ece55565b9cd 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -268,6 +268,16 @@ struct ext4_io_submit {
/* Translate # of blks to # of clusters */
#define EXT4_NUM_B2C(sbi, blks) (((blks) + (sbi)->s_cluster_ratio - 1) >> \
(sbi)->s_cluster_bits)
+/* Mask out the low bits to get the starting block of the cluster */
+#define EXT4_PBLK_CMASK(s, pblk) ((pblk) & \
+ ~((ext4_fsblk_t) (s)->s_cluster_ratio - 1))
+#define EXT4_LBLK_CMASK(s, lblk) ((lblk) & \
+ ~((ext4_lblk_t) (s)->s_cluster_ratio - 1))
+/* Get the cluster offset */
+#define EXT4_PBLK_COFF(s, pblk) ((pblk) & \
+ ((ext4_fsblk_t) (s)->s_cluster_ratio - 1))
+#define EXT4_LBLK_COFF(s, lblk) ((lblk) & \
+ ((ext4_lblk_t) (s)->s_cluster_ratio - 1))
/*
* Structure of a blocks group descriptor
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
index 17ac112ab101..3fe29de832c8 100644
--- a/fs/ext4/ext4_jbd2.c
+++ b/fs/ext4/ext4_jbd2.c
@@ -259,6 +259,15 @@ int __ext4_handle_dirty_metadata(const char *where, unsigned int line,
if (WARN_ON_ONCE(err)) {
ext4_journal_abort_handle(where, line, __func__, bh,
handle, err);
+ ext4_error_inode(inode, where, line,
+ bh->b_blocknr,
+ "journal_dirty_metadata failed: "
+ "handle type %u started at line %u, "
+ "credits %u/%u, errcode %d",
+ handle->h_type,
+ handle->h_line_no,
+ handle->h_requested_credits,
+ handle->h_buffer_credits, err);
}
} else {
if (inode)
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 35f65cf4f318..4410cc3d6ee2 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -360,8 +360,10 @@ static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext)
{
ext4_fsblk_t block = ext4_ext_pblock(ext);
int len = ext4_ext_get_actual_len(ext);
+ ext4_lblk_t lblock = le32_to_cpu(ext->ee_block);
+ ext4_lblk_t last = lblock + len - 1;
- if (len == 0)
+ if (lblock > last)
return 0;
return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, len);
}
@@ -387,11 +389,26 @@ static int ext4_valid_extent_entries(struct inode *inode,
if (depth == 0) {
/* leaf entries */
struct ext4_extent *ext = EXT_FIRST_EXTENT(eh);
+ struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
+ ext4_fsblk_t pblock = 0;
+ ext4_lblk_t lblock = 0;
+ ext4_lblk_t prev = 0;
+ int len = 0;
while (entries) {
if (!ext4_valid_extent(inode, ext))
return 0;
+
+ /* Check for overlapping extents */
+ lblock = le32_to_cpu(ext->ee_block);
+ len = ext4_ext_get_actual_len(ext);
+ if ((lblock <= prev) && prev) {
+ pblock = ext4_ext_pblock(ext);
+ es->s_last_error_block = cpu_to_le64(pblock);
+ return 0;
+ }
ext++;
entries--;
+ prev = lblock + len - 1;
}
} else {
struct ext4_extent_idx *ext_idx = EXT_FIRST_INDEX(eh);
@@ -1834,8 +1851,7 @@ static unsigned int ext4_ext_check_overlap(struct ext4_sb_info *sbi,
depth = ext_depth(inode);
if (!path[depth].p_ext)
goto out;
- b2 = le32_to_cpu(path[depth].p_ext->ee_block);
- b2 &= ~(sbi->s_cluster_ratio - 1);
+ b2 = EXT4_LBLK_CMASK(sbi, le32_to_cpu(path[depth].p_ext->ee_block));
/*
* get the next allocated block if the extent in the path
@@ -1845,7 +1861,7 @@ static unsigned int ext4_ext_check_overlap(struct ext4_sb_info *sbi,
b2 = ext4_ext_next_allocated_block(path);
if (b2 == EXT_MAX_BLOCKS)
goto out;
- b2 &= ~(sbi->s_cluster_ratio - 1);
+ b2 = EXT4_LBLK_CMASK(sbi, b2);
}
/* check for wrap through zero on extent logical start block*/
@@ -2504,7 +2520,7 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
* extent, we have to mark the cluster as used (store negative
* cluster number in partial_cluster).
*/
- unaligned = pblk & (sbi->s_cluster_ratio - 1);
+ unaligned = EXT4_PBLK_COFF(sbi, pblk);
if (unaligned && (ee_len == num) &&
(*partial_cluster != -((long long)EXT4_B2C(sbi, pblk))))
*partial_cluster = EXT4_B2C(sbi, pblk);
@@ -2598,7 +2614,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
* accidentally freeing it later on
*/
pblk = ext4_ext_pblock(ex);
- if (pblk & (sbi->s_cluster_ratio - 1))
+ if (EXT4_PBLK_COFF(sbi, pblk))
*partial_cluster =
-((long long)EXT4_B2C(sbi, pblk));
ex--;
@@ -3753,7 +3769,7 @@ int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk)
{
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
ext4_lblk_t lblk_start, lblk_end;
- lblk_start = lblk & (~(sbi->s_cluster_ratio - 1));
+ lblk_start = EXT4_LBLK_CMASK(sbi, lblk);
lblk_end = lblk_start + sbi->s_cluster_ratio - 1;
return ext4_find_delalloc_range(inode, lblk_start, lblk_end);
@@ -3812,9 +3828,9 @@ get_reserved_cluster_alloc(struct inode *inode, ext4_lblk_t lblk_start,
trace_ext4_get_reserved_cluster_alloc(inode, lblk_start, num_blks);
/* Check towards left side */
- c_offset = lblk_start & (sbi->s_cluster_ratio - 1);
+ c_offset = EXT4_LBLK_COFF(sbi, lblk_start);
if (c_offset) {
- lblk_from = lblk_start & (~(sbi->s_cluster_ratio - 1));
+ lblk_from = EXT4_LBLK_CMASK(sbi, lblk_start);
lblk_to = lblk_from + c_offset - 1;
if (ext4_find_delalloc_range(inode, lblk_from, lblk_to))
@@ -3822,7 +3838,7 @@ get_reserved_cluster_alloc(struct inode *inode, ext4_lblk_t lblk_start,
}
/* Now check towards right. */
- c_offset = (lblk_start + num_blks) & (sbi->s_cluster_ratio - 1);
+ c_offset = EXT4_LBLK_COFF(sbi, lblk_start + num_blks);
if (allocated_clusters && c_offset) {
lblk_from = lblk_start + num_blks;
lblk_to = lblk_from + (sbi->s_cluster_ratio - c_offset) - 1;
@@ -4030,7 +4046,7 @@ static int get_implied_cluster_alloc(struct super_block *sb,
struct ext4_ext_path *path)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
- ext4_lblk_t c_offset = map->m_lblk & (sbi->s_cluster_ratio-1);
+ ext4_lblk_t c_offset = EXT4_LBLK_COFF(sbi, map->m_lblk);
ext4_lblk_t ex_cluster_start, ex_cluster_end;
ext4_lblk_t rr_cluster_start;
ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block);
@@ -4048,8 +4064,7 @@ static int get_implied_cluster_alloc(struct super_block *sb,
(rr_cluster_start == ex_cluster_start)) {
if (rr_cluster_start == ex_cluster_end)
ee_start += ee_len - 1;
- map->m_pblk = (ee_start & ~(sbi->s_cluster_ratio - 1)) +
- c_offset;
+ map->m_pblk = EXT4_PBLK_CMASK(sbi, ee_start) + c_offset;
map->m_len = min(map->m_len,
(unsigned) sbi->s_cluster_ratio - c_offset);
/*
@@ -4203,7 +4218,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
*/
map->m_flags &= ~EXT4_MAP_FROM_CLUSTER;
newex.ee_block = cpu_to_le32(map->m_lblk);
- cluster_offset = map->m_lblk & (sbi->s_cluster_ratio-1);
+ cluster_offset = EXT4_LBLK_CMASK(sbi, map->m_lblk);
/*
* If we are doing bigalloc, check to see if the extent returned
@@ -4271,7 +4286,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
* needed so that future calls to get_implied_cluster_alloc()
* work correctly.
*/
- offset = map->m_lblk & (sbi->s_cluster_ratio - 1);
+ offset = EXT4_LBLK_COFF(sbi, map->m_lblk);
ar.len = EXT4_NUM_B2C(sbi, offset+allocated);
ar.goal -= offset;
ar.logical -= offset;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 075763474118..61d49ff22c81 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1206,7 +1206,6 @@ static int ext4_journalled_write_end(struct file *file,
*/
static int ext4_da_reserve_metadata(struct inode *inode, ext4_lblk_t lblock)
{
- int retries = 0;
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
struct ext4_inode_info *ei = EXT4_I(inode);
unsigned int md_needed;
@@ -1218,7 +1217,6 @@ static int ext4_da_reserve_metadata(struct inode *inode, ext4_lblk_t lblock)
* in order to allocate nrblocks
* worse case is one extent per block
*/
-repeat:
spin_lock(&ei->i_block_reservation_lock);
/*
* ext4_calc_metadata_amount() has side effects, which we have
@@ -1238,10 +1236,6 @@ repeat:
ei->i_da_metadata_calc_len = save_len;
ei->i_da_metadata_calc_last_lblock = save_last_lblock;
spin_unlock(&ei->i_block_reservation_lock);
- if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
- cond_resched();
- goto repeat;
- }
return -ENOSPC;
}
ei->i_reserved_meta_blocks += md_needed;
@@ -1255,7 +1249,6 @@ repeat:
*/
static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock)
{
- int retries = 0;
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
struct ext4_inode_info *ei = EXT4_I(inode);
unsigned int md_needed;
@@ -1277,7 +1270,6 @@ static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock)
* in order to allocate nrblocks
* worse case is one extent per block
*/
-repeat:
spin_lock(&ei->i_block_reservation_lock);
/*
* ext4_calc_metadata_amount() has side effects, which we have
@@ -1297,10 +1289,6 @@ repeat:
ei->i_da_metadata_calc_len = save_len;
ei->i_da_metadata_calc_last_lblock = save_last_lblock;
spin_unlock(&ei->i_block_reservation_lock);
- if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
- cond_resched();
- goto repeat;
- }
dquot_release_reservation_block(inode, EXT4_C2B(sbi, 1));
return -ENOSPC;
}
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 4d113efa024c..04a5c7504be9 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -3442,6 +3442,9 @@ static void ext4_mb_pa_callback(struct rcu_head *head)
{
struct ext4_prealloc_space *pa;
pa = container_of(head, struct ext4_prealloc_space, u.pa_rcu);
+
+ BUG_ON(atomic_read(&pa->pa_count));
+ BUG_ON(pa->pa_deleted == 0);
kmem_cache_free(ext4_pspace_cachep, pa);
}
@@ -3455,11 +3458,13 @@ static void ext4_mb_put_pa(struct ext4_allocation_context *ac,
ext4_group_t grp;
ext4_fsblk_t grp_blk;
- if (!atomic_dec_and_test(&pa->pa_count) || pa->pa_free != 0)
- return;
-
/* in this short window concurrent discard can set pa_deleted */
spin_lock(&pa->pa_lock);
+ if (!atomic_dec_and_test(&pa->pa_count) || pa->pa_free != 0) {
+ spin_unlock(&pa->pa_lock);
+ return;
+ }
+
if (pa->pa_deleted == 1) {
spin_unlock(&pa->pa_lock);
return;
@@ -4121,7 +4126,7 @@ ext4_mb_initialize_context(struct ext4_allocation_context *ac,
ext4_get_group_no_and_offset(sb, goal, &group, &block);
/* set up allocation goals */
- ac->ac_b_ex.fe_logical = ar->logical & ~(sbi->s_cluster_ratio - 1);
+ ac->ac_b_ex.fe_logical = EXT4_LBLK_CMASK(sbi, ar->logical);
ac->ac_status = AC_STATUS_CONTINUE;
ac->ac_sb = sb;
ac->ac_inode = ar->inode;
@@ -4663,7 +4668,7 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
* blocks at the beginning or the end unless we are explicitly
* requested to avoid doing so.
*/
- overflow = block & (sbi->s_cluster_ratio - 1);
+ overflow = EXT4_PBLK_COFF(sbi, block);
if (overflow) {
if (flags & EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER) {
overflow = sbi->s_cluster_ratio - overflow;
@@ -4677,7 +4682,7 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
count += overflow;
}
}
- overflow = count & (sbi->s_cluster_ratio - 1);
+ overflow = EXT4_LBLK_COFF(sbi, count);
if (overflow) {
if (flags & EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER) {
if (count > overflow)
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index c977f4e4e63b..1f7784de05b6 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -792,7 +792,7 @@ static void ext4_put_super(struct super_block *sb)
}
ext4_es_unregister_shrinker(sbi);
- del_timer(&sbi->s_err_report);
+ del_timer_sync(&sbi->s_err_report);
ext4_release_system_zone(sb);
ext4_mb_release(sb);
ext4_ext_release(sb);
@@ -3316,11 +3316,19 @@ int ext4_calculate_overhead(struct super_block *sb)
}
-static ext4_fsblk_t ext4_calculate_resv_clusters(struct ext4_sb_info *sbi)
+static ext4_fsblk_t ext4_calculate_resv_clusters(struct super_block *sb)
{
ext4_fsblk_t resv_clusters;
/*
+ * There's no need to reserve anything when we aren't using extents.
+ * The space estimates are exact, there are no unwritten extents,
+ * hole punching doesn't need new metadata... This is needed especially
+ * to keep ext2/3 backward compatibility.
+ */
+ if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS))
+ return 0;
+ /*
* By default we reserve 2% or 4096 clusters, whichever is smaller.
* This should cover the situations where we can not afford to run
* out of space like for example punch hole, or converting
@@ -3328,7 +3336,8 @@ static ext4_fsblk_t ext4_calculate_resv_clusters(struct ext4_sb_info *sbi)
* allocation would require 1, or 2 blocks, higher numbers are
* very rare.
*/
- resv_clusters = ext4_blocks_count(sbi->s_es) >> sbi->s_cluster_bits;
+ resv_clusters = ext4_blocks_count(EXT4_SB(sb)->s_es) >>
+ EXT4_SB(sb)->s_cluster_bits;
do_div(resv_clusters, 50);
resv_clusters = min_t(ext4_fsblk_t, resv_clusters, 4096);
@@ -4071,10 +4080,10 @@ no_journal:
"available");
}
- err = ext4_reserve_clusters(sbi, ext4_calculate_resv_clusters(sbi));
+ err = ext4_reserve_clusters(sbi, ext4_calculate_resv_clusters(sb));
if (err) {
ext4_msg(sb, KERN_ERR, "failed to reserve %llu clusters for "
- "reserved pool", ext4_calculate_resv_clusters(sbi));
+ "reserved pool", ext4_calculate_resv_clusters(sb));
goto failed_mount4a;
}
@@ -4184,7 +4193,7 @@ failed_mount_wq:
}
failed_mount3:
ext4_es_unregister_shrinker(sbi);
- del_timer(&sbi->s_err_report);
+ del_timer_sync(&sbi->s_err_report);
if (sbi->s_flex_groups)
ext4_kvfree(sbi->s_flex_groups);
percpu_counter_destroy(&sbi->s_freeclusters_counter);
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index b7fc035a6943..73f3e4ee4037 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -986,6 +986,7 @@ static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb,
{
struct file *file = iocb->ki_filp;
struct inode *inode = file->f_mapping->host;
+ struct address_space *mapping = inode->i_mapping;
struct gfs2_inode *ip = GFS2_I(inode);
struct gfs2_holder gh;
int rv;
@@ -1006,6 +1007,35 @@ static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb,
if (rv != 1)
goto out; /* dio not valid, fall back to buffered i/o */
+ /*
+ * Now since we are holding a deferred (CW) lock at this point, you
+ * might be wondering why this is ever needed. There is a case however
+ * where we've granted a deferred local lock against a cached exclusive
+ * glock. That is ok provided all granted local locks are deferred, but
+ * it also means that it is possible to encounter pages which are
+ * cached and possibly also mapped. So here we check for that and sort
+ * them out ahead of the dio. The glock state machine will take care of
+ * everything else.
+ *
+ * If in fact the cached glock state (gl->gl_state) is deferred (CW) in
+ * the first place, mapping->nr_pages will always be zero.
+ */
+ if (mapping->nrpages) {
+ loff_t lstart = offset & (PAGE_CACHE_SIZE - 1);
+ loff_t len = iov_length(iov, nr_segs);
+ loff_t end = PAGE_ALIGN(offset + len) - 1;
+
+ rv = 0;
+ if (len == 0)
+ goto out;
+ if (test_and_clear_bit(GIF_SW_PAGED, &ip->i_flags))
+ unmap_shared_mapping_range(ip->i_inode.i_mapping, offset, len);
+ rv = filemap_write_and_wait_range(mapping, lstart, end);
+ if (rv)
+ return rv;
+ truncate_inode_pages_range(mapping, lstart, end);
+ }
+
rv = __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
offset, nr_segs, gfs2_get_block_direct,
NULL, NULL, 0);
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index c8420f7e4db6..6f7a47c05259 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -1655,6 +1655,7 @@ static int dump_holder(struct seq_file *seq, const struct gfs2_holder *gh)
struct task_struct *gh_owner = NULL;
char flags_buf[32];
+ rcu_read_lock();
if (gh->gh_owner_pid)
gh_owner = pid_task(gh->gh_owner_pid, PIDTYPE_PID);
gfs2_print_dbg(seq, " H: s:%s f:%s e:%d p:%ld [%s] %pS\n",
@@ -1664,6 +1665,7 @@ static int dump_holder(struct seq_file *seq, const struct gfs2_holder *gh)
gh->gh_owner_pid ? (long)pid_nr(gh->gh_owner_pid) : -1,
gh_owner ? gh_owner->comm : "(ended)",
(void *)gh->gh_ip);
+ rcu_read_unlock();
return 0;
}
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index db908f697139..f88dcd925010 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -192,8 +192,11 @@ static void inode_go_sync(struct gfs2_glock *gl)
if (ip && !S_ISREG(ip->i_inode.i_mode))
ip = NULL;
- if (ip && test_and_clear_bit(GIF_SW_PAGED, &ip->i_flags))
- unmap_shared_mapping_range(ip->i_inode.i_mapping, 0, 0);
+ if (ip) {
+ if (test_and_clear_bit(GIF_SW_PAGED, &ip->i_flags))
+ unmap_shared_mapping_range(ip->i_inode.i_mapping, 0, 0);
+ inode_dio_wait(&ip->i_inode);
+ }
if (!test_and_clear_bit(GLF_DIRTY, &gl->gl_flags))
return;
@@ -410,6 +413,9 @@ static int inode_go_lock(struct gfs2_holder *gh)
return error;
}
+ if (gh->gh_state != LM_ST_DEFERRED)
+ inode_dio_wait(&ip->i_inode);
+
if ((ip->i_diskflags & GFS2_DIF_TRUNC_IN_PROG) &&
(gl->gl_state == LM_ST_EXCLUSIVE) &&
(gh->gh_state == LM_ST_EXCLUSIVE)) {
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 610613fb65b5..9dcb9777a5f8 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -551,10 +551,10 @@ void gfs2_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
struct buffer_head *bh = bd->bd_bh;
struct gfs2_glock *gl = bd->bd_gl;
- gfs2_remove_from_ail(bd);
- bd->bd_bh = NULL;
bh->b_private = NULL;
bd->bd_blkno = bh->b_blocknr;
+ gfs2_remove_from_ail(bd); /* drops ref on bh */
+ bd->bd_bh = NULL;
bd->bd_ops = &gfs2_revoke_lops;
sdp->sd_log_num_revoke++;
atomic_inc(&gl->gl_revokes);
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 932415050540..52f177be3bf8 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -258,6 +258,7 @@ void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr, int
struct address_space *mapping = bh->b_page->mapping;
struct gfs2_sbd *sdp = gfs2_mapping2sbd(mapping);
struct gfs2_bufdata *bd = bh->b_private;
+ int was_pinned = 0;
if (test_clear_buffer_pinned(bh)) {
trace_gfs2_pin(bd, 0);
@@ -273,12 +274,16 @@ void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr, int
tr->tr_num_databuf_rm++;
}
tr->tr_touched = 1;
+ was_pinned = 1;
brelse(bh);
}
if (bd) {
spin_lock(&sdp->sd_ail_lock);
if (bd->bd_tr) {
gfs2_trans_add_revoke(sdp, bd);
+ } else if (was_pinned) {
+ bh->b_private = NULL;
+ kmem_cache_free(gfs2_bufdata_cachep, bd);
}
spin_unlock(&sdp->sd_ail_lock);
}
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 82303b474958..52fa88314f5c 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -1366,8 +1366,18 @@ static struct dentry *gfs2_mount(struct file_system_type *fs_type, int flags,
if (IS_ERR(s))
goto error_bdev;
- if (s->s_root)
+ if (s->s_root) {
+ /*
+ * s_umount nests inside bd_mutex during
+ * __invalidate_device(). blkdev_put() acquires
+ * bd_mutex and can't be called under s_umount. Drop
+ * s_umount temporarily. This is safe as we're
+ * holding an active reference.
+ */
+ up_write(&s->s_umount);
blkdev_put(bdev, mode);
+ down_write(&s->s_umount);
+ }
memset(&args, 0, sizeof(args));
args.ar_quota = GFS2_QUOTA_DEFAULT;
diff --git a/fs/hfsplus/wrapper.c b/fs/hfsplus/wrapper.c
index b51a6079108d..e9a97a0d4314 100644
--- a/fs/hfsplus/wrapper.c
+++ b/fs/hfsplus/wrapper.c
@@ -24,13 +24,6 @@ struct hfsplus_wd {
u16 embed_count;
};
-static void hfsplus_end_io_sync(struct bio *bio, int err)
-{
- if (err)
- clear_bit(BIO_UPTODATE, &bio->bi_flags);
- complete(bio->bi_private);
-}
-
/*
* hfsplus_submit_bio - Perfrom block I/O
* @sb: super block of volume for I/O
@@ -53,7 +46,6 @@ static void hfsplus_end_io_sync(struct bio *bio, int err)
int hfsplus_submit_bio(struct super_block *sb, sector_t sector,
void *buf, void **data, int rw)
{
- DECLARE_COMPLETION_ONSTACK(wait);
struct bio *bio;
int ret = 0;
u64 io_size;
@@ -73,8 +65,6 @@ int hfsplus_submit_bio(struct super_block *sb, sector_t sector,
bio = bio_alloc(GFP_NOIO, 1);
bio->bi_sector = sector;
bio->bi_bdev = sb->s_bdev;
- bio->bi_end_io = hfsplus_end_io_sync;
- bio->bi_private = &wait;
if (!(rw & WRITE) && data)
*data = (u8 *)buf + offset;
@@ -93,12 +83,7 @@ int hfsplus_submit_bio(struct super_block *sb, sector_t sector,
buf = (u8 *)buf + len;
}
- submit_bio(rw, bio);
- wait_for_completion(&wait);
-
- if (!bio_flagged(bio, BIO_UPTODATE))
- ret = -EIO;
-
+ ret = submit_bio_wait(rw, bio);
out:
bio_put(bio);
return ret < 0 ? ret : 0;
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 52032647dd4a..5fa344afb49a 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -702,7 +702,7 @@ int jbd2_log_wait_commit(journal_t *journal, tid_t tid)
read_lock(&journal->j_state_lock);
#ifdef CONFIG_JBD2_DEBUG
if (!tid_geq(journal->j_commit_request, tid)) {
- printk(KERN_EMERG
+ printk(KERN_ERR
"%s: error: j_commit_request=%d, tid=%d\n",
__func__, journal->j_commit_request, tid);
}
@@ -718,10 +718,8 @@ int jbd2_log_wait_commit(journal_t *journal, tid_t tid)
}
read_unlock(&journal->j_state_lock);
- if (unlikely(is_journal_aborted(journal))) {
- printk(KERN_EMERG "journal commit I/O error\n");
+ if (unlikely(is_journal_aborted(journal)))
err = -EIO;
- }
return err;
}
@@ -1527,13 +1525,13 @@ static int journal_get_superblock(journal_t *journal)
if (JBD2_HAS_COMPAT_FEATURE(journal, JBD2_FEATURE_COMPAT_CHECKSUM) &&
JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) {
/* Can't have checksum v1 and v2 on at the same time! */
- printk(KERN_ERR "JBD: Can't enable checksumming v1 and v2 "
+ printk(KERN_ERR "JBD2: Can't enable checksumming v1 and v2 "
"at the same time!\n");
goto out;
}
if (!jbd2_verify_csum_type(journal, sb)) {
- printk(KERN_ERR "JBD: Unknown checksum type\n");
+ printk(KERN_ERR "JBD2: Unknown checksum type\n");
goto out;
}
@@ -1541,7 +1539,7 @@ static int journal_get_superblock(journal_t *journal)
if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) {
journal->j_chksum_driver = crypto_alloc_shash("crc32c", 0, 0);
if (IS_ERR(journal->j_chksum_driver)) {
- printk(KERN_ERR "JBD: Cannot load crc32c driver.\n");
+ printk(KERN_ERR "JBD2: Cannot load crc32c driver.\n");
err = PTR_ERR(journal->j_chksum_driver);
journal->j_chksum_driver = NULL;
goto out;
@@ -1550,7 +1548,7 @@ static int journal_get_superblock(journal_t *journal)
/* Check superblock checksum */
if (!jbd2_superblock_csum_verify(journal, sb)) {
- printk(KERN_ERR "JBD: journal checksum error\n");
+ printk(KERN_ERR "JBD2: journal checksum error\n");
goto out;
}
@@ -1836,7 +1834,7 @@ int jbd2_journal_set_features (journal_t *journal, unsigned long compat,
journal->j_chksum_driver = crypto_alloc_shash("crc32c",
0, 0);
if (IS_ERR(journal->j_chksum_driver)) {
- printk(KERN_ERR "JBD: Cannot load crc32c "
+ printk(KERN_ERR "JBD2: Cannot load crc32c "
"driver.\n");
journal->j_chksum_driver = NULL;
return 0;
@@ -2645,7 +2643,7 @@ static void __exit journal_exit(void)
#ifdef CONFIG_JBD2_DEBUG
int n = atomic_read(&nr_journal_heads);
if (n)
- printk(KERN_EMERG "JBD2: leaked %d journal_heads!\n", n);
+ printk(KERN_ERR "JBD2: leaked %d journal_heads!\n", n);
#endif
jbd2_remove_jbd_stats_proc_entry();
jbd2_journal_destroy_caches();
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c
index 3929c50428b1..3b6bb19d60b1 100644
--- a/fs/jbd2/recovery.c
+++ b/fs/jbd2/recovery.c
@@ -594,7 +594,7 @@ static int do_one_pass(journal_t *journal,
be32_to_cpu(tmp->h_sequence))) {
brelse(obh);
success = -EIO;
- printk(KERN_ERR "JBD: Invalid "
+ printk(KERN_ERR "JBD2: Invalid "
"checksum recovering "
"block %llu in log\n",
blocknr);
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 7aa9a32573bb..8360674c85bc 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -932,7 +932,7 @@ repeat:
jbd2_alloc(jh2bh(jh)->b_size,
GFP_NOFS);
if (!frozen_buffer) {
- printk(KERN_EMERG
+ printk(KERN_ERR
"%s: OOM for frozen_buffer\n",
__func__);
JBUFFER_TRACE(jh, "oom!");
@@ -1166,7 +1166,7 @@ repeat:
if (!jh->b_committed_data) {
committed_data = jbd2_alloc(jh2bh(jh)->b_size, GFP_NOFS);
if (!committed_data) {
- printk(KERN_EMERG "%s: No memory for committed data\n",
+ printk(KERN_ERR "%s: No memory for committed data\n",
__func__);
err = -ENOMEM;
goto out;
@@ -1290,7 +1290,10 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
* once a transaction -bzzz
*/
jh->b_modified = 1;
- J_ASSERT_JH(jh, handle->h_buffer_credits > 0);
+ if (handle->h_buffer_credits <= 0) {
+ ret = -ENOSPC;
+ goto out_unlock_bh;
+ }
handle->h_buffer_credits--;
}
@@ -1305,7 +1308,7 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
JBUFFER_TRACE(jh, "fastpath");
if (unlikely(jh->b_transaction !=
journal->j_running_transaction)) {
- printk(KERN_EMERG "JBD: %s: "
+ printk(KERN_ERR "JBD2: %s: "
"jh->b_transaction (%llu, %p, %u) != "
"journal->j_running_transaction (%p, %u)",
journal->j_devname,
@@ -1332,7 +1335,7 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
JBUFFER_TRACE(jh, "already on other transaction");
if (unlikely(jh->b_transaction !=
journal->j_committing_transaction)) {
- printk(KERN_EMERG "JBD: %s: "
+ printk(KERN_ERR "JBD2: %s: "
"jh->b_transaction (%llu, %p, %u) != "
"journal->j_committing_transaction (%p, %u)",
journal->j_devname,
@@ -1345,7 +1348,7 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
ret = -EINVAL;
}
if (unlikely(jh->b_next_transaction != transaction)) {
- printk(KERN_EMERG "JBD: %s: "
+ printk(KERN_ERR "JBD2: %s: "
"jh->b_next_transaction (%llu, %p, %u) != "
"transaction (%p, %u)",
journal->j_devname,
@@ -1373,7 +1376,6 @@ out_unlock_bh:
jbd2_journal_put_journal_head(jh);
out:
JBUFFER_TRACE(jh, "exit");
- WARN_ON(ret); /* All errors are bugs, so dump the stack */
return ret;
}
diff --git a/fs/logfs/dev_bdev.c b/fs/logfs/dev_bdev.c
index 550475ca6a0e..0f95f0d0b313 100644
--- a/fs/logfs/dev_bdev.c
+++ b/fs/logfs/dev_bdev.c
@@ -14,16 +14,10 @@
#define PAGE_OFS(ofs) ((ofs) & (PAGE_SIZE-1))
-static void request_complete(struct bio *bio, int err)
-{
- complete((struct completion *)bio->bi_private);
-}
-
static int sync_request(struct page *page, struct block_device *bdev, int rw)
{
struct bio bio;
struct bio_vec bio_vec;
- struct completion complete;
bio_init(&bio);
bio.bi_max_vecs = 1;
@@ -35,13 +29,8 @@ static int sync_request(struct page *page, struct block_device *bdev, int rw)
bio.bi_size = PAGE_SIZE;
bio.bi_bdev = bdev;
bio.bi_sector = page->index * (PAGE_SIZE >> 9);
- init_completion(&complete);
- bio.bi_private = &complete;
- bio.bi_end_io = request_complete;
- submit_bio(rw, &bio);
- wait_for_completion(&complete);
- return test_bit(BIO_UPTODATE, &bio.bi_flags) ? 0 : -EIO;
+ return submit_bio_wait(rw, &bio);
}
static int bdev_readpage(void *_sb, struct page *page)
diff --git a/fs/namei.c b/fs/namei.c
index 8f77a8cea289..3531deebad30 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -513,8 +513,7 @@ static int unlazy_walk(struct nameidata *nd, struct dentry *dentry)
if (!lockref_get_not_dead(&parent->d_lockref)) {
nd->path.dentry = NULL;
- rcu_read_unlock();
- return -ECHILD;
+ goto out;
}
/*
@@ -1599,11 +1598,6 @@ static inline int nested_symlink(struct path *path, struct nameidata *nd)
* do a "get_unaligned()" if this helps and is sufficiently
* fast.
*
- * - Little-endian machines (so that we can generate the mask
- * of low bytes efficiently). Again, we *could* do a byte
- * swapping load on big-endian architectures if that is not
- * expensive enough to make the optimization worthless.
- *
* - non-CONFIG_DEBUG_PAGEALLOC configurations (so that we
* do not trap on the (extremely unlikely) case of a page
* crossing operation.
@@ -1647,7 +1641,7 @@ unsigned int full_name_hash(const unsigned char *name, unsigned int len)
if (!len)
goto done;
}
- mask = ~(~0ul << len*8);
+ mask = bytemask_from_count(len);
hash += mask & a;
done:
return fold_hash(hash);
diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h
index 8485978993e8..9838fb020473 100644
--- a/fs/nfs/blocklayout/blocklayout.h
+++ b/fs/nfs/blocklayout/blocklayout.h
@@ -36,6 +36,7 @@
#include <linux/nfs_fs.h>
#include <linux/sunrpc/rpc_pipe_fs.h>
+#include "../nfs4_fs.h"
#include "../pnfs.h"
#include "../netns.h"
diff --git a/fs/nfs/blocklayout/extents.c b/fs/nfs/blocklayout/extents.c
index 9c3e117c3ed1..4d0161442565 100644
--- a/fs/nfs/blocklayout/extents.c
+++ b/fs/nfs/blocklayout/extents.c
@@ -44,7 +44,7 @@
static inline sector_t normalize(sector_t s, int base)
{
sector_t tmp = s; /* Since do_div modifies its argument */
- return s - do_div(tmp, base);
+ return s - sector_div(tmp, base);
}
static inline sector_t normalize_up(sector_t s, int base)
diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c
index fc0f95ec7358..d25f10fb4926 100644
--- a/fs/nfs/dns_resolve.c
+++ b/fs/nfs/dns_resolve.c
@@ -46,7 +46,9 @@ ssize_t nfs_dns_resolve_name(struct net *net, char *name, size_t namelen,
#include <linux/sunrpc/cache.h>
#include <linux/sunrpc/svcauth.h>
#include <linux/sunrpc/rpc_pipe_fs.h>
+#include <linux/nfs_fs.h>
+#include "nfs4_fs.h"
#include "dns_resolve.h"
#include "cache_lib.h"
#include "netns.h"
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 18ab2da4eeb6..00ad1c2b217d 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -312,7 +312,7 @@ struct nfs4_label *nfs4_label_alloc(struct nfs_server *server, gfp_t flags)
}
EXPORT_SYMBOL_GPL(nfs4_label_alloc);
#else
-void inline nfs_setsecurity(struct inode *inode, struct nfs_fattr *fattr,
+void nfs_setsecurity(struct inode *inode, struct nfs_fattr *fattr,
struct nfs4_label *label)
{
}
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index bca6a3e3c49c..8b5cc04a8611 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -269,6 +269,21 @@ extern const u32 nfs41_maxgetdevinfo_overhead;
extern struct rpc_procinfo nfs4_procedures[];
#endif
+#ifdef CONFIG_NFS_V4_SECURITY_LABEL
+extern struct nfs4_label *nfs4_label_alloc(struct nfs_server *server, gfp_t flags);
+static inline void nfs4_label_free(struct nfs4_label *label)
+{
+ if (label) {
+ kfree(label->label);
+ kfree(label);
+ }
+ return;
+}
+#else
+static inline struct nfs4_label *nfs4_label_alloc(struct nfs_server *server, gfp_t flags) { return NULL; }
+static inline void nfs4_label_free(void *label) {}
+#endif /* CONFIG_NFS_V4_SECURITY_LABEL */
+
/* proc.c */
void nfs_close_context(struct nfs_open_context *ctx, int is_sync);
extern struct nfs_client *nfs_init_client(struct nfs_client *clp,
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 3ce79b04522e..5609edc742a0 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -9,6 +9,14 @@
#ifndef __LINUX_FS_NFS_NFS4_FS_H
#define __LINUX_FS_NFS_NFS4_FS_H
+#if defined(CONFIG_NFS_V4_2)
+#define NFS4_MAX_MINOR_VERSION 2
+#elif defined(CONFIG_NFS_V4_1)
+#define NFS4_MAX_MINOR_VERSION 1
+#else
+#define NFS4_MAX_MINOR_VERSION 0
+#endif
+
#if IS_ENABLED(CONFIG_NFS_V4)
#define NFS4_MAX_LOOP_ON_RECOVER (10)
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 659990c0109e..15052b81df42 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2518,9 +2518,8 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
calldata->roc_barrier);
nfs_set_open_stateid(state, &calldata->res.stateid, 0);
renew_lease(server, calldata->timestamp);
- nfs4_close_clear_stateid_flags(state,
- calldata->arg.fmode);
break;
+ case -NFS4ERR_ADMIN_REVOKED:
case -NFS4ERR_STALE_STATEID:
case -NFS4ERR_OLD_STATEID:
case -NFS4ERR_BAD_STATEID:
@@ -2528,9 +2527,13 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
if (calldata->arg.fmode == 0)
break;
default:
- if (nfs4_async_handle_error(task, server, state) == -EAGAIN)
+ if (nfs4_async_handle_error(task, server, state) == -EAGAIN) {
rpc_restart_call_prepare(task);
+ goto out_release;
+ }
}
+ nfs4_close_clear_stateid_flags(state, calldata->arg.fmode);
+out_release:
nfs_release_seqid(calldata->arg.seqid);
nfs_refresh_inode(calldata->inode, calldata->res.fattr);
dprintk("%s: done, ret = %d!\n", __func__, task->tk_status);
@@ -4802,7 +4805,7 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
dprintk("%s ERROR %d, Reset session\n", __func__,
task->tk_status);
nfs4_schedule_session_recovery(clp->cl_session, task->tk_status);
- goto restart_call;
+ goto wait_on_recovery;
#endif /* CONFIG_NFS_V4_1 */
case -NFS4ERR_DELAY:
nfs_inc_server_stats(server, NFSIOS_DELAY);
@@ -4987,11 +4990,17 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata)
trace_nfs4_delegreturn_exit(&data->args, &data->res, task->tk_status);
switch (task->tk_status) {
- case -NFS4ERR_STALE_STATEID:
- case -NFS4ERR_EXPIRED:
case 0:
renew_lease(data->res.server, data->timestamp);
break;
+ case -NFS4ERR_ADMIN_REVOKED:
+ case -NFS4ERR_DELEG_REVOKED:
+ case -NFS4ERR_BAD_STATEID:
+ case -NFS4ERR_OLD_STATEID:
+ case -NFS4ERR_STALE_STATEID:
+ case -NFS4ERR_EXPIRED:
+ task->tk_status = 0;
+ break;
default:
if (nfs4_async_handle_error(task, data->res.server, NULL) ==
-EAGAIN) {
@@ -7589,7 +7598,14 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata)
return;
server = NFS_SERVER(lrp->args.inode);
- if (nfs4_async_handle_error(task, server, NULL) == -EAGAIN) {
+ switch (task->tk_status) {
+ default:
+ task->tk_status = 0;
+ case 0:
+ break;
+ case -NFS4ERR_DELAY:
+ if (nfs4_async_handle_error(task, server, NULL) != -EAGAIN)
+ break;
rpc_restart_call_prepare(task);
return;
}
diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
index 9186c7ce0b14..b6af150c96b8 100644
--- a/fs/nfsd/nfscache.c
+++ b/fs/nfsd/nfscache.c
@@ -132,6 +132,13 @@ nfsd_reply_cache_alloc(void)
}
static void
+nfsd_reply_cache_unhash(struct svc_cacherep *rp)
+{
+ hlist_del_init(&rp->c_hash);
+ list_del_init(&rp->c_lru);
+}
+
+static void
nfsd_reply_cache_free_locked(struct svc_cacherep *rp)
{
if (rp->c_type == RC_REPLBUFF && rp->c_replvec.iov_base) {
@@ -417,7 +424,7 @@ nfsd_cache_lookup(struct svc_rqst *rqstp)
rp = list_first_entry(&lru_head, struct svc_cacherep, c_lru);
if (nfsd_cache_entry_expired(rp) ||
num_drc_entries >= max_drc_entries) {
- lru_put_end(rp);
+ nfsd_reply_cache_unhash(rp);
prune_cache_entries();
goto search_cache;
}
diff --git a/fs/pipe.c b/fs/pipe.c
index d2c45e14e6d8..0e0752ef2715 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -726,11 +726,25 @@ pipe_poll(struct file *filp, poll_table *wait)
return mask;
}
+static void put_pipe_info(struct inode *inode, struct pipe_inode_info *pipe)
+{
+ int kill = 0;
+
+ spin_lock(&inode->i_lock);
+ if (!--pipe->files) {
+ inode->i_pipe = NULL;
+ kill = 1;
+ }
+ spin_unlock(&inode->i_lock);
+
+ if (kill)
+ free_pipe_info(pipe);
+}
+
static int
pipe_release(struct inode *inode, struct file *file)
{
- struct pipe_inode_info *pipe = inode->i_pipe;
- int kill = 0;
+ struct pipe_inode_info *pipe = file->private_data;
__pipe_lock(pipe);
if (file->f_mode & FMODE_READ)
@@ -743,17 +757,9 @@ pipe_release(struct inode *inode, struct file *file)
kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
}
- spin_lock(&inode->i_lock);
- if (!--pipe->files) {
- inode->i_pipe = NULL;
- kill = 1;
- }
- spin_unlock(&inode->i_lock);
__pipe_unlock(pipe);
- if (kill)
- free_pipe_info(pipe);
-
+ put_pipe_info(inode, pipe);
return 0;
}
@@ -1014,7 +1020,6 @@ static int fifo_open(struct inode *inode, struct file *filp)
{
struct pipe_inode_info *pipe;
bool is_pipe = inode->i_sb->s_magic == PIPEFS_MAGIC;
- int kill = 0;
int ret;
filp->f_version = 0;
@@ -1130,15 +1135,9 @@ err_wr:
goto err;
err:
- spin_lock(&inode->i_lock);
- if (!--pipe->files) {
- inode->i_pipe = NULL;
- kill = 1;
- }
- spin_unlock(&inode->i_lock);
__pipe_unlock(pipe);
- if (kill)
- free_pipe_info(pipe);
+
+ put_pipe_info(inode, pipe);
return ret;
}
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 28955d4b7218..124fc43c7090 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -292,16 +292,20 @@ proc_reg_get_unmapped_area(struct file *file, unsigned long orig_addr,
{
struct proc_dir_entry *pde = PDE(file_inode(file));
unsigned long rv = -EIO;
- unsigned long (*get_area)(struct file *, unsigned long, unsigned long,
- unsigned long, unsigned long) = NULL;
+
if (use_pde(pde)) {
+ typeof(proc_reg_get_unmapped_area) *get_area;
+
+ get_area = pde->proc_fops->get_unmapped_area;
#ifdef CONFIG_MMU
- get_area = current->mm->get_unmapped_area;
+ if (!get_area)
+ get_area = current->mm->get_unmapped_area;
#endif
- if (pde->proc_fops->get_unmapped_area)
- get_area = pde->proc_fops->get_unmapped_area;
+
if (get_area)
rv = get_area(file, orig_addr, len, pgoff, flags);
+ else
+ rv = orig_addr;
unuse_pde(pde);
}
return rv;
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index b8e93a40a5d3..78c3c2097787 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -443,8 +443,11 @@ int pstore_register(struct pstore_info *psi)
pstore_get_records(0);
kmsg_dump_register(&pstore_dumper);
- pstore_register_console();
- pstore_register_ftrace();
+
+ if ((psi->flags & PSTORE_FLAGS_FRAGILE) == 0) {
+ pstore_register_console();
+ pstore_register_ftrace();
+ }
if (pstore_update_ms >= 0) {
pstore_timer.expires = jiffies +
diff --git a/fs/squashfs/file_direct.c b/fs/squashfs/file_direct.c
index 2943b2bfae48..62a0de6632e1 100644
--- a/fs/squashfs/file_direct.c
+++ b/fs/squashfs/file_direct.c
@@ -84,6 +84,9 @@ int squashfs_readpage_block(struct page *target_page, u64 block, int bsize)
*/
res = squashfs_read_cache(target_page, block, bsize, pages,
page);
+ if (res < 0)
+ goto mark_errored;
+
goto out;
}
@@ -119,7 +122,7 @@ mark_errored:
* dealt with by the caller
*/
for (i = 0; i < pages; i++) {
- if (page[i] == target_page)
+ if (page[i] == NULL || page[i] == target_page)
continue;
flush_dcache_page(page[i]);
SetPageError(page[i]);
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index 79b5da2acbe1..35e7d08fe629 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -649,7 +649,23 @@ static int sysfs_open_file(struct inode *inode, struct file *file)
if (!of)
goto err_out;
- mutex_init(&of->mutex);
+ /*
+ * The following is done to give a different lockdep key to
+ * @of->mutex for files which implement mmap. This is a rather
+ * crude way to avoid false positive lockdep warning around
+ * mm->mmap_sem - mmap nests @of->mutex under mm->mmap_sem and
+ * reading /sys/block/sda/trace/act_mask grabs sr_mutex, under
+ * which mm->mmap_sem nests, while holding @of->mutex. As each
+ * open file has a separate mutex, it's okay as long as those don't
+ * happen on the same file. At this point, we can't easily give
+ * each file a separate locking class. Let's differentiate on
+ * whether the file is bin or not for now.
+ */
+ if (sysfs_is_bin(attr_sd))
+ mutex_init(&of->mutex);
+ else
+ mutex_init(&of->mutex);
+
of->sd = attr_sd;
of->file = file;
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 3ef11b22e750..3b2c14b6f0fb 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -1635,7 +1635,7 @@ xfs_bmap_last_extent(
* blocks at the end of the file which do not start at the previous data block,
* we will try to align the new blocks at stripe unit boundaries.
*
- * Returns 0 in bma->aeof if the file (fork) is empty as any new write will be
+ * Returns 1 in bma->aeof if the file (fork) is empty as any new write will be
* at, or past the EOF.
*/
STATIC int
@@ -1650,9 +1650,14 @@ xfs_bmap_isaeof(
bma->aeof = 0;
error = xfs_bmap_last_extent(NULL, bma->ip, whichfork, &rec,
&is_empty);
- if (error || is_empty)
+ if (error)
return error;
+ if (is_empty) {
+ bma->aeof = 1;
+ return 0;
+ }
+
/*
* Check if we are allocation or past the last extent, or at least into
* the last delayed allocated extent.
@@ -3643,10 +3648,19 @@ xfs_bmap_btalloc(
int isaligned;
int tryagain;
int error;
+ int stripe_align;
ASSERT(ap->length);
mp = ap->ip->i_mount;
+
+ /* stripe alignment for allocation is determined by mount parameters */
+ stripe_align = 0;
+ if (mp->m_swidth && (mp->m_flags & XFS_MOUNT_SWALLOC))
+ stripe_align = mp->m_swidth;
+ else if (mp->m_dalign)
+ stripe_align = mp->m_dalign;
+
align = ap->userdata ? xfs_get_extsz_hint(ap->ip) : 0;
if (unlikely(align)) {
error = xfs_bmap_extsize_align(mp, &ap->got, &ap->prev,
@@ -3655,6 +3669,8 @@ xfs_bmap_btalloc(
ASSERT(!error);
ASSERT(ap->length);
}
+
+
nullfb = *ap->firstblock == NULLFSBLOCK;
fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, *ap->firstblock);
if (nullfb) {
@@ -3730,7 +3746,7 @@ xfs_bmap_btalloc(
*/
if (!ap->flist->xbf_low && ap->aeof) {
if (!ap->offset) {
- args.alignment = mp->m_dalign;
+ args.alignment = stripe_align;
atype = args.type;
isaligned = 1;
/*
@@ -3755,13 +3771,13 @@ xfs_bmap_btalloc(
* of minlen+alignment+slop doesn't go up
* between the calls.
*/
- if (blen > mp->m_dalign && blen <= args.maxlen)
- nextminlen = blen - mp->m_dalign;
+ if (blen > stripe_align && blen <= args.maxlen)
+ nextminlen = blen - stripe_align;
else
nextminlen = args.minlen;
- if (nextminlen + mp->m_dalign > args.minlen + 1)
+ if (nextminlen + stripe_align > args.minlen + 1)
args.minalignslop =
- nextminlen + mp->m_dalign -
+ nextminlen + stripe_align -
args.minlen - 1;
else
args.minalignslop = 0;
@@ -3783,7 +3799,7 @@ xfs_bmap_btalloc(
*/
args.type = atype;
args.fsbno = ap->blkno;
- args.alignment = mp->m_dalign;
+ args.alignment = stripe_align;
args.minlen = nextminlen;
args.minalignslop = 0;
isaligned = 1;
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 5887e41c0323..1394106ed22d 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -1187,7 +1187,12 @@ xfs_zero_remaining_bytes(
XFS_BUF_UNWRITE(bp);
XFS_BUF_READ(bp);
XFS_BUF_SET_ADDR(bp, xfs_fsb_to_db(ip, imap.br_startblock));
- xfsbdstrat(mp, bp);
+
+ if (XFS_FORCED_SHUTDOWN(mp)) {
+ error = XFS_ERROR(EIO);
+ break;
+ }
+ xfs_buf_iorequest(bp);
error = xfs_buf_iowait(bp);
if (error) {
xfs_buf_ioerror_alert(bp,
@@ -1200,7 +1205,12 @@ xfs_zero_remaining_bytes(
XFS_BUF_UNDONE(bp);
XFS_BUF_UNREAD(bp);
XFS_BUF_WRITE(bp);
- xfsbdstrat(mp, bp);
+
+ if (XFS_FORCED_SHUTDOWN(mp)) {
+ error = XFS_ERROR(EIO);
+ break;
+ }
+ xfs_buf_iorequest(bp);
error = xfs_buf_iowait(bp);
if (error) {
xfs_buf_ioerror_alert(bp,
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index c7f0b77dcb00..afe7645e4b2b 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -698,7 +698,11 @@ xfs_buf_read_uncached(
bp->b_flags |= XBF_READ;
bp->b_ops = ops;
- xfsbdstrat(target->bt_mount, bp);
+ if (XFS_FORCED_SHUTDOWN(target->bt_mount)) {
+ xfs_buf_relse(bp);
+ return NULL;
+ }
+ xfs_buf_iorequest(bp);
xfs_buf_iowait(bp);
return bp;
}
@@ -1089,7 +1093,7 @@ xfs_bioerror(
* This is meant for userdata errors; metadata bufs come with
* iodone functions attached, so that we can track down errors.
*/
-STATIC int
+int
xfs_bioerror_relse(
struct xfs_buf *bp)
{
@@ -1152,7 +1156,7 @@ xfs_bwrite(
ASSERT(xfs_buf_islocked(bp));
bp->b_flags |= XBF_WRITE;
- bp->b_flags &= ~(XBF_ASYNC | XBF_READ | _XBF_DELWRI_Q);
+ bp->b_flags &= ~(XBF_ASYNC | XBF_READ | _XBF_DELWRI_Q | XBF_WRITE_FAIL);
xfs_bdstrat_cb(bp);
@@ -1164,25 +1168,6 @@ xfs_bwrite(
return error;
}
-/*
- * Wrapper around bdstrat so that we can stop data from going to disk in case
- * we are shutting down the filesystem. Typically user data goes thru this
- * path; one of the exceptions is the superblock.
- */
-void
-xfsbdstrat(
- struct xfs_mount *mp,
- struct xfs_buf *bp)
-{
- if (XFS_FORCED_SHUTDOWN(mp)) {
- trace_xfs_bdstrat_shut(bp, _RET_IP_);
- xfs_bioerror_relse(bp);
- return;
- }
-
- xfs_buf_iorequest(bp);
-}
-
STATIC void
_xfs_buf_ioend(
xfs_buf_t *bp,
@@ -1516,6 +1501,12 @@ xfs_wait_buftarg(
struct xfs_buf *bp;
bp = list_first_entry(&dispose, struct xfs_buf, b_lru);
list_del_init(&bp->b_lru);
+ if (bp->b_flags & XBF_WRITE_FAIL) {
+ xfs_alert(btp->bt_mount,
+"Corruption Alert: Buffer at block 0x%llx had permanent write failures!\n"
+"Please run xfs_repair to determine the extent of the problem.",
+ (long long)bp->b_bn);
+ }
xfs_buf_rele(bp);
}
if (loop++ != 0)
@@ -1799,7 +1790,7 @@ __xfs_buf_delwri_submit(
blk_start_plug(&plug);
list_for_each_entry_safe(bp, n, io_list, b_list) {
- bp->b_flags &= ~(_XBF_DELWRI_Q | XBF_ASYNC);
+ bp->b_flags &= ~(_XBF_DELWRI_Q | XBF_ASYNC | XBF_WRITE_FAIL);
bp->b_flags |= XBF_WRITE;
if (!wait) {
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index e65683361017..1cf21a4a9f22 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -45,6 +45,7 @@ typedef enum {
#define XBF_ASYNC (1 << 4) /* initiator will not wait for completion */
#define XBF_DONE (1 << 5) /* all pages in the buffer uptodate */
#define XBF_STALE (1 << 6) /* buffer has been staled, do not find it */
+#define XBF_WRITE_FAIL (1 << 24)/* async writes have failed on this buffer */
/* I/O hints for the BIO layer */
#define XBF_SYNCIO (1 << 10)/* treat this buffer as synchronous I/O */
@@ -70,6 +71,7 @@ typedef unsigned int xfs_buf_flags_t;
{ XBF_ASYNC, "ASYNC" }, \
{ XBF_DONE, "DONE" }, \
{ XBF_STALE, "STALE" }, \
+ { XBF_WRITE_FAIL, "WRITE_FAIL" }, \
{ XBF_SYNCIO, "SYNCIO" }, \
{ XBF_FUA, "FUA" }, \
{ XBF_FLUSH, "FLUSH" }, \
@@ -80,6 +82,7 @@ typedef unsigned int xfs_buf_flags_t;
{ _XBF_DELWRI_Q, "DELWRI_Q" }, \
{ _XBF_COMPOUND, "COMPOUND" }
+
/*
* Internal state flags.
*/
@@ -269,9 +272,6 @@ extern void xfs_buf_unlock(xfs_buf_t *);
/* Buffer Read and Write Routines */
extern int xfs_bwrite(struct xfs_buf *bp);
-
-extern void xfsbdstrat(struct xfs_mount *, struct xfs_buf *);
-
extern void xfs_buf_ioend(xfs_buf_t *, int);
extern void xfs_buf_ioerror(xfs_buf_t *, int);
extern void xfs_buf_ioerror_alert(struct xfs_buf *, const char *func);
@@ -282,6 +282,8 @@ extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, void *,
#define xfs_buf_zero(bp, off, len) \
xfs_buf_iomove((bp), (off), (len), NULL, XBRW_ZERO)
+extern int xfs_bioerror_relse(struct xfs_buf *);
+
static inline int xfs_buf_geterror(xfs_buf_t *bp)
{
return bp ? bp->b_error : ENOMEM;
@@ -301,7 +303,8 @@ extern void xfs_buf_terminate(void);
#define XFS_BUF_ZEROFLAGS(bp) \
((bp)->b_flags &= ~(XBF_READ|XBF_WRITE|XBF_ASYNC| \
- XBF_SYNCIO|XBF_FUA|XBF_FLUSH))
+ XBF_SYNCIO|XBF_FUA|XBF_FLUSH| \
+ XBF_WRITE_FAIL))
void xfs_buf_stale(struct xfs_buf *bp);
#define XFS_BUF_UNSTALE(bp) ((bp)->b_flags &= ~XBF_STALE)
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index a64f67ba25d3..2227b9b050bb 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -496,6 +496,14 @@ xfs_buf_item_unpin(
}
}
+/*
+ * Buffer IO error rate limiting. Limit it to no more than 10 messages per 30
+ * seconds so as to not spam logs too much on repeated detection of the same
+ * buffer being bad..
+ */
+
+DEFINE_RATELIMIT_STATE(xfs_buf_write_fail_rl_state, 30 * HZ, 10);
+
STATIC uint
xfs_buf_item_push(
struct xfs_log_item *lip,
@@ -524,6 +532,14 @@ xfs_buf_item_push(
trace_xfs_buf_item_push(bip);
+ /* has a previous flush failed due to IO errors? */
+ if ((bp->b_flags & XBF_WRITE_FAIL) &&
+ ___ratelimit(&xfs_buf_write_fail_rl_state, "XFS:")) {
+ xfs_warn(bp->b_target->bt_mount,
+"Detected failing async write on buffer block 0x%llx. Retrying async write.\n",
+ (long long)bp->b_bn);
+ }
+
if (!xfs_buf_delwri_queue(bp, buffer_list))
rval = XFS_ITEM_FLUSHING;
xfs_buf_unlock(bp);
@@ -1096,8 +1112,9 @@ xfs_buf_iodone_callbacks(
xfs_buf_ioerror(bp, 0); /* errno of 0 unsets the flag */
- if (!XFS_BUF_ISSTALE(bp)) {
- bp->b_flags |= XBF_WRITE | XBF_ASYNC | XBF_DONE;
+ if (!(bp->b_flags & (XBF_STALE|XBF_WRITE_FAIL))) {
+ bp->b_flags |= XBF_WRITE | XBF_ASYNC |
+ XBF_DONE | XBF_WRITE_FAIL;
xfs_buf_iorequest(bp);
} else {
xfs_buf_relse(bp);
diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c
index 56369d4509d5..48c7d18f68c3 100644
--- a/fs/xfs/xfs_dir2_node.c
+++ b/fs/xfs/xfs_dir2_node.c
@@ -2067,12 +2067,12 @@ xfs_dir2_node_lookup(
*/
int /* error */
xfs_dir2_node_removename(
- xfs_da_args_t *args) /* operation arguments */
+ struct xfs_da_args *args) /* operation arguments */
{
- xfs_da_state_blk_t *blk; /* leaf block */
+ struct xfs_da_state_blk *blk; /* leaf block */
int error; /* error return value */
int rval; /* operation return value */
- xfs_da_state_t *state; /* btree cursor */
+ struct xfs_da_state *state; /* btree cursor */
trace_xfs_dir2_node_removename(args);
@@ -2084,19 +2084,18 @@ xfs_dir2_node_removename(
state->mp = args->dp->i_mount;
state->blocksize = state->mp->m_dirblksize;
state->node_ents = state->mp->m_dir_node_ents;
- /*
- * Look up the entry we're deleting, set up the cursor.
- */
+
+ /* Look up the entry we're deleting, set up the cursor. */
error = xfs_da3_node_lookup_int(state, &rval);
if (error)
- rval = error;
- /*
- * Didn't find it, upper layer screwed up.
- */
+ goto out_free;
+
+ /* Didn't find it, upper layer screwed up. */
if (rval != EEXIST) {
- xfs_da_state_free(state);
- return rval;
+ error = rval;
+ goto out_free;
}
+
blk = &state->path.blk[state->path.active - 1];
ASSERT(blk->magic == XFS_DIR2_LEAFN_MAGIC);
ASSERT(state->extravalid);
@@ -2107,7 +2106,7 @@ xfs_dir2_node_removename(
error = xfs_dir2_leafn_remove(args, blk->bp, blk->index,
&state->extrablk, &rval);
if (error)
- return error;
+ goto out_free;
/*
* Fix the hash values up the btree.
*/
@@ -2122,6 +2121,7 @@ xfs_dir2_node_removename(
*/
if (!error)
error = xfs_dir2_node_to_leaf(state);
+out_free:
xfs_da_state_free(state);
return error;
}
diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c
index 8367d6dc18c9..4f11ef011139 100644
--- a/fs/xfs/xfs_discard.c
+++ b/fs/xfs/xfs_discard.c
@@ -157,7 +157,7 @@ xfs_ioc_trim(
struct xfs_mount *mp,
struct fstrim_range __user *urange)
{
- struct request_queue *q = mp->m_ddev_targp->bt_bdev->bd_disk->queue;
+ struct request_queue *q = bdev_get_queue(mp->m_ddev_targp->bt_bdev);
unsigned int granularity = q->limits.discard_granularity;
struct fstrim_range range;
xfs_daddr_t start, end, minlen;
@@ -180,7 +180,8 @@ xfs_ioc_trim(
* matter as trimming blocks is an advisory interface.
*/
if (range.start >= XFS_FSB_TO_B(mp, mp->m_sb.sb_dblocks) ||
- range.minlen > XFS_FSB_TO_B(mp, XFS_ALLOC_AG_MAX_USABLE(mp)))
+ range.minlen > XFS_FSB_TO_B(mp, XFS_ALLOC_AG_MAX_USABLE(mp)) ||
+ range.len < mp->m_sb.sb_blocksize)
return -XFS_ERROR(EINVAL);
start = BTOBB(range.start);
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index a6e54b3319bd..02fb943cbf22 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -220,6 +220,8 @@ xfs_growfs_data_private(
*/
nfree = 0;
for (agno = nagcount - 1; agno >= oagcount; agno--, new -= agsize) {
+ __be32 *agfl_bno;
+
/*
* AG freespace header block
*/
@@ -279,8 +281,10 @@ xfs_growfs_data_private(
agfl->agfl_seqno = cpu_to_be32(agno);
uuid_copy(&agfl->agfl_uuid, &mp->m_sb.sb_uuid);
}
+
+ agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, bp);
for (bucket = 0; bucket < XFS_AGFL_SIZE(mp); bucket++)
- agfl->agfl_bno[bucket] = cpu_to_be32(NULLAGBLOCK);
+ agfl_bno[bucket] = cpu_to_be32(NULLAGBLOCK);
error = xfs_bwrite(bp);
xfs_buf_relse(bp);
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 4d613401a5e0..33ad9a77791f 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -442,7 +442,8 @@ xfs_attrlist_by_handle(
return -XFS_ERROR(EPERM);
if (copy_from_user(&al_hreq, arg, sizeof(xfs_fsop_attrlist_handlereq_t)))
return -XFS_ERROR(EFAULT);
- if (al_hreq.buflen > XATTR_LIST_MAX)
+ if (al_hreq.buflen < sizeof(struct attrlist) ||
+ al_hreq.buflen > XATTR_LIST_MAX)
return -XFS_ERROR(EINVAL);
/*
diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c
index e8fb1231db81..a7992f8de9d3 100644
--- a/fs/xfs/xfs_ioctl32.c
+++ b/fs/xfs/xfs_ioctl32.c
@@ -356,7 +356,8 @@ xfs_compat_attrlist_by_handle(
if (copy_from_user(&al_hreq, arg,
sizeof(compat_xfs_fsop_attrlist_handlereq_t)))
return -XFS_ERROR(EFAULT);
- if (al_hreq.buflen > XATTR_LIST_MAX)
+ if (al_hreq.buflen < sizeof(struct attrlist) ||
+ al_hreq.buflen > XATTR_LIST_MAX)
return -XFS_ERROR(EINVAL);
/*
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 27e0e544e963..104455b8046c 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -618,7 +618,8 @@ xfs_setattr_nonsize(
}
if (!gid_eq(igid, gid)) {
if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_GQUOTA_ON(mp)) {
- ASSERT(!XFS_IS_PQUOTA_ON(mp));
+ ASSERT(xfs_sb_version_has_pquotino(&mp->m_sb) ||
+ !XFS_IS_PQUOTA_ON(mp));
ASSERT(mask & ATTR_GID);
ASSERT(gdqp);
olddquot2 = xfs_qm_vop_chown(tp, ip,
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index b6b669df40f3..eae16920655b 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -193,7 +193,10 @@ xlog_bread_noalign(
bp->b_io_length = nbblks;
bp->b_error = 0;
- xfsbdstrat(log->l_mp, bp);
+ if (XFS_FORCED_SHUTDOWN(log->l_mp))
+ return XFS_ERROR(EIO);
+
+ xfs_buf_iorequest(bp);
error = xfs_buf_iowait(bp);
if (error)
xfs_buf_ioerror_alert(bp, __func__);
@@ -4397,7 +4400,13 @@ xlog_do_recover(
XFS_BUF_READ(bp);
XFS_BUF_UNASYNC(bp);
bp->b_ops = &xfs_sb_buf_ops;
- xfsbdstrat(log->l_mp, bp);
+
+ if (XFS_FORCED_SHUTDOWN(log->l_mp)) {
+ xfs_buf_relse(bp);
+ return XFS_ERROR(EIO);
+ }
+
+ xfs_buf_iorequest(bp);
error = xfs_buf_iowait(bp);
if (error) {
xfs_buf_ioerror_alert(bp, __func__);
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index 14a4996cfec6..dd88f0e27bd8 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -134,8 +134,6 @@ xfs_qm_dqpurge(
{
struct xfs_mount *mp = dqp->q_mount;
struct xfs_quotainfo *qi = mp->m_quotainfo;
- struct xfs_dquot *gdqp = NULL;
- struct xfs_dquot *pdqp = NULL;
xfs_dqlock(dqp);
if ((dqp->dq_flags & XFS_DQ_FREEING) || dqp->q_nrefs != 0) {
@@ -143,21 +141,6 @@ xfs_qm_dqpurge(
return EAGAIN;
}
- /*
- * If this quota has a hint attached, prepare for releasing it now.
- */
- gdqp = dqp->q_gdquot;
- if (gdqp) {
- xfs_dqlock(gdqp);
- dqp->q_gdquot = NULL;
- }
-
- pdqp = dqp->q_pdquot;
- if (pdqp) {
- xfs_dqlock(pdqp);
- dqp->q_pdquot = NULL;
- }
-
dqp->dq_flags |= XFS_DQ_FREEING;
xfs_dqflock(dqp);
@@ -206,11 +189,47 @@ xfs_qm_dqpurge(
XFS_STATS_DEC(xs_qm_dquot_unused);
xfs_qm_dqdestroy(dqp);
+ return 0;
+}
+
+/*
+ * Release the group or project dquot pointers the user dquots maybe carrying
+ * around as a hint, and proceed to purge the user dquot cache if requested.
+*/
+STATIC int
+xfs_qm_dqpurge_hints(
+ struct xfs_dquot *dqp,
+ void *data)
+{
+ struct xfs_dquot *gdqp = NULL;
+ struct xfs_dquot *pdqp = NULL;
+ uint flags = *((uint *)data);
+
+ xfs_dqlock(dqp);
+ if (dqp->dq_flags & XFS_DQ_FREEING) {
+ xfs_dqunlock(dqp);
+ return EAGAIN;
+ }
+
+ /* If this quota has a hint attached, prepare for releasing it now */
+ gdqp = dqp->q_gdquot;
+ if (gdqp)
+ dqp->q_gdquot = NULL;
+
+ pdqp = dqp->q_pdquot;
+ if (pdqp)
+ dqp->q_pdquot = NULL;
+
+ xfs_dqunlock(dqp);
if (gdqp)
- xfs_qm_dqput(gdqp);
+ xfs_qm_dqrele(gdqp);
if (pdqp)
- xfs_qm_dqput(pdqp);
+ xfs_qm_dqrele(pdqp);
+
+ if (flags & XFS_QMOPT_UQUOTA)
+ return xfs_qm_dqpurge(dqp, NULL);
+
return 0;
}
@@ -222,8 +241,18 @@ xfs_qm_dqpurge_all(
struct xfs_mount *mp,
uint flags)
{
- if (flags & XFS_QMOPT_UQUOTA)
- xfs_qm_dquot_walk(mp, XFS_DQ_USER, xfs_qm_dqpurge, NULL);
+ /*
+ * We have to release group/project dquot hint(s) from the user dquot
+ * at first if they are there, otherwise we would run into an infinite
+ * loop while walking through radix tree to purge other type of dquots
+ * since their refcount is not zero if the user dquot refers to them
+ * as hint.
+ *
+ * Call the special xfs_qm_dqpurge_hints() will end up go through the
+ * general xfs_qm_dqpurge() against user dquot cache if requested.
+ */
+ xfs_qm_dquot_walk(mp, XFS_DQ_USER, xfs_qm_dqpurge_hints, &flags);
+
if (flags & XFS_QMOPT_GQUOTA)
xfs_qm_dquot_walk(mp, XFS_DQ_GROUP, xfs_qm_dqpurge, NULL);
if (flags & XFS_QMOPT_PQUOTA)
@@ -2082,24 +2111,21 @@ xfs_qm_vop_create_dqattach(
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
ASSERT(XFS_IS_QUOTA_RUNNING(mp));
- if (udqp) {
+ if (udqp && XFS_IS_UQUOTA_ON(mp)) {
ASSERT(ip->i_udquot == NULL);
- ASSERT(XFS_IS_UQUOTA_ON(mp));
ASSERT(ip->i_d.di_uid == be32_to_cpu(udqp->q_core.d_id));
ip->i_udquot = xfs_qm_dqhold(udqp);
xfs_trans_mod_dquot(tp, udqp, XFS_TRANS_DQ_ICOUNT, 1);
}
- if (gdqp) {
+ if (gdqp && XFS_IS_GQUOTA_ON(mp)) {
ASSERT(ip->i_gdquot == NULL);
- ASSERT(XFS_IS_GQUOTA_ON(mp));
ASSERT(ip->i_d.di_gid == be32_to_cpu(gdqp->q_core.d_id));
ip->i_gdquot = xfs_qm_dqhold(gdqp);
xfs_trans_mod_dquot(tp, gdqp, XFS_TRANS_DQ_ICOUNT, 1);
}
- if (pdqp) {
+ if (pdqp && XFS_IS_PQUOTA_ON(mp)) {
ASSERT(ip->i_pdquot == NULL);
- ASSERT(XFS_IS_PQUOTA_ON(mp));
ASSERT(xfs_get_projid(ip) == be32_to_cpu(pdqp->q_core.d_id));
ip->i_pdquot = xfs_qm_dqhold(pdqp);
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index c035d11b7734..647b6f1d8923 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -314,7 +314,18 @@ xfs_trans_read_buf_map(
ASSERT(bp->b_iodone == NULL);
XFS_BUF_READ(bp);
bp->b_ops = ops;
- xfsbdstrat(tp->t_mountp, bp);
+
+ /*
+ * XXX(hch): clean up the error handling here to be less
+ * of a mess..
+ */
+ if (XFS_FORCED_SHUTDOWN(mp)) {
+ trace_xfs_bdstrat_shut(bp, _RET_IP_);
+ xfs_bioerror_relse(bp);
+ } else {
+ xfs_buf_iorequest(bp);
+ }
+
error = xfs_buf_iowait(bp);
if (error) {
xfs_buf_ioerror_alert(bp, __func__);