summaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/aio.c153
-rw-r--r--fs/autofs/Makefile4
-rw-r--r--fs/autofs/dev-ioctl.c22
-rw-r--r--fs/autofs/init.c2
-rw-r--r--fs/binfmt_elf.c5
-rw-r--r--fs/block_dev.c9
-rw-r--r--fs/btrfs/extent_io.c12
-rw-r--r--fs/btrfs/inode.c7
-rw-r--r--fs/btrfs/ioctl.c12
-rw-r--r--fs/btrfs/qgroup.c17
-rw-r--r--fs/btrfs/scrub.c17
-rw-r--r--fs/btrfs/volumes.c2
-rw-r--r--fs/cachefiles/bind.c3
-rw-r--r--fs/cachefiles/namei.c3
-rw-r--r--fs/cachefiles/rdwr.c17
-rw-r--r--fs/ceph/inode.c1
-rw-r--r--fs/cifs/cifs_debug.c29
-rw-r--r--fs/cifs/cifsencrypt.c20
-rw-r--r--fs/cifs/cifsglob.h43
-rw-r--r--fs/cifs/cifsproto.h8
-rw-r--r--fs/cifs/cifssmb.c18
-rw-r--r--fs/cifs/connect.c16
-rw-r--r--fs/cifs/inode.c13
-rw-r--r--fs/cifs/misc.c9
-rw-r--r--fs/cifs/smb1ops.c1
-rw-r--r--fs/cifs/smb2file.c11
-rw-r--r--fs/cifs/smb2misc.c19
-rw-r--r--fs/cifs/smb2ops.c289
-rw-r--r--fs/cifs/smb2pdu.c365
-rw-r--r--fs/cifs/smb2pdu.h29
-rw-r--r--fs/cifs/smb2proto.h6
-rw-r--r--fs/cifs/smb2transport.c74
-rw-r--r--fs/cifs/smbdirect.c22
-rw-r--r--fs/cifs/smbdirect.h4
-rw-r--r--fs/cifs/trace.h3
-rw-r--r--fs/cifs/transport.c194
-rw-r--r--fs/dcache.c13
-rw-r--r--fs/eventfd.c19
-rw-r--r--fs/eventpoll.c15
-rw-r--r--fs/exec.c7
-rw-r--r--fs/ext2/ext2.h2
-rw-r--r--fs/ext2/super.c6
-rw-r--r--fs/ext4/balloc.c24
-rw-r--r--fs/ext4/ext4.h9
-rw-r--r--fs/ext4/ext4_extents.h1
-rw-r--r--fs/ext4/extents.c6
-rw-r--r--fs/ext4/ialloc.c22
-rw-r--r--fs/ext4/inline.c58
-rw-r--r--fs/ext4/inode.c23
-rw-r--r--fs/ext4/mballoc.c6
-rw-r--r--fs/ext4/mmp.c7
-rw-r--r--fs/ext4/super.c96
-rw-r--r--fs/ext4/xattr.c40
-rw-r--r--fs/fat/inode.c20
-rw-r--r--fs/fscache/cache.c2
-rw-r--r--fs/fscache/cookie.c7
-rw-r--r--fs/fscache/object.c1
-rw-r--r--fs/fscache/operation.c6
-rw-r--r--fs/hugetlbfs/inode.c2
-rw-r--r--fs/inode.c6
-rw-r--r--fs/internal.h1
-rw-r--r--fs/iomap.c2
-rw-r--r--fs/jbd2/transaction.c9
-rw-r--r--fs/jfs/jfs_dinode.h7
-rw-r--r--fs/jfs/jfs_incore.h1
-rw-r--r--fs/jfs/super.c3
-rw-r--r--fs/jfs/xattr.c10
-rw-r--r--fs/namespace.c28
-rw-r--r--fs/nfs/delegation.c4
-rw-r--r--fs/nfs/flexfilelayout/flexfilelayout.c21
-rw-r--r--fs/nfs/nfs4proc.c59
-rw-r--r--fs/nfs/pnfs.h5
-rw-r--r--fs/pipe.c22
-rw-r--r--fs/proc/base.c28
-rw-r--r--fs/proc/generic.c11
-rw-r--r--fs/proc/task_mmu.c3
-rw-r--r--fs/quota/dquot.c7
-rw-r--r--fs/reiserfs/prints.c141
-rw-r--r--fs/select.c23
-rw-r--r--fs/squashfs/block.c2
-rw-r--r--fs/squashfs/cache.c3
-rw-r--r--fs/squashfs/file.c58
-rw-r--r--fs/squashfs/file_cache.c4
-rw-r--r--fs/squashfs/file_direct.c24
-rw-r--r--fs/squashfs/fragment.c17
-rw-r--r--fs/squashfs/squashfs.h3
-rw-r--r--fs/squashfs/squashfs_fs.h6
-rw-r--r--fs/squashfs/squashfs_fs_sb.h1
-rw-r--r--fs/squashfs/super.c5
-rw-r--r--fs/timerfd.c22
-rw-r--r--fs/udf/balloc.c5
-rw-r--r--fs/udf/directory.c8
-rw-r--r--fs/udf/inode.c8
-rw-r--r--fs/udf/namei.c14
-rw-r--r--fs/udf/udfdecl.h9
-rw-r--r--fs/userfaultfd.c16
-rw-r--r--fs/xfs/libxfs/xfs_ag_resv.c31
-rw-r--r--fs/xfs/libxfs/xfs_alloc.c5
-rw-r--r--fs/xfs/libxfs/xfs_bmap.c26
-rw-r--r--fs/xfs/libxfs/xfs_bmap.h2
-rw-r--r--fs/xfs/libxfs/xfs_format.h5
-rw-r--r--fs/xfs/libxfs/xfs_inode_buf.c82
-rw-r--r--fs/xfs/libxfs/xfs_rtbitmap.c4
-rw-r--r--fs/xfs/xfs_bmap_util.c106
-rw-r--r--fs/xfs/xfs_fsmap.c4
-rw-r--r--fs/xfs/xfs_fsops.c2
-rw-r--r--fs/xfs/xfs_inode.c57
-rw-r--r--fs/xfs/xfs_iomap.c15
-rw-r--r--fs/xfs/xfs_trans.c7
109 files changed, 1745 insertions, 1048 deletions
diff --git a/fs/aio.c b/fs/aio.c
index e1d20124ec0e..27454594e37a 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -5,7 +5,6 @@
* Implements an efficient asynchronous io interface.
*
* Copyright 2000, 2001, 2002 Red Hat, Inc. All Rights Reserved.
- * Copyright 2018 Christoph Hellwig.
*
* See ../COPYING for licensing terms.
*/
@@ -165,22 +164,10 @@ struct fsync_iocb {
bool datasync;
};
-struct poll_iocb {
- struct file *file;
- __poll_t events;
- struct wait_queue_head *head;
-
- union {
- struct wait_queue_entry wait;
- struct work_struct work;
- };
-};
-
struct aio_kiocb {
union {
struct kiocb rw;
struct fsync_iocb fsync;
- struct poll_iocb poll;
};
struct kioctx *ki_ctx;
@@ -1590,6 +1577,7 @@ static int aio_fsync(struct fsync_iocb *req, struct iocb *iocb, bool datasync)
if (unlikely(iocb->aio_buf || iocb->aio_offset || iocb->aio_nbytes ||
iocb->aio_rw_flags))
return -EINVAL;
+
req->file = fget(iocb->aio_fildes);
if (unlikely(!req->file))
return -EBADF;
@@ -1604,137 +1592,6 @@ static int aio_fsync(struct fsync_iocb *req, struct iocb *iocb, bool datasync)
return 0;
}
-/* need to use list_del_init so we can check if item was present */
-static inline bool __aio_poll_remove(struct poll_iocb *req)
-{
- if (list_empty(&req->wait.entry))
- return false;
- list_del_init(&req->wait.entry);
- return true;
-}
-
-static inline void __aio_poll_complete(struct aio_kiocb *iocb, __poll_t mask)
-{
- fput(iocb->poll.file);
- aio_complete(iocb, mangle_poll(mask), 0);
-}
-
-static void aio_poll_work(struct work_struct *work)
-{
- struct aio_kiocb *iocb = container_of(work, struct aio_kiocb, poll.work);
-
- if (!list_empty_careful(&iocb->ki_list))
- aio_remove_iocb(iocb);
- __aio_poll_complete(iocb, iocb->poll.events);
-}
-
-static int aio_poll_cancel(struct kiocb *iocb)
-{
- struct aio_kiocb *aiocb = container_of(iocb, struct aio_kiocb, rw);
- struct poll_iocb *req = &aiocb->poll;
- struct wait_queue_head *head = req->head;
- bool found = false;
-
- spin_lock(&head->lock);
- found = __aio_poll_remove(req);
- spin_unlock(&head->lock);
-
- if (found) {
- req->events = 0;
- INIT_WORK(&req->work, aio_poll_work);
- schedule_work(&req->work);
- }
- return 0;
-}
-
-static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
- void *key)
-{
- struct poll_iocb *req = container_of(wait, struct poll_iocb, wait);
- struct aio_kiocb *iocb = container_of(req, struct aio_kiocb, poll);
- struct file *file = req->file;
- __poll_t mask = key_to_poll(key);
-
- assert_spin_locked(&req->head->lock);
-
- /* for instances that support it check for an event match first: */
- if (mask && !(mask & req->events))
- return 0;
-
- mask = file->f_op->poll_mask(file, req->events) & req->events;
- if (!mask)
- return 0;
-
- __aio_poll_remove(req);
-
- /*
- * Try completing without a context switch if we can acquire ctx_lock
- * without spinning. Otherwise we need to defer to a workqueue to
- * avoid a deadlock due to the lock order.
- */
- if (spin_trylock(&iocb->ki_ctx->ctx_lock)) {
- list_del_init(&iocb->ki_list);
- spin_unlock(&iocb->ki_ctx->ctx_lock);
-
- __aio_poll_complete(iocb, mask);
- } else {
- req->events = mask;
- INIT_WORK(&req->work, aio_poll_work);
- schedule_work(&req->work);
- }
-
- return 1;
-}
-
-static ssize_t aio_poll(struct aio_kiocb *aiocb, struct iocb *iocb)
-{
- struct kioctx *ctx = aiocb->ki_ctx;
- struct poll_iocb *req = &aiocb->poll;
- __poll_t mask;
-
- /* reject any unknown events outside the normal event mask. */
- if ((u16)iocb->aio_buf != iocb->aio_buf)
- return -EINVAL;
- /* reject fields that are not defined for poll */
- if (iocb->aio_offset || iocb->aio_nbytes || iocb->aio_rw_flags)
- return -EINVAL;
-
- req->events = demangle_poll(iocb->aio_buf) | EPOLLERR | EPOLLHUP;
- req->file = fget(iocb->aio_fildes);
- if (unlikely(!req->file))
- return -EBADF;
- if (!file_has_poll_mask(req->file))
- goto out_fail;
-
- req->head = req->file->f_op->get_poll_head(req->file, req->events);
- if (!req->head)
- goto out_fail;
- if (IS_ERR(req->head)) {
- mask = EPOLLERR;
- goto done;
- }
-
- init_waitqueue_func_entry(&req->wait, aio_poll_wake);
- aiocb->ki_cancel = aio_poll_cancel;
-
- spin_lock_irq(&ctx->ctx_lock);
- spin_lock(&req->head->lock);
- mask = req->file->f_op->poll_mask(req->file, req->events) & req->events;
- if (!mask) {
- __add_wait_queue(req->head, &req->wait);
- list_add_tail(&aiocb->ki_list, &ctx->active_reqs);
- }
- spin_unlock(&req->head->lock);
- spin_unlock_irq(&ctx->ctx_lock);
-done:
- if (mask)
- __aio_poll_complete(aiocb, mask);
- return 0;
-out_fail:
- fput(req->file);
- return -EINVAL; /* same as no support for IOCB_CMD_POLL */
-}
-
static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
bool compat)
{
@@ -1808,9 +1665,6 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
case IOCB_CMD_FDSYNC:
ret = aio_fsync(&req->fsync, &iocb, true);
break;
- case IOCB_CMD_POLL:
- ret = aio_poll(req, &iocb);
- break;
default:
pr_debug("invalid aio operation %d\n", iocb.aio_lio_opcode);
ret = -EINVAL;
@@ -2042,6 +1896,11 @@ SYSCALL_DEFINE5(io_getevents, aio_context_t, ctx_id,
return ret;
}
+struct __aio_sigset {
+ const sigset_t __user *sigmask;
+ size_t sigsetsize;
+};
+
SYSCALL_DEFINE6(io_pgetevents,
aio_context_t, ctx_id,
long, min_nr,
diff --git a/fs/autofs/Makefile b/fs/autofs/Makefile
index 43fedde15c26..1f85d35ec8b7 100644
--- a/fs/autofs/Makefile
+++ b/fs/autofs/Makefile
@@ -2,6 +2,6 @@
# Makefile for the linux autofs-filesystem routines.
#
-obj-$(CONFIG_AUTOFS_FS) += autofs.o
+obj-$(CONFIG_AUTOFS_FS) += autofs4.o
-autofs-objs := init.o inode.o root.o symlink.o waitq.o expire.o dev-ioctl.o
+autofs4-objs := init.o inode.o root.o symlink.o waitq.o expire.o dev-ioctl.o
diff --git a/fs/autofs/dev-ioctl.c b/fs/autofs/dev-ioctl.c
index ea4ca1445ab7..86eafda4a652 100644
--- a/fs/autofs/dev-ioctl.c
+++ b/fs/autofs/dev-ioctl.c
@@ -135,6 +135,15 @@ static int validate_dev_ioctl(int cmd, struct autofs_dev_ioctl *param)
cmd);
goto out;
}
+ } else {
+ unsigned int inr = _IOC_NR(cmd);
+
+ if (inr == AUTOFS_DEV_IOCTL_OPENMOUNT_CMD ||
+ inr == AUTOFS_DEV_IOCTL_REQUESTER_CMD ||
+ inr == AUTOFS_DEV_IOCTL_ISMOUNTPOINT_CMD) {
+ err = -EINVAL;
+ goto out;
+ }
}
err = 0;
@@ -271,7 +280,8 @@ static int autofs_dev_ioctl_openmount(struct file *fp,
dev_t devid;
int err, fd;
- /* param->path has already been checked */
+ /* param->path has been checked in validate_dev_ioctl() */
+
if (!param->openmount.devid)
return -EINVAL;
@@ -433,10 +443,7 @@ static int autofs_dev_ioctl_requester(struct file *fp,
dev_t devid;
int err = -ENOENT;
- if (param->size <= AUTOFS_DEV_IOCTL_SIZE) {
- err = -EINVAL;
- goto out;
- }
+ /* param->path has been checked in validate_dev_ioctl() */
devid = sbi->sb->s_dev;
@@ -521,10 +528,7 @@ static int autofs_dev_ioctl_ismountpoint(struct file *fp,
unsigned int devid, magic;
int err = -ENOENT;
- if (param->size <= AUTOFS_DEV_IOCTL_SIZE) {
- err = -EINVAL;
- goto out;
- }
+ /* param->path has been checked in validate_dev_ioctl() */
name = param->path;
type = param->ismountpoint.in.type;
diff --git a/fs/autofs/init.c b/fs/autofs/init.c
index cc9447e1903f..79ae07d9592f 100644
--- a/fs/autofs/init.c
+++ b/fs/autofs/init.c
@@ -23,7 +23,7 @@ static struct file_system_type autofs_fs_type = {
.kill_sb = autofs_kill_sb,
};
MODULE_ALIAS_FS("autofs");
-MODULE_ALIAS("autofs4");
+MODULE_ALIAS("autofs");
static int __init init_autofs_fs(void)
{
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 0ac456b52bdd..816cc921cf36 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1259,9 +1259,8 @@ static int load_elf_library(struct file *file)
goto out_free_ph;
}
- len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
- ELF_MIN_ALIGN - 1);
- bss = eppnt->p_memsz + eppnt->p_vaddr;
+ len = ELF_PAGEALIGN(eppnt->p_filesz + eppnt->p_vaddr);
+ bss = ELF_PAGEALIGN(eppnt->p_memsz + eppnt->p_vaddr);
if (bss > len) {
error = vm_brk(len, bss - len);
if (error)
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 0dd87aaeb39a..aba25414231a 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -221,7 +221,7 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter,
ret = bio_iov_iter_get_pages(&bio, iter);
if (unlikely(ret))
- return ret;
+ goto out;
ret = bio.bi_iter.bi_size;
if (iov_iter_rw(iter) == READ) {
@@ -250,12 +250,13 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter,
put_page(bvec->bv_page);
}
- if (vecs != inline_vecs)
- kfree(vecs);
-
if (unlikely(bio.bi_status))
ret = blk_status_to_errno(bio.bi_status);
+out:
+ if (vecs != inline_vecs)
+ kfree(vecs);
+
bio_uninit(&bio);
return ret;
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index cce6087d6880..b3e45714d28f 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -4238,8 +4238,9 @@ int try_release_extent_mapping(struct page *page, gfp_t mask)
struct extent_map *em;
u64 start = page_offset(page);
u64 end = start + PAGE_SIZE - 1;
- struct extent_io_tree *tree = &BTRFS_I(page->mapping->host)->io_tree;
- struct extent_map_tree *map = &BTRFS_I(page->mapping->host)->extent_tree;
+ struct btrfs_inode *btrfs_inode = BTRFS_I(page->mapping->host);
+ struct extent_io_tree *tree = &btrfs_inode->io_tree;
+ struct extent_map_tree *map = &btrfs_inode->extent_tree;
if (gfpflags_allow_blocking(mask) &&
page->mapping->host->i_size > SZ_16M) {
@@ -4262,6 +4263,8 @@ int try_release_extent_mapping(struct page *page, gfp_t mask)
extent_map_end(em) - 1,
EXTENT_LOCKED | EXTENT_WRITEBACK,
0, NULL)) {
+ set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
+ &btrfs_inode->runtime_flags);
remove_extent_mapping(map, em);
/* once for the rb tree */
free_extent_map(em);
@@ -4542,8 +4545,11 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
offset_in_extent = em_start - em->start;
em_end = extent_map_end(em);
em_len = em_end - em_start;
- disko = em->block_start + offset_in_extent;
flags = 0;
+ if (em->block_start < EXTENT_MAP_LAST_BYTE)
+ disko = em->block_start + offset_in_extent;
+ else
+ disko = 0;
/*
* bump off for our next call to get_extent
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index e9482f0db9d0..eba61bcb9bb3 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -9005,13 +9005,14 @@ again:
unlock_extent_cached(io_tree, page_start, page_end, &cached_state);
-out_unlock:
if (!ret2) {
btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE, true);
sb_end_pagefault(inode->i_sb);
extent_changeset_free(data_reserved);
return VM_FAULT_LOCKED;
}
+
+out_unlock:
unlock_page(page);
out:
btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE, (ret != 0));
@@ -9443,6 +9444,7 @@ static int btrfs_rename_exchange(struct inode *old_dir,
u64 new_idx = 0;
u64 root_objectid;
int ret;
+ int ret2;
bool root_log_pinned = false;
bool dest_log_pinned = false;
@@ -9639,7 +9641,8 @@ out_fail:
dest_log_pinned = false;
}
}
- ret = btrfs_end_transaction(trans);
+ ret2 = btrfs_end_transaction(trans);
+ ret = ret ? ret : ret2;
out_notrans:
if (new_ino == BTRFS_FIRST_FREE_OBJECTID)
up_read(&fs_info->subvol_sem);
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index c2837a32d689..b077544b5232 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -3327,11 +3327,13 @@ static void btrfs_cmp_data_free(struct cmp_pages *cmp)
if (pg) {
unlock_page(pg);
put_page(pg);
+ cmp->src_pages[i] = NULL;
}
pg = cmp->dst_pages[i];
if (pg) {
unlock_page(pg);
put_page(pg);
+ cmp->dst_pages[i] = NULL;
}
}
}
@@ -3577,7 +3579,7 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen,
ret = btrfs_extent_same_range(src, loff, BTRFS_MAX_DEDUPE_LEN,
dst, dst_loff, &cmp);
if (ret)
- goto out_unlock;
+ goto out_free;
loff += BTRFS_MAX_DEDUPE_LEN;
dst_loff += BTRFS_MAX_DEDUPE_LEN;
@@ -3587,16 +3589,16 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen,
ret = btrfs_extent_same_range(src, loff, tail_len, dst,
dst_loff, &cmp);
+out_free:
+ kvfree(cmp.src_pages);
+ kvfree(cmp.dst_pages);
+
out_unlock:
if (same_inode)
inode_unlock(src);
else
btrfs_double_inode_unlock(src, dst);
-out_free:
- kvfree(cmp.src_pages);
- kvfree(cmp.dst_pages);
-
return ret;
}
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 1874a6d2e6f5..c25dc47210a3 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -2680,8 +2680,10 @@ out:
free_extent_buffer(scratch_leaf);
}
- if (done && !ret)
+ if (done && !ret) {
ret = 1;
+ fs_info->qgroup_rescan_progress.objectid = (u64)-1;
+ }
return ret;
}
@@ -2784,13 +2786,20 @@ qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
if (!init_flags) {
/* we're resuming qgroup rescan at mount time */
- if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN))
+ if (!(fs_info->qgroup_flags &
+ BTRFS_QGROUP_STATUS_FLAG_RESCAN)) {
btrfs_warn(fs_info,
"qgroup rescan init failed, qgroup is not enabled");
- else if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON))
+ ret = -EINVAL;
+ } else if (!(fs_info->qgroup_flags &
+ BTRFS_QGROUP_STATUS_FLAG_ON)) {
btrfs_warn(fs_info,
"qgroup rescan init failed, qgroup rescan is not queued");
- return -EINVAL;
+ ret = -EINVAL;
+ }
+
+ if (ret)
+ return ret;
}
mutex_lock(&fs_info->qgroup_rescan_lock);
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 572306036477..6702896cdb8f 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -1151,11 +1151,6 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
return ret;
}
- if (sctx->is_dev_replace && !is_metadata && !have_csum) {
- sblocks_for_recheck = NULL;
- goto nodatasum_case;
- }
-
/*
* read all mirrors one after the other. This includes to
* re-read the extent or metadata block that failed (that was
@@ -1268,13 +1263,19 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
goto out;
}
- if (!is_metadata && !have_csum) {
+ /*
+ * NOTE: Even for nodatasum case, it's still possible that it's a
+ * compressed data extent, thus scrub_fixup_nodatasum(), which write
+ * inode page cache onto disk, could cause serious data corruption.
+ *
+ * So here we could only read from disk, and hope our recovery could
+ * reach disk before the newer write.
+ */
+ if (0 && !is_metadata && !have_csum) {
struct scrub_fixup_nodatasum *fixup_nodatasum;
WARN_ON(sctx->is_dev_replace);
-nodatasum_case:
-
/*
* !is_metadata and !have_csum, this means that the data
* might not be COWed, that it might be modified
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index e034ad9e23b4..1da162928d1a 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1146,6 +1146,7 @@ int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
{
int ret;
+ mutex_lock(&uuid_mutex);
mutex_lock(&fs_devices->device_list_mutex);
if (fs_devices->opened) {
fs_devices->opened++;
@@ -1155,6 +1156,7 @@ int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
ret = open_fs_devices(fs_devices, flags, holder);
}
mutex_unlock(&fs_devices->device_list_mutex);
+ mutex_unlock(&uuid_mutex);
return ret;
}
diff --git a/fs/cachefiles/bind.c b/fs/cachefiles/bind.c
index d9f001078e08..4a717d400807 100644
--- a/fs/cachefiles/bind.c
+++ b/fs/cachefiles/bind.c
@@ -218,7 +218,8 @@ static int cachefiles_daemon_add_cache(struct cachefiles_cache *cache)
"%s",
fsdef->dentry->d_sb->s_id);
- fscache_object_init(&fsdef->fscache, NULL, &cache->cache);
+ fscache_object_init(&fsdef->fscache, &fscache_fsdef_index,
+ &cache->cache);
ret = fscache_add_cache(&cache->cache, &fsdef->fscache, cache->tag);
if (ret < 0)
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
index ab0bbe93b398..af2b17b21b94 100644
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -186,12 +186,12 @@ try_again:
* need to wait for it to be destroyed */
wait_for_old_object:
trace_cachefiles_wait_active(object, dentry, xobject);
+ clear_bit(CACHEFILES_OBJECT_ACTIVE, &object->flags);
if (fscache_object_is_live(&xobject->fscache)) {
pr_err("\n");
pr_err("Error: Unexpected object collision\n");
cachefiles_printk_object(object, xobject);
- BUG();
}
atomic_inc(&xobject->usage);
write_unlock(&cache->active_lock);
@@ -248,7 +248,6 @@ wait_for_old_object:
goto try_again;
requeue:
- clear_bit(CACHEFILES_OBJECT_ACTIVE, &object->flags);
cache->cache.ops->put_object(&xobject->fscache, cachefiles_obj_put_wait_timeo);
_leave(" = -ETIMEDOUT");
return -ETIMEDOUT;
diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c
index 5082c8a49686..40f7595aad10 100644
--- a/fs/cachefiles/rdwr.c
+++ b/fs/cachefiles/rdwr.c
@@ -27,6 +27,7 @@ static int cachefiles_read_waiter(wait_queue_entry_t *wait, unsigned mode,
struct cachefiles_one_read *monitor =
container_of(wait, struct cachefiles_one_read, monitor);
struct cachefiles_object *object;
+ struct fscache_retrieval *op = monitor->op;
struct wait_bit_key *key = _key;
struct page *page = wait->private;
@@ -51,16 +52,22 @@ static int cachefiles_read_waiter(wait_queue_entry_t *wait, unsigned mode,
list_del(&wait->entry);
/* move onto the action list and queue for FS-Cache thread pool */
- ASSERT(monitor->op);
+ ASSERT(op);
- object = container_of(monitor->op->op.object,
- struct cachefiles_object, fscache);
+ /* We need to temporarily bump the usage count as we don't own a ref
+ * here otherwise cachefiles_read_copier() may free the op between the
+ * monitor being enqueued on the op->to_do list and the op getting
+ * enqueued on the work queue.
+ */
+ fscache_get_retrieval(op);
+ object = container_of(op->op.object, struct cachefiles_object, fscache);
spin_lock(&object->work_lock);
- list_add_tail(&monitor->op_link, &monitor->op->to_do);
+ list_add_tail(&monitor->op_link, &op->to_do);
spin_unlock(&object->work_lock);
- fscache_enqueue_retrieval(monitor->op);
+ fscache_enqueue_retrieval(op);
+ fscache_put_retrieval(op);
return 0;
}
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index ee764ac352ab..a866be999216 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -1135,6 +1135,7 @@ static struct dentry *splice_dentry(struct dentry *dn, struct inode *in)
if (IS_ERR(realdn)) {
pr_err("splice_dentry error %ld %p inode %p ino %llx.%llx\n",
PTR_ERR(realdn), dn, in, ceph_vinop(in));
+ dput(dn);
dn = realdn; /* note realdn contains the error */
goto out;
} else if (realdn) {
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index 116146022aa1..bfe999505815 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -126,6 +126,25 @@ static void cifs_debug_tcon(struct seq_file *m, struct cifs_tcon *tcon)
seq_putc(m, '\n');
}
+static void
+cifs_dump_iface(struct seq_file *m, struct cifs_server_iface *iface)
+{
+ struct sockaddr_in *ipv4 = (struct sockaddr_in *)&iface->sockaddr;
+ struct sockaddr_in6 *ipv6 = (struct sockaddr_in6 *)&iface->sockaddr;
+
+ seq_printf(m, "\t\tSpeed: %zu bps\n", iface->speed);
+ seq_puts(m, "\t\tCapabilities: ");
+ if (iface->rdma_capable)
+ seq_puts(m, "rdma ");
+ if (iface->rss_capable)
+ seq_puts(m, "rss ");
+ seq_putc(m, '\n');
+ if (iface->sockaddr.ss_family == AF_INET)
+ seq_printf(m, "\t\tIPv4: %pI4\n", &ipv4->sin_addr);
+ else if (iface->sockaddr.ss_family == AF_INET6)
+ seq_printf(m, "\t\tIPv6: %pI6\n", &ipv6->sin6_addr);
+}
+
static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
{
struct list_head *tmp1, *tmp2, *tmp3;
@@ -312,6 +331,16 @@ skip_rdma:
mid_entry->mid);
}
spin_unlock(&GlobalMid_Lock);
+
+ spin_lock(&ses->iface_lock);
+ if (ses->iface_count)
+ seq_printf(m, "\n\tServer interfaces: %zu\n",
+ ses->iface_count);
+ for (j = 0; j < ses->iface_count; j++) {
+ seq_printf(m, "\t%d)\n", j);
+ cifs_dump_iface(m, &ses->iface_list[j]);
+ }
+ spin_unlock(&ses->iface_lock);
}
}
spin_unlock(&cifs_tcp_ses_lock);
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index 937251cc61c0..ee2a8ec70056 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -37,7 +37,6 @@
#include <crypto/aead.h>
int __cifs_calc_signature(struct smb_rqst *rqst,
- int start,
struct TCP_Server_Info *server, char *signature,
struct shash_desc *shash)
{
@@ -45,16 +44,27 @@ int __cifs_calc_signature(struct smb_rqst *rqst,
int rc;
struct kvec *iov = rqst->rq_iov;
int n_vec = rqst->rq_nvec;
+ int is_smb2 = server->vals->header_preamble_size == 0;
- for (i = start; i < n_vec; i++) {
+ /* iov[0] is actual data and not the rfc1002 length for SMB2+ */
+ if (is_smb2) {
+ if (iov[0].iov_len <= 4)
+ return -EIO;
+ i = 0;
+ } else {
+ if (n_vec < 2 || iov[0].iov_len != 4)
+ return -EIO;
+ i = 1; /* skip rfc1002 length */
+ }
+
+ for (; i < n_vec; i++) {
if (iov[i].iov_len == 0)
continue;
if (iov[i].iov_base == NULL) {
cifs_dbg(VFS, "null iovec entry\n");
return -EIO;
}
- if (i == 1 && iov[1].iov_len <= 4)
- break; /* nothing to sign or corrupt header */
+
rc = crypto_shash_update(shash,
iov[i].iov_base, iov[i].iov_len);
if (rc) {
@@ -118,7 +128,7 @@ static int cifs_calc_signature(struct smb_rqst *rqst,
return rc;
}
- return __cifs_calc_signature(rqst, 1, server, signature,
+ return __cifs_calc_signature(rqst, server, signature,
&server->secmech.sdescmd5->shash);
}
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 1efa2e65bc1a..c923c7854027 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -33,6 +33,9 @@
#define CIFS_MAGIC_NUMBER 0xFF534D42 /* the first four bytes of SMB PDUs */
+#define CIFS_PORT 445
+#define RFC1001_PORT 139
+
/*
* The sizes of various internal tables and strings
*/
@@ -312,6 +315,10 @@ struct smb_version_operations {
/* send echo request */
int (*echo)(struct TCP_Server_Info *);
/* create directory */
+ int (*posix_mkdir)(const unsigned int xid, struct inode *inode,
+ umode_t mode, struct cifs_tcon *tcon,
+ const char *full_path,
+ struct cifs_sb_info *cifs_sb);
int (*mkdir)(const unsigned int, struct cifs_tcon *, const char *,
struct cifs_sb_info *);
/* set info on created directory */
@@ -416,7 +423,7 @@ struct smb_version_operations {
void (*set_oplock_level)(struct cifsInodeInfo *, __u32, unsigned int,
bool *);
/* create lease context buffer for CREATE request */
- char * (*create_lease_buf)(u8 *, u8);
+ char * (*create_lease_buf)(u8 *lease_key, u8 oplock);
/* parse lease context buffer and return oplock/epoch info */
__u8 (*parse_lease_buf)(void *buf, unsigned int *epoch, char *lkey);
ssize_t (*copychunk_range)(const unsigned int,
@@ -838,6 +845,13 @@ static inline void cifs_set_net_ns(struct TCP_Server_Info *srv, struct net *net)
#endif
+struct cifs_server_iface {
+ size_t speed;
+ unsigned int rdma_capable : 1;
+ unsigned int rss_capable : 1;
+ struct sockaddr_storage sockaddr;
+};
+
/*
* Session structure. One of these for each uid session with a particular host
*/
@@ -875,6 +889,20 @@ struct cifs_ses {
#ifdef CONFIG_CIFS_SMB311
__u8 preauth_sha_hash[SMB2_PREAUTH_HASH_SIZE];
#endif /* 3.1.1 */
+
+ /*
+ * Network interfaces available on the server this session is
+ * connected to.
+ *
+ * Other channels can be opened by connecting and binding this
+ * session to interfaces from this list.
+ *
+ * iface_lock should be taken when accessing any of these fields
+ */
+ spinlock_t iface_lock;
+ struct cifs_server_iface *iface_list;
+ size_t iface_count;
+ unsigned long iface_last_update; /* jiffies */
};
static inline bool
@@ -883,6 +911,14 @@ cap_unix(struct cifs_ses *ses)
return ses->server->vals->cap_unix & ses->capabilities;
}
+struct cached_fid {
+ bool is_valid:1; /* Do we have a useable root fid */
+ struct cifs_fid *fid;
+ struct mutex fid_mutex;
+ struct cifs_tcon *tcon;
+ struct work_struct lease_break;
+};
+
/*
* there is one of these for each connection to a resource on a particular
* session
@@ -987,9 +1023,7 @@ struct cifs_tcon {
struct fscache_cookie *fscache; /* cookie for share */
#endif
struct list_head pending_opens; /* list of incomplete opens */
- bool valid_root_fid:1; /* Do we have a useable root fid */
- struct mutex prfid_mutex; /* prevents reopen race after dead ses*/
- struct cifs_fid *prfid; /* handle to the directory at top of share */
+ struct cached_fid crfid; /* Cached root fid */
/* BB add field for back pointer to sb struct(s)? */
};
@@ -1382,6 +1416,7 @@ typedef int (mid_handle_t)(struct TCP_Server_Info *server,
/* one of these for every pending CIFS request to the server */
struct mid_q_entry {
struct list_head qhead; /* mids waiting on reply from this server */
+ struct kref refcount;
struct TCP_Server_Info *server; /* server corresponding to this mid */
__u64 mid; /* multiplex id */
__u32 pid; /* process id */
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 4e0d183c3d10..1890f534c88b 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -82,6 +82,7 @@ extern struct mid_q_entry *AllocMidQEntry(const struct smb_hdr *smb_buffer,
struct TCP_Server_Info *server);
extern void DeleteMidQEntry(struct mid_q_entry *midEntry);
extern void cifs_delete_mid(struct mid_q_entry *mid);
+extern void cifs_mid_q_entry_release(struct mid_q_entry *midEntry);
extern void cifs_wake_up_task(struct mid_q_entry *mid);
extern int cifs_handle_standard(struct TCP_Server_Info *server,
struct mid_q_entry *mid);
@@ -112,10 +113,6 @@ extern int SendReceive2(const unsigned int /* xid */ , struct cifs_ses *,
struct kvec *, int /* nvec to send */,
int * /* type of buf returned */, const int flags,
struct kvec * /* resp vec */);
-extern int smb2_send_recv(const unsigned int xid, struct cifs_ses *pses,
- struct kvec *pkvec, int nvec_to_send,
- int *pbuftype, const int flags,
- struct kvec *presp);
extern int SendReceiveBlockingLock(const unsigned int xid,
struct cifs_tcon *ptcon,
struct smb_hdr *in_buf ,
@@ -544,7 +541,7 @@ int cifs_create_mf_symlink(unsigned int xid, struct cifs_tcon *tcon,
struct cifs_sb_info *cifs_sb,
const unsigned char *path, char *pbuf,
unsigned int *pbytes_written);
-int __cifs_calc_signature(struct smb_rqst *rqst, int start,
+int __cifs_calc_signature(struct smb_rqst *rqst,
struct TCP_Server_Info *server, char *signature,
struct shash_desc *shash);
enum securityEnum cifs_select_sectype(struct TCP_Server_Info *,
@@ -552,6 +549,7 @@ enum securityEnum cifs_select_sectype(struct TCP_Server_Info *,
struct cifs_aio_ctx *cifs_aio_ctx_alloc(void);
void cifs_aio_ctx_release(struct kref *refcount);
int setup_aio_ctx_iter(struct cifs_aio_ctx *ctx, struct iov_iter *iter, int rw);
+void smb2_cached_lease_break(struct work_struct *work);
int cifs_alloc_hash(const char *name, struct crypto_shash **shash,
struct sdesc **sdesc);
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 42329b25877d..93408eab92e7 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -107,10 +107,10 @@ cifs_mark_open_files_invalid(struct cifs_tcon *tcon)
}
spin_unlock(&tcon->open_file_lock);
- mutex_lock(&tcon->prfid_mutex);
- tcon->valid_root_fid = false;
- memset(tcon->prfid, 0, sizeof(struct cifs_fid));
- mutex_unlock(&tcon->prfid_mutex);
+ mutex_lock(&tcon->crfid.fid_mutex);
+ tcon->crfid.is_valid = false;
+ memset(tcon->crfid.fid, 0, sizeof(struct cifs_fid));
+ mutex_unlock(&tcon->crfid.fid_mutex);
/*
* BB Add call to invalidate_inodes(sb) for all superblocks mounted
@@ -157,8 +157,14 @@ cifs_reconnect_tcon(struct cifs_tcon *tcon, int smb_command)
* greater than cifs socket timeout which is 7 seconds
*/
while (server->tcpStatus == CifsNeedReconnect) {
- wait_event_interruptible_timeout(server->response_q,
- (server->tcpStatus != CifsNeedReconnect), 10 * HZ);
+ rc = wait_event_interruptible_timeout(server->response_q,
+ (server->tcpStatus != CifsNeedReconnect),
+ 10 * HZ);
+ if (rc < 0) {
+ cifs_dbg(FYI, "%s: aborting reconnect due to a received"
+ " signal by the process\n", __func__);
+ return -ERESTARTSYS;
+ }
/* are we still trying to reconnect? */
if (server->tcpStatus != CifsNeedReconnect)
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 96645a7d8f27..5df2c0698cda 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -57,9 +57,6 @@
#include "smb2proto.h"
#include "smbdirect.h"
-#define CIFS_PORT 445
-#define RFC1001_PORT 139
-
extern mempool_t *cifs_req_poolp;
extern bool disable_legacy_dialects;
@@ -927,6 +924,7 @@ next_pdu:
server->pdu_size = next_offset;
}
+ mid_entry = NULL;
if (server->ops->is_transform_hdr &&
server->ops->receive_transform &&
server->ops->is_transform_hdr(buf)) {
@@ -941,8 +939,11 @@ next_pdu:
length = mid_entry->receive(server, mid_entry);
}
- if (length < 0)
+ if (length < 0) {
+ if (mid_entry)
+ cifs_mid_q_entry_release(mid_entry);
continue;
+ }
if (server->large_buf)
buf = server->bigbuf;
@@ -959,6 +960,8 @@ next_pdu:
if (!mid_entry->multiRsp || mid_entry->multiEnd)
mid_entry->callback(mid_entry);
+
+ cifs_mid_q_entry_release(mid_entry);
} else if (server->ops->is_oplock_break &&
server->ops->is_oplock_break(buf, server)) {
cifs_dbg(FYI, "Received oplock break\n");
@@ -3029,8 +3032,11 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb_vol *volume_info)
#ifdef CONFIG_CIFS_SMB311
if ((volume_info->linux_ext) && (ses->server->posix_ext_supported)) {
- if (ses->server->vals->protocol_id == SMB311_PROT_ID)
+ if (ses->server->vals->protocol_id == SMB311_PROT_ID) {
tcon->posix_extensions = true;
+ printk_once(KERN_WARNING
+ "SMB3.11 POSIX Extensions are experimental\n");
+ }
}
#endif /* 311 */
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index f4697f548a39..a2cfb33e85c1 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1575,6 +1575,17 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, umode_t mode)
goto mkdir_out;
}
+ server = tcon->ses->server;
+
+#ifdef CONFIG_CIFS_SMB311
+ if ((server->ops->posix_mkdir) && (tcon->posix_extensions)) {
+ rc = server->ops->posix_mkdir(xid, inode, mode, tcon, full_path,
+ cifs_sb);
+ d_drop(direntry); /* for time being always refresh inode info */
+ goto mkdir_out;
+ }
+#endif /* SMB311 */
+
if (cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
le64_to_cpu(tcon->fsUnixInfo.Capability))) {
rc = cifs_posix_mkdir(inode, direntry, mode, full_path, cifs_sb,
@@ -1583,8 +1594,6 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, umode_t mode)
goto mkdir_out;
}
- server = tcon->ses->server;
-
if (!server->ops->mkdir) {
rc = -ENOSYS;
goto mkdir_out;
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index af29ade195c0..53e8362cbc4a 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -82,6 +82,7 @@ sesInfoAlloc(void)
INIT_LIST_HEAD(&ret_buf->smb_ses_list);
INIT_LIST_HEAD(&ret_buf->tcon_list);
mutex_init(&ret_buf->session_mutex);
+ spin_lock_init(&ret_buf->iface_lock);
}
return ret_buf;
}
@@ -102,6 +103,7 @@ sesInfoFree(struct cifs_ses *buf_to_free)
kfree(buf_to_free->user_name);
kfree(buf_to_free->domainName);
kzfree(buf_to_free->auth_key.response);
+ kfree(buf_to_free->iface_list);
kzfree(buf_to_free);
}
@@ -117,8 +119,9 @@ tconInfoAlloc(void)
INIT_LIST_HEAD(&ret_buf->openFileList);
INIT_LIST_HEAD(&ret_buf->tcon_list);
spin_lock_init(&ret_buf->open_file_lock);
- mutex_init(&ret_buf->prfid_mutex);
- ret_buf->prfid = kzalloc(sizeof(struct cifs_fid), GFP_KERNEL);
+ mutex_init(&ret_buf->crfid.fid_mutex);
+ ret_buf->crfid.fid = kzalloc(sizeof(struct cifs_fid),
+ GFP_KERNEL);
#ifdef CONFIG_CIFS_STATS
spin_lock_init(&ret_buf->stat_lock);
#endif
@@ -136,7 +139,7 @@ tconInfoFree(struct cifs_tcon *buf_to_free)
atomic_dec(&tconInfoAllocCount);
kfree(buf_to_free->nativeFileSystem);
kzfree(buf_to_free->password);
- kfree(buf_to_free->prfid);
+ kfree(buf_to_free->crfid.fid);
kfree(buf_to_free);
}
diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c
index aff8ce8ba34d..646dcd149de1 100644
--- a/fs/cifs/smb1ops.c
+++ b/fs/cifs/smb1ops.c
@@ -107,6 +107,7 @@ cifs_find_mid(struct TCP_Server_Info *server, char *buffer)
if (compare_mid(mid->mid, buf) &&
mid->mid_state == MID_REQUEST_SUBMITTED &&
le16_to_cpu(mid->command) == buf->Command) {
+ kref_get(&mid->refcount);
spin_unlock(&GlobalMid_Lock);
return mid;
}
diff --git a/fs/cifs/smb2file.c b/fs/cifs/smb2file.c
index 788412675723..4ed10dd086e6 100644
--- a/fs/cifs/smb2file.c
+++ b/fs/cifs/smb2file.c
@@ -41,7 +41,7 @@ smb2_open_file(const unsigned int xid, struct cifs_open_parms *oparms,
int rc;
__le16 *smb2_path;
struct smb2_file_all_info *smb2_data = NULL;
- __u8 smb2_oplock[17];
+ __u8 smb2_oplock;
struct cifs_fid *fid = oparms->fid;
struct network_resiliency_req nr_ioctl_req;
@@ -59,12 +59,9 @@ smb2_open_file(const unsigned int xid, struct cifs_open_parms *oparms,
}
oparms->desired_access |= FILE_READ_ATTRIBUTES;
- *smb2_oplock = SMB2_OPLOCK_LEVEL_BATCH;
+ smb2_oplock = SMB2_OPLOCK_LEVEL_BATCH;
- if (oparms->tcon->ses->server->capabilities & SMB2_GLOBAL_CAP_LEASING)
- memcpy(smb2_oplock + 1, fid->lease_key, SMB2_LEASE_KEY_SIZE);
-
- rc = SMB2_open(xid, oparms, smb2_path, smb2_oplock, smb2_data, NULL,
+ rc = SMB2_open(xid, oparms, smb2_path, &smb2_oplock, smb2_data, NULL,
NULL);
if (rc)
goto out;
@@ -101,7 +98,7 @@ smb2_open_file(const unsigned int xid, struct cifs_open_parms *oparms,
move_smb2_info_to_cifs(buf, smb2_data);
}
- *oplock = *smb2_oplock;
+ *oplock = smb2_oplock;
out:
kfree(smb2_data);
kfree(smb2_path);
diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c
index e2bec47c6845..3ff7cec2da81 100644
--- a/fs/cifs/smb2misc.c
+++ b/fs/cifs/smb2misc.c
@@ -454,7 +454,8 @@ cifs_convert_path_to_utf16(const char *from, struct cifs_sb_info *cifs_sb)
#ifdef CONFIG_CIFS_SMB311
/* SMB311 POSIX extensions paths do not include leading slash */
else if (cifs_sb_master_tlink(cifs_sb) &&
- cifs_sb_master_tcon(cifs_sb)->posix_extensions) {
+ cifs_sb_master_tcon(cifs_sb)->posix_extensions &&
+ (from[0] == '/')) {
start_of_path = from + 1;
}
#endif /* 311 */
@@ -492,10 +493,11 @@ cifs_ses_oplock_break(struct work_struct *work)
{
struct smb2_lease_break_work *lw = container_of(work,
struct smb2_lease_break_work, lease_break);
- int rc;
+ int rc = 0;
rc = SMB2_lease_break(0, tlink_tcon(lw->tlink), lw->lease_key,
lw->lease_state);
+
cifs_dbg(FYI, "Lease release rc %d\n", rc);
cifs_put_tlink(lw->tlink);
kfree(lw);
@@ -561,6 +563,7 @@ smb2_tcon_has_lease(struct cifs_tcon *tcon, struct smb2_lease_break *rsp,
open->oplock = lease_state;
}
+
return found;
}
@@ -603,6 +606,18 @@ smb2_is_valid_lease_break(char *buffer)
return true;
}
spin_unlock(&tcon->open_file_lock);
+
+ if (tcon->crfid.is_valid &&
+ !memcmp(rsp->LeaseKey,
+ tcon->crfid.fid->lease_key,
+ SMB2_LEASE_KEY_SIZE)) {
+ INIT_WORK(&tcon->crfid.lease_break,
+ smb2_cached_lease_break);
+ queue_work(cifsiod_wq,
+ &tcon->crfid.lease_break);
+ spin_unlock(&cifs_tcp_ses_lock);
+ return true;
+ }
}
}
}
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index b15f5957d645..ea92a38b2f08 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -203,6 +203,7 @@ smb2_find_mid(struct TCP_Server_Info *server, char *buf)
if ((mid->mid == wire_mid) &&
(mid->mid_state == MID_REQUEST_SUBMITTED) &&
(mid->command == shdr->Command)) {
+ kref_get(&mid->refcount);
spin_unlock(&GlobalMid_Lock);
return mid;
}
@@ -294,34 +295,191 @@ smb2_negotiate_rsize(struct cifs_tcon *tcon, struct smb_vol *volume_info)
return rsize;
}
-#ifdef CONFIG_CIFS_STATS2
+
+static int
+parse_server_interfaces(struct network_interface_info_ioctl_rsp *buf,
+ size_t buf_len,
+ struct cifs_server_iface **iface_list,
+ size_t *iface_count)
+{
+ struct network_interface_info_ioctl_rsp *p;
+ struct sockaddr_in *addr4;
+ struct sockaddr_in6 *addr6;
+ struct iface_info_ipv4 *p4;
+ struct iface_info_ipv6 *p6;
+ struct cifs_server_iface *info;
+ ssize_t bytes_left;
+ size_t next = 0;
+ int nb_iface = 0;
+ int rc = 0;
+
+ *iface_list = NULL;
+ *iface_count = 0;
+
+ /*
+ * Fist pass: count and sanity check
+ */
+
+ bytes_left = buf_len;
+ p = buf;
+ while (bytes_left >= sizeof(*p)) {
+ nb_iface++;
+ next = le32_to_cpu(p->Next);
+ if (!next) {
+ bytes_left -= sizeof(*p);
+ break;
+ }
+ p = (struct network_interface_info_ioctl_rsp *)((u8 *)p+next);
+ bytes_left -= next;
+ }
+
+ if (!nb_iface) {
+ cifs_dbg(VFS, "%s: malformed interface info\n", __func__);
+ rc = -EINVAL;
+ goto out;
+ }
+
+ if (bytes_left || p->Next)
+ cifs_dbg(VFS, "%s: incomplete interface info\n", __func__);
+
+
+ /*
+ * Second pass: extract info to internal structure
+ */
+
+ *iface_list = kcalloc(nb_iface, sizeof(**iface_list), GFP_KERNEL);
+ if (!*iface_list) {
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ info = *iface_list;
+ bytes_left = buf_len;
+ p = buf;
+ while (bytes_left >= sizeof(*p)) {
+ info->speed = le64_to_cpu(p->LinkSpeed);
+ info->rdma_capable = le32_to_cpu(p->Capability & RDMA_CAPABLE);
+ info->rss_capable = le32_to_cpu(p->Capability & RSS_CAPABLE);
+
+ cifs_dbg(FYI, "%s: adding iface %zu\n", __func__, *iface_count);
+ cifs_dbg(FYI, "%s: speed %zu bps\n", __func__, info->speed);
+ cifs_dbg(FYI, "%s: capabilities 0x%08x\n", __func__,
+ le32_to_cpu(p->Capability));
+
+ switch (p->Family) {
+ /*
+ * The kernel and wire socket structures have the same
+ * layout and use network byte order but make the
+ * conversion explicit in case either one changes.
+ */
+ case INTERNETWORK:
+ addr4 = (struct sockaddr_in *)&info->sockaddr;
+ p4 = (struct iface_info_ipv4 *)p->Buffer;
+ addr4->sin_family = AF_INET;
+ memcpy(&addr4->sin_addr, &p4->IPv4Address, 4);
+
+ /* [MS-SMB2] 2.2.32.5.1.1 Clients MUST ignore these */
+ addr4->sin_port = cpu_to_be16(CIFS_PORT);
+
+ cifs_dbg(FYI, "%s: ipv4 %pI4\n", __func__,
+ &addr4->sin_addr);
+ break;
+ case INTERNETWORKV6:
+ addr6 = (struct sockaddr_in6 *)&info->sockaddr;
+ p6 = (struct iface_info_ipv6 *)p->Buffer;
+ addr6->sin6_family = AF_INET6;
+ memcpy(&addr6->sin6_addr, &p6->IPv6Address, 16);
+
+ /* [MS-SMB2] 2.2.32.5.1.2 Clients MUST ignore these */
+ addr6->sin6_flowinfo = 0;
+ addr6->sin6_scope_id = 0;
+ addr6->sin6_port = cpu_to_be16(CIFS_PORT);
+
+ cifs_dbg(FYI, "%s: ipv6 %pI6\n", __func__,
+ &addr6->sin6_addr);
+ break;
+ default:
+ cifs_dbg(VFS,
+ "%s: skipping unsupported socket family\n",
+ __func__);
+ goto next_iface;
+ }
+
+ (*iface_count)++;
+ info++;
+next_iface:
+ next = le32_to_cpu(p->Next);
+ if (!next)
+ break;
+ p = (struct network_interface_info_ioctl_rsp *)((u8 *)p+next);
+ bytes_left -= next;
+ }
+
+ if (!*iface_count) {
+ rc = -EINVAL;
+ goto out;
+ }
+
+out:
+ if (rc) {
+ kfree(*iface_list);
+ *iface_count = 0;
+ *iface_list = NULL;
+ }
+ return rc;
+}
+
+
static int
SMB3_request_interfaces(const unsigned int xid, struct cifs_tcon *tcon)
{
int rc;
unsigned int ret_data_len = 0;
- struct network_interface_info_ioctl_rsp *out_buf;
+ struct network_interface_info_ioctl_rsp *out_buf = NULL;
+ struct cifs_server_iface *iface_list;
+ size_t iface_count;
+ struct cifs_ses *ses = tcon->ses;
rc = SMB2_ioctl(xid, tcon, NO_FILE_ID, NO_FILE_ID,
FSCTL_QUERY_NETWORK_INTERFACE_INFO, true /* is_fsctl */,
NULL /* no data input */, 0 /* no data input */,
(char **)&out_buf, &ret_data_len);
- if (rc != 0)
+ if (rc != 0) {
cifs_dbg(VFS, "error %d on ioctl to get interface list\n", rc);
- else if (ret_data_len < sizeof(struct network_interface_info_ioctl_rsp)) {
- cifs_dbg(VFS, "server returned bad net interface info buf\n");
- rc = -EINVAL;
- } else {
- /* Dump info on first interface */
- cifs_dbg(FYI, "Adapter Capability 0x%x\t",
- le32_to_cpu(out_buf->Capability));
- cifs_dbg(FYI, "Link Speed %lld\n",
- le64_to_cpu(out_buf->LinkSpeed));
+ goto out;
}
+
+ rc = parse_server_interfaces(out_buf, ret_data_len,
+ &iface_list, &iface_count);
+ if (rc)
+ goto out;
+
+ spin_lock(&ses->iface_lock);
+ kfree(ses->iface_list);
+ ses->iface_list = iface_list;
+ ses->iface_count = iface_count;
+ ses->iface_last_update = jiffies;
+ spin_unlock(&ses->iface_lock);
+
+out:
kfree(out_buf);
return rc;
}
-#endif /* STATS2 */
+
+void
+smb2_cached_lease_break(struct work_struct *work)
+{
+ struct cached_fid *cfid = container_of(work,
+ struct cached_fid, lease_break);
+ mutex_lock(&cfid->fid_mutex);
+ if (cfid->is_valid) {
+ cifs_dbg(FYI, "clear cached root file handle\n");
+ SMB2_close(0, cfid->tcon, cfid->fid->persistent_fid,
+ cfid->fid->volatile_fid);
+ cfid->is_valid = false;
+ }
+ mutex_unlock(&cfid->fid_mutex);
+}
/*
* Open the directory at the root of a share
@@ -331,13 +489,13 @@ int open_shroot(unsigned int xid, struct cifs_tcon *tcon, struct cifs_fid *pfid)
struct cifs_open_parms oparams;
int rc;
__le16 srch_path = 0; /* Null - since an open of top of share */
- u8 oplock = SMB2_OPLOCK_LEVEL_NONE;
+ u8 oplock = SMB2_OPLOCK_LEVEL_II;
- mutex_lock(&tcon->prfid_mutex);
- if (tcon->valid_root_fid) {
+ mutex_lock(&tcon->crfid.fid_mutex);
+ if (tcon->crfid.is_valid) {
cifs_dbg(FYI, "found a cached root file handle\n");
- memcpy(pfid, tcon->prfid, sizeof(struct cifs_fid));
- mutex_unlock(&tcon->prfid_mutex);
+ memcpy(pfid, tcon->crfid.fid, sizeof(struct cifs_fid));
+ mutex_unlock(&tcon->crfid.fid_mutex);
return 0;
}
@@ -350,10 +508,11 @@ int open_shroot(unsigned int xid, struct cifs_tcon *tcon, struct cifs_fid *pfid)
rc = SMB2_open(xid, &oparams, &srch_path, &oplock, NULL, NULL, NULL);
if (rc == 0) {
- memcpy(tcon->prfid, pfid, sizeof(struct cifs_fid));
- tcon->valid_root_fid = true;
+ memcpy(tcon->crfid.fid, pfid, sizeof(struct cifs_fid));
+ tcon->crfid.tcon = tcon;
+ tcon->crfid.is_valid = true;
}
- mutex_unlock(&tcon->prfid_mutex);
+ mutex_unlock(&tcon->crfid.fid_mutex);
return rc;
}
@@ -383,9 +542,7 @@ smb3_qfs_tcon(const unsigned int xid, struct cifs_tcon *tcon)
if (rc)
return;
-#ifdef CONFIG_CIFS_STATS2
SMB3_request_interfaces(xid, tcon);
-#endif /* STATS2 */
SMB2_QFS_attr(xid, tcon, fid.persistent_fid, fid.volatile_fid,
FS_ATTRIBUTE_INFORMATION);
@@ -436,7 +593,7 @@ smb2_is_path_accessible(const unsigned int xid, struct cifs_tcon *tcon,
struct cifs_open_parms oparms;
struct cifs_fid fid;
- if ((*full_path == 0) && tcon->valid_root_fid)
+ if ((*full_path == 0) && tcon->crfid.is_valid)
return 0;
utf16_path = cifs_convert_path_to_utf16(full_path, cifs_sb);
@@ -699,6 +856,8 @@ smb2_set_ea(const unsigned int xid, struct cifs_tcon *tcon,
rc = SMB2_set_ea(xid, tcon, fid.persistent_fid, fid.volatile_fid, ea,
len);
+ kfree(ea);
+
SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid);
return rc;
@@ -2063,8 +2222,7 @@ smb2_create_lease_buf(u8 *lease_key, u8 oplock)
if (!buf)
return NULL;
- buf->lcontext.LeaseKeyLow = cpu_to_le64(*((u64 *)lease_key));
- buf->lcontext.LeaseKeyHigh = cpu_to_le64(*((u64 *)(lease_key + 8)));
+ memcpy(&buf->lcontext.LeaseKey, lease_key, SMB2_LEASE_KEY_SIZE);
buf->lcontext.LeaseState = map_oplock_to_lease(oplock);
buf->ccontext.DataOffset = cpu_to_le16(offsetof
@@ -2090,8 +2248,7 @@ smb3_create_lease_buf(u8 *lease_key, u8 oplock)
if (!buf)
return NULL;
- buf->lcontext.LeaseKeyLow = cpu_to_le64(*((u64 *)lease_key));
- buf->lcontext.LeaseKeyHigh = cpu_to_le64(*((u64 *)(lease_key + 8)));
+ memcpy(&buf->lcontext.LeaseKey, lease_key, SMB2_LEASE_KEY_SIZE);
buf->lcontext.LeaseState = map_oplock_to_lease(oplock);
buf->ccontext.DataOffset = cpu_to_le16(offsetof
@@ -2128,8 +2285,7 @@ smb3_parse_lease_buf(void *buf, unsigned int *epoch, char *lease_key)
if (lc->lcontext.LeaseFlags & SMB2_LEASE_FLAG_BREAK_IN_PROGRESS)
return SMB2_OPLOCK_LEVEL_NOCHANGE;
if (lease_key)
- memcpy(lease_key, &lc->lcontext.LeaseKeyLow,
- SMB2_LEASE_KEY_SIZE);
+ memcpy(lease_key, &lc->lcontext.LeaseKey, SMB2_LEASE_KEY_SIZE);
return le32_to_cpu(lc->lcontext.LeaseState);
}
@@ -2151,7 +2307,7 @@ fill_transform_hdr(struct smb2_transform_hdr *tr_hdr, unsigned int orig_len,
struct smb_rqst *old_rq)
{
struct smb2_sync_hdr *shdr =
- (struct smb2_sync_hdr *)old_rq->rq_iov[1].iov_base;
+ (struct smb2_sync_hdr *)old_rq->rq_iov[0].iov_base;
memset(tr_hdr, 0, sizeof(struct smb2_transform_hdr));
tr_hdr->ProtocolId = SMB2_TRANSFORM_PROTO_NUM;
@@ -2171,14 +2327,13 @@ static inline void smb2_sg_set_buf(struct scatterlist *sg, const void *buf,
}
/* Assumes:
- * rqst->rq_iov[0] is rfc1002 length
- * rqst->rq_iov[1] is tranform header
- * rqst->rq_iov[2+] data to be encrypted/decrypted
+ * rqst->rq_iov[0] is transform header
+ * rqst->rq_iov[1+] data to be encrypted/decrypted
*/
static struct scatterlist *
init_sg(struct smb_rqst *rqst, u8 *sign)
{
- unsigned int sg_len = rqst->rq_nvec + rqst->rq_npages;
+ unsigned int sg_len = rqst->rq_nvec + rqst->rq_npages + 1;
unsigned int assoc_data_len = sizeof(struct smb2_transform_hdr) - 20;
struct scatterlist *sg;
unsigned int i;
@@ -2189,10 +2344,10 @@ init_sg(struct smb_rqst *rqst, u8 *sign)
return NULL;
sg_init_table(sg, sg_len);
- smb2_sg_set_buf(&sg[0], rqst->rq_iov[1].iov_base + 20, assoc_data_len);
- for (i = 1; i < rqst->rq_nvec - 1; i++)
- smb2_sg_set_buf(&sg[i], rqst->rq_iov[i+1].iov_base,
- rqst->rq_iov[i+1].iov_len);
+ smb2_sg_set_buf(&sg[0], rqst->rq_iov[0].iov_base + 20, assoc_data_len);
+ for (i = 1; i < rqst->rq_nvec; i++)
+ smb2_sg_set_buf(&sg[i], rqst->rq_iov[i].iov_base,
+ rqst->rq_iov[i].iov_len);
for (j = 0; i < sg_len - 1; i++, j++) {
unsigned int len, offset;
@@ -2224,18 +2379,17 @@ smb2_get_enc_key(struct TCP_Server_Info *server, __u64 ses_id, int enc, u8 *key)
return 1;
}
/*
- * Encrypt or decrypt @rqst message. @rqst has the following format:
- * iov[0] - rfc1002 length
- * iov[1] - transform header (associate data),
- * iov[2-N] and pages - data to encrypt.
- * On success return encrypted data in iov[2-N] and pages, leave iov[0-1]
+ * Encrypt or decrypt @rqst message. @rqst[0] has the following format:
+ * iov[0] - transform header (associate data),
+ * iov[1-N] - SMB2 header and pages - data to encrypt.
+ * On success return encrypted data in iov[1-N] and pages, leave iov[0]
* untouched.
*/
static int
crypt_message(struct TCP_Server_Info *server, struct smb_rqst *rqst, int enc)
{
struct smb2_transform_hdr *tr_hdr =
- (struct smb2_transform_hdr *)rqst->rq_iov[1].iov_base;
+ (struct smb2_transform_hdr *)rqst->rq_iov[0].iov_base;
unsigned int assoc_data_len = sizeof(struct smb2_transform_hdr) - 20;
int rc = 0;
struct scatterlist *sg;
@@ -2323,10 +2477,6 @@ free_req:
return rc;
}
-/*
- * This is called from smb_send_rqst. At this point we have the rfc1002
- * header as the first element in the vector.
- */
static int
smb3_init_transform_rq(struct TCP_Server_Info *server, struct smb_rqst *new_rq,
struct smb_rqst *old_rq)
@@ -2335,7 +2485,7 @@ smb3_init_transform_rq(struct TCP_Server_Info *server, struct smb_rqst *new_rq,
struct page **pages;
struct smb2_transform_hdr *tr_hdr;
unsigned int npages = old_rq->rq_npages;
- unsigned int orig_len = get_rfc1002_length(old_rq->rq_iov[0].iov_base);
+ unsigned int orig_len;
int i;
int rc = -ENOMEM;
@@ -2355,18 +2505,14 @@ smb3_init_transform_rq(struct TCP_Server_Info *server, struct smb_rqst *new_rq,
goto err_free_pages;
}
- /* Make space for one extra iov to hold the transform header */
iov = kmalloc_array(old_rq->rq_nvec + 1, sizeof(struct kvec),
GFP_KERNEL);
if (!iov)
goto err_free_pages;
- /* copy all iovs from the old except the 1st one (rfc1002 length) */
- memcpy(&iov[2], &old_rq->rq_iov[1],
- sizeof(struct kvec) * (old_rq->rq_nvec - 1));
- /* copy the rfc1002 iov */
- iov[0].iov_base = old_rq->rq_iov[0].iov_base;
- iov[0].iov_len = old_rq->rq_iov[0].iov_len;
+ /* copy all iovs from the old */
+ memcpy(&iov[1], &old_rq->rq_iov[0],
+ sizeof(struct kvec) * old_rq->rq_nvec);
new_rq->rq_iov = iov;
new_rq->rq_nvec = old_rq->rq_nvec + 1;
@@ -2375,14 +2521,12 @@ smb3_init_transform_rq(struct TCP_Server_Info *server, struct smb_rqst *new_rq,
if (!tr_hdr)
goto err_free_iov;
+ orig_len = smb_rqst_len(server, old_rq);
+
/* fill the 2nd iov with a transform header */
fill_transform_hdr(tr_hdr, orig_len, old_rq);
- new_rq->rq_iov[1].iov_base = tr_hdr;
- new_rq->rq_iov[1].iov_len = sizeof(struct smb2_transform_hdr);
-
- /* Update rfc1002 header */
- inc_rfc1001_len(new_rq->rq_iov[0].iov_base,
- sizeof(struct smb2_transform_hdr));
+ new_rq->rq_iov[0].iov_base = tr_hdr;
+ new_rq->rq_iov[0].iov_len = sizeof(struct smb2_transform_hdr);
/* copy pages form the old */
for (i = 0; i < npages; i++) {
@@ -2426,7 +2570,7 @@ smb3_free_transform_rq(struct smb_rqst *rqst)
put_page(rqst->rq_pages[i]);
kfree(rqst->rq_pages);
/* free transform header */
- kfree(rqst->rq_iov[1].iov_base);
+ kfree(rqst->rq_iov[0].iov_base);
kfree(rqst->rq_iov);
}
@@ -2443,19 +2587,17 @@ decrypt_raw_data(struct TCP_Server_Info *server, char *buf,
unsigned int buf_data_size, struct page **pages,
unsigned int npages, unsigned int page_data_size)
{
- struct kvec iov[3];
+ struct kvec iov[2];
struct smb_rqst rqst = {NULL};
int rc;
- iov[0].iov_base = NULL;
- iov[0].iov_len = 0;
- iov[1].iov_base = buf;
- iov[1].iov_len = sizeof(struct smb2_transform_hdr);
- iov[2].iov_base = buf + sizeof(struct smb2_transform_hdr);
- iov[2].iov_len = buf_data_size;
+ iov[0].iov_base = buf;
+ iov[0].iov_len = sizeof(struct smb2_transform_hdr);
+ iov[1].iov_base = buf + sizeof(struct smb2_transform_hdr);
+ iov[1].iov_len = buf_data_size;
rqst.rq_iov = iov;
- rqst.rq_nvec = 3;
+ rqst.rq_nvec = 2;
rqst.rq_pages = pages;
rqst.rq_npages = npages;
rqst.rq_pagesz = PAGE_SIZE;
@@ -2467,7 +2609,7 @@ decrypt_raw_data(struct TCP_Server_Info *server, char *buf,
if (rc)
return rc;
- memmove(buf, iov[2].iov_base, buf_data_size);
+ memmove(buf, iov[1].iov_base, buf_data_size);
server->total_read = buf_data_size + page_data_size;
@@ -3170,6 +3312,7 @@ struct smb_version_operations smb311_operations = {
.set_compression = smb2_set_compression,
.mkdir = smb2_mkdir,
.mkdir_setinfo = smb2_mkdir_setinfo,
+ .posix_mkdir = smb311_posix_mkdir,
.rmdir = smb2_rmdir,
.unlink = smb2_unlink,
.rename = smb2_rename_path,
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index af032e1a3eac..3c92678cb45b 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -155,7 +155,7 @@ out:
static int
smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon)
{
- int rc = 0;
+ int rc;
struct nls_table *nls_codepage;
struct cifs_ses *ses;
struct TCP_Server_Info *server;
@@ -166,10 +166,10 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon)
* for those three - in the calling routine.
*/
if (tcon == NULL)
- return rc;
+ return 0;
if (smb2_command == SMB2_TREE_CONNECT)
- return rc;
+ return 0;
if (tcon->tidStatus == CifsExiting) {
/*
@@ -212,8 +212,14 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon)
return -EAGAIN;
}
- wait_event_interruptible_timeout(server->response_q,
- (server->tcpStatus != CifsNeedReconnect), 10 * HZ);
+ rc = wait_event_interruptible_timeout(server->response_q,
+ (server->tcpStatus != CifsNeedReconnect),
+ 10 * HZ);
+ if (rc < 0) {
+ cifs_dbg(FYI, "%s: aborting reconnect due to a received"
+ " signal by the process\n", __func__);
+ return -ERESTARTSYS;
+ }
/* are we still trying to reconnect? */
if (server->tcpStatus != CifsNeedReconnect)
@@ -231,7 +237,7 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon)
}
if (!tcon->ses->need_reconnect && !tcon->need_reconnect)
- return rc;
+ return 0;
nls_codepage = load_nls_default();
@@ -340,7 +346,10 @@ smb2_plain_req_init(__le16 smb2_command, struct cifs_tcon *tcon,
return rc;
/* BB eventually switch this to SMB2 specific small buf size */
- *request_buf = cifs_small_buf_get();
+ if (smb2_command == SMB2_SET_INFO)
+ *request_buf = cifs_buf_get();
+ else
+ *request_buf = cifs_small_buf_get();
if (*request_buf == NULL) {
/* BB should we add a retry in here if not a writepage? */
return -ENOMEM;
@@ -602,6 +611,7 @@ static void assemble_neg_contexts(struct smb2_negotiate_req *req,
int
SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
{
+ struct smb_rqst rqst;
struct smb2_negotiate_req *req;
struct smb2_negotiate_rsp *rsp;
struct kvec iov[1];
@@ -673,7 +683,11 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
iov[0].iov_base = (char *)req;
iov[0].iov_len = total_len;
- rc = smb2_send_recv(xid, ses, iov, 1, &resp_buftype, flags, &rsp_iov);
+ memset(&rqst, 0, sizeof(struct smb_rqst));
+ rqst.rq_iov = iov;
+ rqst.rq_nvec = 1;
+
+ rc = cifs_send_recv(xid, ses, &rqst, &resp_buftype, flags, &rsp_iov);
cifs_small_buf_release(req);
rsp = (struct smb2_negotiate_rsp *)rsp_iov.iov_base;
/*
@@ -990,8 +1004,9 @@ SMB2_sess_alloc_buffer(struct SMB2_sess_data *sess_data)
req->PreviousSessionId = sess_data->previous_session;
req->Flags = 0; /* MBZ */
- /* to enable echos and oplocks */
- req->sync_hdr.CreditRequest = cpu_to_le16(3);
+
+ /* enough to enable echos and oplocks and one max size write */
+ req->sync_hdr.CreditRequest = cpu_to_le16(130);
/* only one of SMB2 signing flags may be set in SMB2 request */
if (server->sign)
@@ -1027,6 +1042,7 @@ static int
SMB2_sess_sendreceive(struct SMB2_sess_data *sess_data)
{
int rc;
+ struct smb_rqst rqst;
struct smb2_sess_setup_req *req = sess_data->iov[0].iov_base;
struct kvec rsp_iov = { NULL, 0 };
@@ -1035,10 +1051,13 @@ SMB2_sess_sendreceive(struct SMB2_sess_data *sess_data)
cpu_to_le16(sizeof(struct smb2_sess_setup_req) - 1 /* pad */);
req->SecurityBufferLength = cpu_to_le16(sess_data->iov[1].iov_len);
- /* BB add code to build os and lm fields */
+ memset(&rqst, 0, sizeof(struct smb_rqst));
+ rqst.rq_iov = sess_data->iov;
+ rqst.rq_nvec = 2;
- rc = smb2_send_recv(sess_data->xid, sess_data->ses,
- sess_data->iov, 2,
+ /* BB add code to build os and lm fields */
+ rc = cifs_send_recv(sess_data->xid, sess_data->ses,
+ &rqst,
&sess_data->buf0_type,
CIFS_LOG_ERROR | CIFS_NEG_OP, &rsp_iov);
cifs_small_buf_release(sess_data->iov[0].iov_base);
@@ -1376,6 +1395,7 @@ out:
int
SMB2_logoff(const unsigned int xid, struct cifs_ses *ses)
{
+ struct smb_rqst rqst;
struct smb2_logoff_req *req; /* response is also trivial struct */
int rc = 0;
struct TCP_Server_Info *server;
@@ -1413,7 +1433,11 @@ SMB2_logoff(const unsigned int xid, struct cifs_ses *ses)
iov[0].iov_base = (char *)req;
iov[0].iov_len = total_len;
- rc = smb2_send_recv(xid, ses, iov, 1, &resp_buf_type, flags, &rsp_iov);
+ memset(&rqst, 0, sizeof(struct smb_rqst));
+ rqst.rq_iov = iov;
+ rqst.rq_nvec = 1;
+
+ rc = cifs_send_recv(xid, ses, &rqst, &resp_buf_type, flags, &rsp_iov);
cifs_small_buf_release(req);
/*
* No tcon so can't do
@@ -1443,6 +1467,7 @@ int
SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree,
struct cifs_tcon *tcon, const struct nls_table *cp)
{
+ struct smb_rqst rqst;
struct smb2_tree_connect_req *req;
struct smb2_tree_connect_rsp *rsp = NULL;
struct kvec iov[2];
@@ -1499,7 +1524,11 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree,
!smb3_encryption_required(tcon))
req->sync_hdr.Flags |= SMB2_FLAGS_SIGNED;
- rc = smb2_send_recv(xid, ses, iov, 2, &resp_buftype, flags, &rsp_iov);
+ memset(&rqst, 0, sizeof(struct smb_rqst));
+ rqst.rq_iov = iov;
+ rqst.rq_nvec = 2;
+
+ rc = cifs_send_recv(xid, ses, &rqst, &resp_buftype, flags, &rsp_iov);
cifs_small_buf_release(req);
rsp = (struct smb2_tree_connect_rsp *)rsp_iov.iov_base;
@@ -1563,6 +1592,7 @@ tcon_error_exit:
int
SMB2_tdis(const unsigned int xid, struct cifs_tcon *tcon)
{
+ struct smb_rqst rqst;
struct smb2_tree_disconnect_req *req; /* response is trivial */
int rc = 0;
struct cifs_ses *ses = tcon->ses;
@@ -1593,7 +1623,11 @@ SMB2_tdis(const unsigned int xid, struct cifs_tcon *tcon)
iov[0].iov_base = (char *)req;
iov[0].iov_len = total_len;
- rc = smb2_send_recv(xid, ses, iov, 1, &resp_buf_type, flags, &rsp_iov);
+ memset(&rqst, 0, sizeof(struct smb_rqst));
+ rqst.rq_iov = iov;
+ rqst.rq_nvec = 1;
+
+ rc = cifs_send_recv(xid, ses, &rqst, &resp_buf_type, flags, &rsp_iov);
cifs_small_buf_release(req);
if (rc)
cifs_stats_fail_inc(tcon, SMB2_TREE_DISCONNECT_HE);
@@ -1682,12 +1716,12 @@ parse_lease_state(struct TCP_Server_Info *server, struct smb2_create_rsp *rsp,
static int
add_lease_context(struct TCP_Server_Info *server, struct kvec *iov,
- unsigned int *num_iovec, __u8 *oplock)
+ unsigned int *num_iovec, u8 *lease_key, __u8 *oplock)
{
struct smb2_create_req *req = iov[0].iov_base;
unsigned int num = *num_iovec;
- iov[num].iov_base = server->ops->create_lease_buf(oplock+1, *oplock);
+ iov[num].iov_base = server->ops->create_lease_buf(lease_key, *oplock);
if (iov[num].iov_base == NULL)
return -ENOMEM;
iov[num].iov_len = server->vals->create_lease_size;
@@ -1886,11 +1920,165 @@ alloc_path_with_tree_prefix(__le16 **out_path, int *out_size, int *out_len,
return 0;
}
+#ifdef CONFIG_CIFS_SMB311
+int smb311_posix_mkdir(const unsigned int xid, struct inode *inode,
+ umode_t mode, struct cifs_tcon *tcon,
+ const char *full_path,
+ struct cifs_sb_info *cifs_sb)
+{
+ struct smb_rqst rqst;
+ struct smb2_create_req *req;
+ struct smb2_create_rsp *rsp;
+ struct TCP_Server_Info *server;
+ struct cifs_ses *ses = tcon->ses;
+ struct kvec iov[3]; /* make sure at least one for each open context */
+ struct kvec rsp_iov = {NULL, 0};
+ int resp_buftype;
+ int uni_path_len;
+ __le16 *copy_path = NULL;
+ int copy_size;
+ int rc = 0;
+ unsigned int n_iov = 2;
+ __u32 file_attributes = 0;
+ char *pc_buf = NULL;
+ int flags = 0;
+ unsigned int total_len;
+ __le16 *path = cifs_convert_path_to_utf16(full_path, cifs_sb);
+
+ if (!path)
+ return -ENOMEM;
+
+ cifs_dbg(FYI, "mkdir\n");
+
+ if (ses && (ses->server))
+ server = ses->server;
+ else
+ return -EIO;
+
+ rc = smb2_plain_req_init(SMB2_CREATE, tcon, (void **) &req, &total_len);
+
+ if (rc)
+ return rc;
+
+ if (smb3_encryption_required(tcon))
+ flags |= CIFS_TRANSFORM_REQ;
+
+
+ req->ImpersonationLevel = IL_IMPERSONATION;
+ req->DesiredAccess = cpu_to_le32(FILE_WRITE_ATTRIBUTES);
+ /* File attributes ignored on open (used in create though) */
+ req->FileAttributes = cpu_to_le32(file_attributes);
+ req->ShareAccess = FILE_SHARE_ALL_LE;
+ req->CreateDisposition = cpu_to_le32(FILE_CREATE);
+ req->CreateOptions = cpu_to_le32(CREATE_NOT_FILE);
+
+ iov[0].iov_base = (char *)req;
+ /* -1 since last byte is buf[0] which is sent below (path) */
+ iov[0].iov_len = total_len - 1;
+
+ req->NameOffset = cpu_to_le16(sizeof(struct smb2_create_req));
+
+ /* [MS-SMB2] 2.2.13 NameOffset:
+ * If SMB2_FLAGS_DFS_OPERATIONS is set in the Flags field of
+ * the SMB2 header, the file name includes a prefix that will
+ * be processed during DFS name normalization as specified in
+ * section 3.3.5.9. Otherwise, the file name is relative to
+ * the share that is identified by the TreeId in the SMB2
+ * header.
+ */
+ if (tcon->share_flags & SHI1005_FLAGS_DFS) {
+ int name_len;
+
+ req->sync_hdr.Flags |= SMB2_FLAGS_DFS_OPERATIONS;
+ rc = alloc_path_with_tree_prefix(&copy_path, &copy_size,
+ &name_len,
+ tcon->treeName, path);
+ if (rc) {
+ cifs_small_buf_release(req);
+ return rc;
+ }
+ req->NameLength = cpu_to_le16(name_len * 2);
+ uni_path_len = copy_size;
+ path = copy_path;
+ } else {
+ uni_path_len = (2 * UniStrnlen((wchar_t *)path, PATH_MAX)) + 2;
+ /* MUST set path len (NameLength) to 0 opening root of share */
+ req->NameLength = cpu_to_le16(uni_path_len - 2);
+ if (uni_path_len % 8 != 0) {
+ copy_size = roundup(uni_path_len, 8);
+ copy_path = kzalloc(copy_size, GFP_KERNEL);
+ if (!copy_path) {
+ cifs_small_buf_release(req);
+ return -ENOMEM;
+ }
+ memcpy((char *)copy_path, (const char *)path,
+ uni_path_len);
+ uni_path_len = copy_size;
+ path = copy_path;
+ }
+ }
+
+ iov[1].iov_len = uni_path_len;
+ iov[1].iov_base = path;
+ req->RequestedOplockLevel = SMB2_OPLOCK_LEVEL_NONE;
+
+ if (tcon->posix_extensions) {
+ if (n_iov > 2) {
+ struct create_context *ccontext =
+ (struct create_context *)iov[n_iov-1].iov_base;
+ ccontext->Next =
+ cpu_to_le32(iov[n_iov-1].iov_len);
+ }
+
+ rc = add_posix_context(iov, &n_iov, mode);
+ if (rc) {
+ cifs_small_buf_release(req);
+ kfree(copy_path);
+ return rc;
+ }
+ pc_buf = iov[n_iov-1].iov_base;
+ }
+
+
+ memset(&rqst, 0, sizeof(struct smb_rqst));
+ rqst.rq_iov = iov;
+ rqst.rq_nvec = n_iov;
+
+ rc = cifs_send_recv(xid, ses, &rqst, &resp_buftype, flags,
+ &rsp_iov);
+
+ cifs_small_buf_release(req);
+ rsp = (struct smb2_create_rsp *)rsp_iov.iov_base;
+
+ if (rc != 0) {
+ cifs_stats_fail_inc(tcon, SMB2_CREATE_HE);
+ trace_smb3_posix_mkdir_err(xid, tcon->tid, ses->Suid,
+ CREATE_NOT_FILE, FILE_WRITE_ATTRIBUTES, rc);
+ goto smb311_mkdir_exit;
+ } else
+ trace_smb3_posix_mkdir_done(xid, rsp->PersistentFileId, tcon->tid,
+ ses->Suid, CREATE_NOT_FILE,
+ FILE_WRITE_ATTRIBUTES);
+
+ SMB2_close(xid, tcon, rsp->PersistentFileId, rsp->VolatileFileId);
+
+ /* Eventually save off posix specific response info and timestaps */
+
+smb311_mkdir_exit:
+ kfree(copy_path);
+ kfree(pc_buf);
+ free_rsp_buf(resp_buftype, rsp);
+ return rc;
+
+}
+#endif /* SMB311 */
+
int
SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path,
__u8 *oplock, struct smb2_file_all_info *buf,
struct kvec *err_iov, int *buftype)
{
+ struct smb_rqst rqst;
struct smb2_create_req *req;
struct smb2_create_rsp *rsp;
struct TCP_Server_Info *server;
@@ -1993,7 +2181,8 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path,
*oplock == SMB2_OPLOCK_LEVEL_NONE)
req->RequestedOplockLevel = *oplock;
else {
- rc = add_lease_context(server, iov, &n_iov, oplock);
+ rc = add_lease_context(server, iov, &n_iov,
+ oparms->fid->lease_key, oplock);
if (rc) {
cifs_small_buf_release(req);
kfree(copy_path);
@@ -2043,7 +2232,11 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path,
}
#endif /* SMB311 */
- rc = smb2_send_recv(xid, ses, iov, n_iov, &resp_buftype, flags,
+ memset(&rqst, 0, sizeof(struct smb_rqst));
+ rqst.rq_iov = iov;
+ rqst.rq_nvec = n_iov;
+
+ rc = cifs_send_recv(xid, ses, &rqst, &resp_buftype, flags,
&rsp_iov);
cifs_small_buf_release(req);
rsp = (struct smb2_create_rsp *)rsp_iov.iov_base;
@@ -2099,6 +2292,7 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
char *in_data, u32 indatalen,
char **out_data, u32 *plen /* returned data len */)
{
+ struct smb_rqst rqst;
struct smb2_ioctl_req *req;
struct smb2_ioctl_rsp *rsp;
struct cifs_ses *ses;
@@ -2189,7 +2383,11 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
if (opcode == FSCTL_VALIDATE_NEGOTIATE_INFO)
req->sync_hdr.Flags |= SMB2_FLAGS_SIGNED;
- rc = smb2_send_recv(xid, ses, iov, n_iov, &resp_buftype, flags,
+ memset(&rqst, 0, sizeof(struct smb_rqst));
+ rqst.rq_iov = iov;
+ rqst.rq_nvec = n_iov;
+
+ rc = cifs_send_recv(xid, ses, &rqst, &resp_buftype, flags,
&rsp_iov);
cifs_small_buf_release(req);
rsp = (struct smb2_ioctl_rsp *)rsp_iov.iov_base;
@@ -2274,6 +2472,7 @@ int
SMB2_close_flags(const unsigned int xid, struct cifs_tcon *tcon,
u64 persistent_fid, u64 volatile_fid, int flags)
{
+ struct smb_rqst rqst;
struct smb2_close_req *req;
struct smb2_close_rsp *rsp;
struct cifs_ses *ses = tcon->ses;
@@ -2301,7 +2500,11 @@ SMB2_close_flags(const unsigned int xid, struct cifs_tcon *tcon,
iov[0].iov_base = (char *)req;
iov[0].iov_len = total_len;
- rc = smb2_send_recv(xid, ses, iov, 1, &resp_buftype, flags, &rsp_iov);
+ memset(&rqst, 0, sizeof(struct smb_rqst));
+ rqst.rq_iov = iov;
+ rqst.rq_nvec = 1;
+
+ rc = cifs_send_recv(xid, ses, &rqst, &resp_buftype, flags, &rsp_iov);
cifs_small_buf_release(req);
rsp = (struct smb2_close_rsp *)rsp_iov.iov_base;
@@ -2387,6 +2590,7 @@ query_info(const unsigned int xid, struct cifs_tcon *tcon,
u32 additional_info, size_t output_len, size_t min_len, void **data,
u32 *dlen)
{
+ struct smb_rqst rqst;
struct smb2_query_info_req *req;
struct smb2_query_info_rsp *rsp = NULL;
struct kvec iov[2];
@@ -2427,7 +2631,11 @@ query_info(const unsigned int xid, struct cifs_tcon *tcon,
/* 1 for Buffer */
iov[0].iov_len = total_len - 1;
- rc = smb2_send_recv(xid, ses, iov, 1, &resp_buftype, flags, &rsp_iov);
+ memset(&rqst, 0, sizeof(struct smb_rqst));
+ rqst.rq_iov = iov;
+ rqst.rq_nvec = 1;
+
+ rc = cifs_send_recv(xid, ses, &rqst, &resp_buftype, flags, &rsp_iov);
cifs_small_buf_release(req);
rsp = (struct smb2_query_info_rsp *)rsp_iov.iov_base;
@@ -2594,11 +2802,10 @@ SMB2_echo(struct TCP_Server_Info *server)
{
struct smb2_echo_req *req;
int rc = 0;
- struct kvec iov[2];
+ struct kvec iov[1];
struct smb_rqst rqst = { .rq_iov = iov,
- .rq_nvec = 2 };
+ .rq_nvec = 1 };
unsigned int total_len;
- __be32 rfc1002_marker;
cifs_dbg(FYI, "In echo request\n");
@@ -2614,11 +2821,8 @@ SMB2_echo(struct TCP_Server_Info *server)
req->sync_hdr.CreditRequest = cpu_to_le16(1);
- iov[0].iov_len = 4;
- rfc1002_marker = cpu_to_be32(total_len);
- iov[0].iov_base = &rfc1002_marker;
- iov[1].iov_len = total_len;
- iov[1].iov_base = (char *)req;
+ iov[0].iov_len = total_len;
+ iov[0].iov_base = (char *)req;
rc = cifs_call_async(server, &rqst, NULL, smb2_echo_callback, NULL,
server, CIFS_ECHO_OP);
@@ -2633,6 +2837,7 @@ int
SMB2_flush(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
u64 volatile_fid)
{
+ struct smb_rqst rqst;
struct smb2_flush_req *req;
struct cifs_ses *ses = tcon->ses;
struct kvec iov[1];
@@ -2660,7 +2865,11 @@ SMB2_flush(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
iov[0].iov_base = (char *)req;
iov[0].iov_len = total_len;
- rc = smb2_send_recv(xid, ses, iov, 1, &resp_buftype, flags, &rsp_iov);
+ memset(&rqst, 0, sizeof(struct smb_rqst));
+ rqst.rq_iov = iov;
+ rqst.rq_nvec = 1;
+
+ rc = cifs_send_recv(xid, ses, &rqst, &resp_buftype, flags, &rsp_iov);
cifs_small_buf_release(req);
if (rc != 0) {
@@ -2848,10 +3057,9 @@ smb2_async_readv(struct cifs_readdata *rdata)
struct smb2_sync_hdr *shdr;
struct cifs_io_parms io_parms;
struct smb_rqst rqst = { .rq_iov = rdata->iov,
- .rq_nvec = 2 };
+ .rq_nvec = 1 };
struct TCP_Server_Info *server;
unsigned int total_len;
- __be32 req_len;
cifs_dbg(FYI, "%s: offset=%llu bytes=%u\n",
__func__, rdata->offset, rdata->bytes);
@@ -2882,12 +3090,8 @@ smb2_async_readv(struct cifs_readdata *rdata)
if (smb3_encryption_required(io_parms.tcon))
flags |= CIFS_TRANSFORM_REQ;
- req_len = cpu_to_be32(total_len);
-
- rdata->iov[0].iov_base = &req_len;
- rdata->iov[0].iov_len = sizeof(__be32);
- rdata->iov[1].iov_base = buf;
- rdata->iov[1].iov_len = total_len;
+ rdata->iov[0].iov_base = buf;
+ rdata->iov[0].iov_len = total_len;
shdr = (struct smb2_sync_hdr *)buf;
@@ -2926,6 +3130,7 @@ int
SMB2_read(const unsigned int xid, struct cifs_io_parms *io_parms,
unsigned int *nbytes, char **buf, int *buf_type)
{
+ struct smb_rqst rqst;
int resp_buftype, rc = -EACCES;
struct smb2_read_plain_req *req = NULL;
struct smb2_read_rsp *rsp = NULL;
@@ -2946,7 +3151,11 @@ SMB2_read(const unsigned int xid, struct cifs_io_parms *io_parms,
iov[0].iov_base = (char *)req;
iov[0].iov_len = total_len;
- rc = smb2_send_recv(xid, ses, iov, 1, &resp_buftype, flags, &rsp_iov);
+ memset(&rqst, 0, sizeof(struct smb_rqst));
+ rqst.rq_iov = iov;
+ rqst.rq_nvec = 1;
+
+ rc = cifs_send_recv(xid, ses, &rqst, &resp_buftype, flags, &rsp_iov);
cifs_small_buf_release(req);
rsp = (struct smb2_read_rsp *)rsp_iov.iov_base;
@@ -3062,10 +3271,9 @@ smb2_async_writev(struct cifs_writedata *wdata,
struct smb2_sync_hdr *shdr;
struct cifs_tcon *tcon = tlink_tcon(wdata->cfile->tlink);
struct TCP_Server_Info *server = tcon->ses->server;
- struct kvec iov[2];
+ struct kvec iov[1];
struct smb_rqst rqst = { };
unsigned int total_len;
- __be32 rfc1002_marker;
rc = smb2_plain_req_init(SMB2_WRITE, tcon, (void **) &req, &total_len);
if (rc) {
@@ -3137,15 +3345,11 @@ smb2_async_writev(struct cifs_writedata *wdata,
v1->length = cpu_to_le32(wdata->mr->mr->length);
}
#endif
- /* 4 for rfc1002 length field and 1 for Buffer */
- iov[0].iov_len = 4;
- rfc1002_marker = cpu_to_be32(total_len - 1 + wdata->bytes);
- iov[0].iov_base = &rfc1002_marker;
- iov[1].iov_len = total_len - 1;
- iov[1].iov_base = (char *)req;
+ iov[0].iov_len = total_len - 1;
+ iov[0].iov_base = (char *)req;
rqst.rq_iov = iov;
- rqst.rq_nvec = 2;
+ rqst.rq_nvec = 1;
rqst.rq_pages = wdata->pages;
rqst.rq_offset = wdata->page_offset;
rqst.rq_npages = wdata->nr_pages;
@@ -3153,7 +3357,7 @@ smb2_async_writev(struct cifs_writedata *wdata,
rqst.rq_tailsz = wdata->tailsz;
#ifdef CONFIG_CIFS_SMB_DIRECT
if (wdata->mr) {
- iov[1].iov_len += sizeof(struct smbd_buffer_descriptor_v1);
+ iov[0].iov_len += sizeof(struct smbd_buffer_descriptor_v1);
rqst.rq_npages = 0;
}
#endif
@@ -3210,6 +3414,7 @@ int
SMB2_write(const unsigned int xid, struct cifs_io_parms *io_parms,
unsigned int *nbytes, struct kvec *iov, int n_vec)
{
+ struct smb_rqst rqst;
int rc = 0;
struct smb2_write_req *req = NULL;
struct smb2_write_rsp *rsp = NULL;
@@ -3251,7 +3456,11 @@ SMB2_write(const unsigned int xid, struct cifs_io_parms *io_parms,
/* 1 for Buffer */
iov[0].iov_len = total_len - 1;
- rc = smb2_send_recv(xid, io_parms->tcon->ses, iov, n_vec + 1,
+ memset(&rqst, 0, sizeof(struct smb_rqst));
+ rqst.rq_iov = iov;
+ rqst.rq_nvec = n_vec + 1;
+
+ rc = cifs_send_recv(xid, io_parms->tcon->ses, &rqst,
&resp_buftype, flags, &rsp_iov);
cifs_small_buf_release(req);
rsp = (struct smb2_write_rsp *)rsp_iov.iov_base;
@@ -3323,6 +3532,7 @@ SMB2_query_directory(const unsigned int xid, struct cifs_tcon *tcon,
u64 persistent_fid, u64 volatile_fid, int index,
struct cifs_search_info *srch_inf)
{
+ struct smb_rqst rqst;
struct smb2_query_directory_req *req;
struct smb2_query_directory_rsp *rsp = NULL;
struct kvec iov[2];
@@ -3395,7 +3605,11 @@ SMB2_query_directory(const unsigned int xid, struct cifs_tcon *tcon,
iov[1].iov_base = (char *)(req->Buffer);
iov[1].iov_len = len;
- rc = smb2_send_recv(xid, ses, iov, 2, &resp_buftype, flags, &rsp_iov);
+ memset(&rqst, 0, sizeof(struct smb_rqst));
+ rqst.rq_iov = iov;
+ rqst.rq_nvec = 2;
+
+ rc = cifs_send_recv(xid, ses, &rqst, &resp_buftype, flags, &rsp_iov);
cifs_small_buf_release(req);
rsp = (struct smb2_query_directory_rsp *)rsp_iov.iov_base;
@@ -3454,6 +3668,7 @@ send_set_info(const unsigned int xid, struct cifs_tcon *tcon,
u8 info_type, u32 additional_info, unsigned int num,
void **data, unsigned int *size)
{
+ struct smb_rqst rqst;
struct smb2_set_info_req *req;
struct smb2_set_info_rsp *rsp = NULL;
struct kvec *iov;
@@ -3509,9 +3724,13 @@ send_set_info(const unsigned int xid, struct cifs_tcon *tcon,
iov[i].iov_len = size[i];
}
- rc = smb2_send_recv(xid, ses, iov, num, &resp_buftype, flags,
+ memset(&rqst, 0, sizeof(struct smb_rqst));
+ rqst.rq_iov = iov;
+ rqst.rq_nvec = num;
+
+ rc = cifs_send_recv(xid, ses, &rqst, &resp_buftype, flags,
&rsp_iov);
- cifs_small_buf_release(req);
+ cifs_buf_release(req);
rsp = (struct smb2_set_info_rsp *)rsp_iov.iov_base;
if (rc != 0) {
@@ -3664,6 +3883,7 @@ SMB2_oplock_break(const unsigned int xid, struct cifs_tcon *tcon,
const u64 persistent_fid, const u64 volatile_fid,
__u8 oplock_level)
{
+ struct smb_rqst rqst;
int rc;
struct smb2_oplock_break *req = NULL;
struct cifs_ses *ses = tcon->ses;
@@ -3692,7 +3912,11 @@ SMB2_oplock_break(const unsigned int xid, struct cifs_tcon *tcon,
iov[0].iov_base = (char *)req;
iov[0].iov_len = total_len;
- rc = smb2_send_recv(xid, ses, iov, 1, &resp_buf_type, flags, &rsp_iov);
+ memset(&rqst, 0, sizeof(struct smb_rqst));
+ rqst.rq_iov = iov;
+ rqst.rq_nvec = 1;
+
+ rc = cifs_send_recv(xid, ses, &rqst, &resp_buf_type, flags, &rsp_iov);
cifs_small_buf_release(req);
if (rc) {
@@ -3755,6 +3979,7 @@ int
SMB2_QFS_info(const unsigned int xid, struct cifs_tcon *tcon,
u64 persistent_fid, u64 volatile_fid, struct kstatfs *fsdata)
{
+ struct smb_rqst rqst;
struct smb2_query_info_rsp *rsp = NULL;
struct kvec iov;
struct kvec rsp_iov;
@@ -3773,7 +3998,11 @@ SMB2_QFS_info(const unsigned int xid, struct cifs_tcon *tcon,
if (smb3_encryption_required(tcon))
flags |= CIFS_TRANSFORM_REQ;
- rc = smb2_send_recv(xid, ses, &iov, 1, &resp_buftype, flags, &rsp_iov);
+ memset(&rqst, 0, sizeof(struct smb_rqst));
+ rqst.rq_iov = &iov;
+ rqst.rq_nvec = 1;
+
+ rc = cifs_send_recv(xid, ses, &rqst, &resp_buftype, flags, &rsp_iov);
cifs_small_buf_release(iov.iov_base);
if (rc) {
cifs_stats_fail_inc(tcon, SMB2_QUERY_INFO_HE);
@@ -3798,6 +4027,7 @@ int
SMB2_QFS_attr(const unsigned int xid, struct cifs_tcon *tcon,
u64 persistent_fid, u64 volatile_fid, int level)
{
+ struct smb_rqst rqst;
struct smb2_query_info_rsp *rsp = NULL;
struct kvec iov;
struct kvec rsp_iov;
@@ -3829,7 +4059,11 @@ SMB2_QFS_attr(const unsigned int xid, struct cifs_tcon *tcon,
if (smb3_encryption_required(tcon))
flags |= CIFS_TRANSFORM_REQ;
- rc = smb2_send_recv(xid, ses, &iov, 1, &resp_buftype, flags, &rsp_iov);
+ memset(&rqst, 0, sizeof(struct smb_rqst));
+ rqst.rq_iov = &iov;
+ rqst.rq_nvec = 1;
+
+ rc = cifs_send_recv(xid, ses, &rqst, &resp_buftype, flags, &rsp_iov);
cifs_small_buf_release(iov.iov_base);
if (rc) {
cifs_stats_fail_inc(tcon, SMB2_QUERY_INFO_HE);
@@ -3868,6 +4102,7 @@ smb2_lockv(const unsigned int xid, struct cifs_tcon *tcon,
const __u64 persist_fid, const __u64 volatile_fid, const __u32 pid,
const __u32 num_lock, struct smb2_lock_element *buf)
{
+ struct smb_rqst rqst;
int rc = 0;
struct smb2_lock_req *req = NULL;
struct kvec iov[2];
@@ -3900,7 +4135,12 @@ smb2_lockv(const unsigned int xid, struct cifs_tcon *tcon,
iov[1].iov_len = count;
cifs_stats_inc(&tcon->stats.cifs_stats.num_locks);
- rc = smb2_send_recv(xid, tcon->ses, iov, 2, &resp_buf_type, flags,
+
+ memset(&rqst, 0, sizeof(struct smb_rqst));
+ rqst.rq_iov = iov;
+ rqst.rq_nvec = 2;
+
+ rc = cifs_send_recv(xid, tcon->ses, &rqst, &resp_buf_type, flags,
&rsp_iov);
cifs_small_buf_release(req);
if (rc) {
@@ -3934,6 +4174,7 @@ int
SMB2_lease_break(const unsigned int xid, struct cifs_tcon *tcon,
__u8 *lease_key, const __le32 lease_state)
{
+ struct smb_rqst rqst;
int rc;
struct smb2_lease_ack *req = NULL;
struct cifs_ses *ses = tcon->ses;
@@ -3964,7 +4205,11 @@ SMB2_lease_break(const unsigned int xid, struct cifs_tcon *tcon,
iov[0].iov_base = (char *)req;
iov[0].iov_len = total_len;
- rc = smb2_send_recv(xid, ses, iov, 1, &resp_buf_type, flags, &rsp_iov);
+ memset(&rqst, 0, sizeof(struct smb_rqst));
+ rqst.rq_iov = iov;
+ rqst.rq_nvec = 1;
+
+ rc = cifs_send_recv(xid, ses, &rqst, &resp_buf_type, flags, &rsp_iov);
cifs_small_buf_release(req);
if (rc) {
diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h
index a345560001ce..a671adcc44a6 100644
--- a/fs/cifs/smb2pdu.h
+++ b/fs/cifs/smb2pdu.h
@@ -678,16 +678,14 @@ struct create_context {
#define SMB2_LEASE_KEY_SIZE 16
struct lease_context {
- __le64 LeaseKeyLow;
- __le64 LeaseKeyHigh;
+ u8 LeaseKey[SMB2_LEASE_KEY_SIZE];
__le32 LeaseState;
__le32 LeaseFlags;
__le64 LeaseDuration;
} __packed;
struct lease_context_v2 {
- __le64 LeaseKeyLow;
- __le64 LeaseKeyHigh;
+ u8 LeaseKey[SMB2_LEASE_KEY_SIZE];
__le32 LeaseState;
__le32 LeaseFlags;
__le64 LeaseDuration;
@@ -851,8 +849,11 @@ struct validate_negotiate_info_rsp {
__le16 Dialect; /* Dialect in use for the connection */
} __packed;
-#define RSS_CAPABLE 0x00000001
-#define RDMA_CAPABLE 0x00000002
+#define RSS_CAPABLE cpu_to_le32(0x00000001)
+#define RDMA_CAPABLE cpu_to_le32(0x00000002)
+
+#define INTERNETWORK cpu_to_le16(0x0002)
+#define INTERNETWORKV6 cpu_to_le16(0x0017)
struct network_interface_info_ioctl_rsp {
__le32 Next; /* next interface. zero if this is last one */
@@ -860,7 +861,21 @@ struct network_interface_info_ioctl_rsp {
__le32 Capability; /* RSS or RDMA Capable */
__le32 Reserved;
__le64 LinkSpeed;
- char SockAddr_Storage[128];
+ __le16 Family;
+ __u8 Buffer[126];
+} __packed;
+
+struct iface_info_ipv4 {
+ __be16 Port;
+ __be32 IPv4Address;
+ __be64 Reserved;
+} __packed;
+
+struct iface_info_ipv6 {
+ __be16 Port;
+ __be32 FlowInfo;
+ __u8 IPv6Address[16];
+ __be32 ScopeId;
} __packed;
#define NO_FILE_ID 0xFFFFFFFFFFFFFFFFULL /* general ioctls to srv not to file */
diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h
index c84020057bd8..6e6a4f2ec890 100644
--- a/fs/cifs/smb2proto.h
+++ b/fs/cifs/smb2proto.h
@@ -79,6 +79,10 @@ extern int smb2_set_path_size(const unsigned int xid, struct cifs_tcon *tcon,
struct cifs_sb_info *cifs_sb, bool set_alloc);
extern int smb2_set_file_info(struct inode *inode, const char *full_path,
FILE_BASIC_INFO *buf, const unsigned int xid);
+extern int smb311_posix_mkdir(const unsigned int xid, struct inode *inode,
+ umode_t mode, struct cifs_tcon *tcon,
+ const char *full_path,
+ struct cifs_sb_info *cifs_sb);
extern int smb2_mkdir(const unsigned int xid, struct cifs_tcon *tcon,
const char *name, struct cifs_sb_info *cifs_sb);
extern void smb2_mkdir_setinfo(struct inode *inode, const char *full_path,
@@ -109,6 +113,8 @@ extern int smb2_unlock_range(struct cifsFileInfo *cfile,
extern int smb2_push_mandatory_locks(struct cifsFileInfo *cfile);
extern void smb2_reconnect_server(struct work_struct *work);
extern int smb3_crypto_aead_allocate(struct TCP_Server_Info *server);
+extern unsigned long smb_rqst_len(struct TCP_Server_Info *server,
+ struct smb_rqst *rqst);
/*
* SMB2 Worker functions - most of protocol specific implementation details
diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c
index 349d5ccf854c..719d55e63d88 100644
--- a/fs/cifs/smb2transport.c
+++ b/fs/cifs/smb2transport.c
@@ -171,10 +171,10 @@ smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server)
unsigned char smb2_signature[SMB2_HMACSHA256_SIZE];
unsigned char *sigptr = smb2_signature;
struct kvec *iov = rqst->rq_iov;
- int iov_hdr_index = rqst->rq_nvec > 1 ? 1 : 0;
- struct smb2_sync_hdr *shdr =
- (struct smb2_sync_hdr *)iov[iov_hdr_index].iov_base;
+ struct smb2_sync_hdr *shdr = (struct smb2_sync_hdr *)iov[0].iov_base;
struct cifs_ses *ses;
+ struct shash_desc *shash = &server->secmech.sdeschmacsha256->shash;
+ struct smb_rqst drqst;
ses = smb2_find_smb_ses(server, shdr->SessionId);
if (!ses) {
@@ -192,21 +192,39 @@ smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server)
}
rc = crypto_shash_setkey(server->secmech.hmacsha256,
- ses->auth_key.response, SMB2_NTLMV2_SESSKEY_SIZE);
+ ses->auth_key.response, SMB2_NTLMV2_SESSKEY_SIZE);
if (rc) {
cifs_dbg(VFS, "%s: Could not update with response\n", __func__);
return rc;
}
- rc = crypto_shash_init(&server->secmech.sdeschmacsha256->shash);
+ rc = crypto_shash_init(shash);
if (rc) {
cifs_dbg(VFS, "%s: Could not init sha256", __func__);
return rc;
}
- rc = __cifs_calc_signature(rqst, iov_hdr_index, server, sigptr,
- &server->secmech.sdeschmacsha256->shash);
+ /*
+ * For SMB2+, __cifs_calc_signature() expects to sign only the actual
+ * data, that is, iov[0] should not contain a rfc1002 length.
+ *
+ * Sign the rfc1002 length prior to passing the data (iov[1-N]) down to
+ * __cifs_calc_signature().
+ */
+ drqst = *rqst;
+ if (drqst.rq_nvec >= 2 && iov[0].iov_len == 4) {
+ rc = crypto_shash_update(shash, iov[0].iov_base,
+ iov[0].iov_len);
+ if (rc) {
+ cifs_dbg(VFS, "%s: Could not update with payload\n",
+ __func__);
+ return rc;
+ }
+ drqst.rq_iov++;
+ drqst.rq_nvec--;
+ }
+ rc = __cifs_calc_signature(&drqst, server, sigptr, shash);
if (!rc)
memcpy(shdr->Signature, sigptr, SMB2_SIGNATURE_SIZE);
@@ -410,14 +428,14 @@ generate_smb311signingkey(struct cifs_ses *ses)
int
smb3_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server)
{
- int rc = 0;
+ int rc;
unsigned char smb3_signature[SMB2_CMACAES_SIZE];
unsigned char *sigptr = smb3_signature;
struct kvec *iov = rqst->rq_iov;
- int iov_hdr_index = rqst->rq_nvec > 1 ? 1 : 0;
- struct smb2_sync_hdr *shdr =
- (struct smb2_sync_hdr *)iov[iov_hdr_index].iov_base;
+ struct smb2_sync_hdr *shdr = (struct smb2_sync_hdr *)iov[0].iov_base;
struct cifs_ses *ses;
+ struct shash_desc *shash = &server->secmech.sdesccmacaes->shash;
+ struct smb_rqst drqst;
ses = smb2_find_smb_ses(server, shdr->SessionId);
if (!ses) {
@@ -429,8 +447,7 @@ smb3_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server)
memset(shdr->Signature, 0x0, SMB2_SIGNATURE_SIZE);
rc = crypto_shash_setkey(server->secmech.cmacaes,
- ses->smb3signingkey, SMB2_CMACAES_SIZE);
-
+ ses->smb3signingkey, SMB2_CMACAES_SIZE);
if (rc) {
cifs_dbg(VFS, "%s: Could not set key for cmac aes\n", __func__);
return rc;
@@ -441,15 +458,33 @@ smb3_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server)
* so unlike smb2 case we do not have to check here if secmech are
* initialized
*/
- rc = crypto_shash_init(&server->secmech.sdesccmacaes->shash);
+ rc = crypto_shash_init(shash);
if (rc) {
cifs_dbg(VFS, "%s: Could not init cmac aes\n", __func__);
return rc;
}
- rc = __cifs_calc_signature(rqst, iov_hdr_index, server, sigptr,
- &server->secmech.sdesccmacaes->shash);
+ /*
+ * For SMB2+, __cifs_calc_signature() expects to sign only the actual
+ * data, that is, iov[0] should not contain a rfc1002 length.
+ *
+ * Sign the rfc1002 length prior to passing the data (iov[1-N]) down to
+ * __cifs_calc_signature().
+ */
+ drqst = *rqst;
+ if (drqst.rq_nvec >= 2 && iov[0].iov_len == 4) {
+ rc = crypto_shash_update(shash, iov[0].iov_base,
+ iov[0].iov_len);
+ if (rc) {
+ cifs_dbg(VFS, "%s: Could not update with payload\n",
+ __func__);
+ return rc;
+ }
+ drqst.rq_iov++;
+ drqst.rq_nvec--;
+ }
+ rc = __cifs_calc_signature(&drqst, server, sigptr, shash);
if (!rc)
memcpy(shdr->Signature, sigptr, SMB2_SIGNATURE_SIZE);
@@ -462,7 +497,7 @@ smb2_sign_rqst(struct smb_rqst *rqst, struct TCP_Server_Info *server)
{
int rc = 0;
struct smb2_sync_hdr *shdr =
- (struct smb2_sync_hdr *)rqst->rq_iov[1].iov_base;
+ (struct smb2_sync_hdr *)rqst->rq_iov[0].iov_base;
if (!(shdr->Flags & SMB2_FLAGS_SIGNED) ||
server->tcpStatus == CifsNeedNegotiate)
@@ -552,6 +587,7 @@ smb2_mid_entry_alloc(const struct smb2_sync_hdr *shdr,
temp = mempool_alloc(cifs_mid_poolp, GFP_NOFS);
memset(temp, 0, sizeof(struct mid_q_entry));
+ kref_init(&temp->refcount);
temp->mid = le64_to_cpu(shdr->MessageId);
temp->pid = current->pid;
temp->command = shdr->Command; /* Always LE */
@@ -635,7 +671,7 @@ smb2_setup_request(struct cifs_ses *ses, struct smb_rqst *rqst)
{
int rc;
struct smb2_sync_hdr *shdr =
- (struct smb2_sync_hdr *)rqst->rq_iov[1].iov_base;
+ (struct smb2_sync_hdr *)rqst->rq_iov[0].iov_base;
struct mid_q_entry *mid;
smb2_seq_num_into_buf(ses->server, shdr);
@@ -656,7 +692,7 @@ smb2_setup_async_request(struct TCP_Server_Info *server, struct smb_rqst *rqst)
{
int rc;
struct smb2_sync_hdr *shdr =
- (struct smb2_sync_hdr *)rqst->rq_iov[1].iov_base;
+ (struct smb2_sync_hdr *)rqst->rq_iov[0].iov_base;
struct mid_q_entry *mid;
smb2_seq_num_into_buf(server, shdr);
diff --git a/fs/cifs/smbdirect.c b/fs/cifs/smbdirect.c
index c4a6c6976aab..5fdb9a509a97 100644
--- a/fs/cifs/smbdirect.c
+++ b/fs/cifs/smbdirect.c
@@ -18,6 +18,7 @@
#include "smbdirect.h"
#include "cifs_debug.h"
#include "cifsproto.h"
+#include "smb2proto.h"
static struct smbd_response *get_empty_queue_buffer(
struct smbd_connection *info);
@@ -2089,12 +2090,13 @@ int smbd_recv(struct smbd_connection *info, struct msghdr *msg)
* rqst: the data to write
* return value: 0 if successfully write, otherwise error code
*/
-int smbd_send(struct smbd_connection *info, struct smb_rqst *rqst)
+int smbd_send(struct TCP_Server_Info *server, struct smb_rqst *rqst)
{
+ struct smbd_connection *info = server->smbd_conn;
struct kvec vec;
int nvecs;
int size;
- unsigned int buflen = 0, remaining_data_length;
+ unsigned int buflen, remaining_data_length;
int start, i, j;
int max_iov_size =
info->max_send_size - sizeof(struct smbd_data_transfer);
@@ -2118,25 +2120,13 @@ int smbd_send(struct smbd_connection *info, struct smb_rqst *rqst)
log_write(ERR, "expected the pdu length in 1st iov, but got %zu\n", rqst->rq_iov[0].iov_len);
return -EINVAL;
}
- iov = &rqst->rq_iov[1];
-
- /* total up iov array first */
- for (i = 0; i < rqst->rq_nvec-1; i++) {
- buflen += iov[i].iov_len;
- }
/*
* Add in the page array if there is one. The caller needs to set
* rq_tailsz to PAGE_SIZE when the buffer has multiple pages and
* ends at page boundary
*/
- if (rqst->rq_npages) {
- if (rqst->rq_npages == 1)
- buflen += rqst->rq_tailsz;
- else
- buflen += rqst->rq_pagesz * (rqst->rq_npages - 1) -
- rqst->rq_offset + rqst->rq_tailsz;
- }
+ buflen = smb_rqst_len(server, rqst);
if (buflen + sizeof(struct smbd_data_transfer) >
info->max_fragmented_send_size) {
@@ -2146,6 +2136,8 @@ int smbd_send(struct smbd_connection *info, struct smb_rqst *rqst)
goto done;
}
+ iov = &rqst->rq_iov[1];
+
cifs_dbg(FYI, "Sending smb (RDMA): smb_len=%u\n", buflen);
for (i = 0; i < rqst->rq_nvec-1; i++)
dump_smb(iov[i].iov_base, iov[i].iov_len);
diff --git a/fs/cifs/smbdirect.h b/fs/cifs/smbdirect.h
index 1e419c21dc60..a11096254f29 100644
--- a/fs/cifs/smbdirect.h
+++ b/fs/cifs/smbdirect.h
@@ -292,7 +292,7 @@ void smbd_destroy(struct smbd_connection *info);
/* Interface for carrying upper layer I/O through send/recv */
int smbd_recv(struct smbd_connection *info, struct msghdr *msg);
-int smbd_send(struct smbd_connection *info, struct smb_rqst *rqst);
+int smbd_send(struct TCP_Server_Info *server, struct smb_rqst *rqst);
enum mr_state {
MR_READY,
@@ -332,7 +332,7 @@ static inline void *smbd_get_connection(
static inline int smbd_reconnect(struct TCP_Server_Info *server) {return -1; }
static inline void smbd_destroy(struct smbd_connection *info) {}
static inline int smbd_recv(struct smbd_connection *info, struct msghdr *msg) {return -1; }
-static inline int smbd_send(struct smbd_connection *info, struct smb_rqst *rqst) {return -1; }
+static inline int smbd_send(struct TCP_Server_Info *server, struct smb_rqst *rqst) {return -1; }
#endif
#endif
diff --git a/fs/cifs/trace.h b/fs/cifs/trace.h
index 61e74d455d90..67e413f6ee4d 100644
--- a/fs/cifs/trace.h
+++ b/fs/cifs/trace.h
@@ -378,7 +378,7 @@ DEFINE_EVENT(smb3_open_err_class, smb3_##name, \
TP_ARGS(xid, tid, sesid, create_options, desired_access, rc))
DEFINE_SMB3_OPEN_ERR_EVENT(open_err);
-
+DEFINE_SMB3_OPEN_ERR_EVENT(posix_mkdir_err);
DECLARE_EVENT_CLASS(smb3_open_done_class,
TP_PROTO(unsigned int xid,
@@ -420,6 +420,7 @@ DEFINE_EVENT(smb3_open_done_class, smb3_##name, \
TP_ARGS(xid, fid, tid, sesid, create_options, desired_access))
DEFINE_SMB3_OPEN_DONE_EVENT(open_done);
+DEFINE_SMB3_OPEN_DONE_EVENT(posix_mkdir_done);
#endif /* _CIFS_TRACE_H */
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 1f1a68f89110..a341ec839c83 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -61,6 +61,7 @@ AllocMidQEntry(const struct smb_hdr *smb_buffer, struct TCP_Server_Info *server)
temp = mempool_alloc(cifs_mid_poolp, GFP_NOFS);
memset(temp, 0, sizeof(struct mid_q_entry));
+ kref_init(&temp->refcount);
temp->mid = get_mid(smb_buffer);
temp->pid = current->pid;
temp->command = cpu_to_le16(smb_buffer->Command);
@@ -82,6 +83,21 @@ AllocMidQEntry(const struct smb_hdr *smb_buffer, struct TCP_Server_Info *server)
return temp;
}
+static void _cifs_mid_q_entry_release(struct kref *refcount)
+{
+ struct mid_q_entry *mid = container_of(refcount, struct mid_q_entry,
+ refcount);
+
+ mempool_free(mid, cifs_mid_poolp);
+}
+
+void cifs_mid_q_entry_release(struct mid_q_entry *midEntry)
+{
+ spin_lock(&GlobalMid_Lock);
+ kref_put(&midEntry->refcount, _cifs_mid_q_entry_release);
+ spin_unlock(&GlobalMid_Lock);
+}
+
void
DeleteMidQEntry(struct mid_q_entry *midEntry)
{
@@ -110,7 +126,7 @@ DeleteMidQEntry(struct mid_q_entry *midEntry)
}
}
#endif
- mempool_free(midEntry, cifs_mid_poolp);
+ cifs_mid_q_entry_release(midEntry);
}
void
@@ -201,15 +217,25 @@ smb_send_kvec(struct TCP_Server_Info *server, struct msghdr *smb_msg,
return 0;
}
-static unsigned long
-rqst_len(struct smb_rqst *rqst)
+unsigned long
+smb_rqst_len(struct TCP_Server_Info *server, struct smb_rqst *rqst)
{
unsigned int i;
- struct kvec *iov = rqst->rq_iov;
+ struct kvec *iov;
+ int nvec;
unsigned long buflen = 0;
+ if (server->vals->header_preamble_size == 0 &&
+ rqst->rq_nvec >= 2 && rqst->rq_iov[0].iov_len == 4) {
+ iov = &rqst->rq_iov[1];
+ nvec = rqst->rq_nvec - 1;
+ } else {
+ iov = rqst->rq_iov;
+ nvec = rqst->rq_nvec;
+ }
+
/* total up iov array first */
- for (i = 0; i < rqst->rq_nvec; i++)
+ for (i = 0; i < nvec; i++)
buflen += iov[i].iov_len;
/*
@@ -236,70 +262,88 @@ rqst_len(struct smb_rqst *rqst)
}
static int
-__smb_send_rqst(struct TCP_Server_Info *server, struct smb_rqst *rqst)
+__smb_send_rqst(struct TCP_Server_Info *server, int num_rqst,
+ struct smb_rqst *rqst)
{
- int rc;
- struct kvec *iov = rqst->rq_iov;
- int n_vec = rqst->rq_nvec;
- unsigned int smb_buf_length = get_rfc1002_length(iov[0].iov_base);
- unsigned long send_length;
- unsigned int i;
+ int rc = 0;
+ struct kvec *iov;
+ int n_vec;
+ unsigned int send_length = 0;
+ unsigned int i, j;
size_t total_len = 0, sent, size;
struct socket *ssocket = server->ssocket;
struct msghdr smb_msg;
int val = 1;
+ __be32 rfc1002_marker;
+
if (cifs_rdma_enabled(server) && server->smbd_conn) {
- rc = smbd_send(server->smbd_conn, rqst);
+ rc = smbd_send(server, rqst);
goto smbd_done;
}
if (ssocket == NULL)
return -ENOTSOCK;
- /* sanity check send length */
- send_length = rqst_len(rqst);
- if (send_length != smb_buf_length + 4) {
- WARN(1, "Send length mismatch(send_length=%lu smb_buf_length=%u)\n",
- send_length, smb_buf_length);
- return -EIO;
- }
-
- if (n_vec < 2)
- return -EIO;
-
- cifs_dbg(FYI, "Sending smb: smb_len=%u\n", smb_buf_length);
- dump_smb(iov[0].iov_base, iov[0].iov_len);
- dump_smb(iov[1].iov_base, iov[1].iov_len);
-
/* cork the socket */
kernel_setsockopt(ssocket, SOL_TCP, TCP_CORK,
(char *)&val, sizeof(val));
- size = 0;
- for (i = 0; i < n_vec; i++)
- size += iov[i].iov_len;
+ for (j = 0; j < num_rqst; j++)
+ send_length += smb_rqst_len(server, &rqst[j]);
+ rfc1002_marker = cpu_to_be32(send_length);
- iov_iter_kvec(&smb_msg.msg_iter, WRITE | ITER_KVEC, iov, n_vec, size);
+ /* Generate a rfc1002 marker for SMB2+ */
+ if (server->vals->header_preamble_size == 0) {
+ struct kvec hiov = {
+ .iov_base = &rfc1002_marker,
+ .iov_len = 4
+ };
+ iov_iter_kvec(&smb_msg.msg_iter, WRITE | ITER_KVEC, &hiov,
+ 1, 4);
+ rc = smb_send_kvec(server, &smb_msg, &sent);
+ if (rc < 0)
+ goto uncork;
- rc = smb_send_kvec(server, &smb_msg, &sent);
- if (rc < 0)
- goto uncork;
+ total_len += sent;
+ send_length += 4;
+ }
- total_len += sent;
+ cifs_dbg(FYI, "Sending smb: smb_len=%u\n", send_length);
- /* now walk the page array and send each page in it */
- for (i = 0; i < rqst->rq_npages; i++) {
- struct bio_vec bvec;
+ for (j = 0; j < num_rqst; j++) {
+ iov = rqst[j].rq_iov;
+ n_vec = rqst[j].rq_nvec;
+
+ size = 0;
+ for (i = 0; i < n_vec; i++) {
+ dump_smb(iov[i].iov_base, iov[i].iov_len);
+ size += iov[i].iov_len;
+ }
- bvec.bv_page = rqst->rq_pages[i];
- rqst_page_get_length(rqst, i, &bvec.bv_len, &bvec.bv_offset);
+ iov_iter_kvec(&smb_msg.msg_iter, WRITE | ITER_KVEC,
+ iov, n_vec, size);
- iov_iter_bvec(&smb_msg.msg_iter, WRITE | ITER_BVEC,
- &bvec, 1, bvec.bv_len);
rc = smb_send_kvec(server, &smb_msg, &sent);
if (rc < 0)
- break;
+ goto uncork;
total_len += sent;
+
+ /* now walk the page array and send each page in it */
+ for (i = 0; i < rqst[j].rq_npages; i++) {
+ struct bio_vec bvec;
+
+ bvec.bv_page = rqst[j].rq_pages[i];
+ rqst_page_get_length(&rqst[j], i, &bvec.bv_len,
+ &bvec.bv_offset);
+
+ iov_iter_bvec(&smb_msg.msg_iter, WRITE | ITER_BVEC,
+ &bvec, 1, bvec.bv_len);
+ rc = smb_send_kvec(server, &smb_msg, &sent);
+ if (rc < 0)
+ break;
+
+ total_len += sent;
+ }
}
uncork:
@@ -308,9 +352,9 @@ uncork:
kernel_setsockopt(ssocket, SOL_TCP, TCP_CORK,
(char *)&val, sizeof(val));
- if ((total_len > 0) && (total_len != smb_buf_length + 4)) {
+ if ((total_len > 0) && (total_len != send_length)) {
cifs_dbg(FYI, "partial send (wanted=%u sent=%zu): terminating session\n",
- smb_buf_length + 4, total_len);
+ send_length, total_len);
/*
* If we have only sent part of an SMB then the next SMB could
* be taken as the remainder of this one. We need to kill the
@@ -335,7 +379,7 @@ smb_send_rqst(struct TCP_Server_Info *server, struct smb_rqst *rqst, int flags)
int rc;
if (!(flags & CIFS_TRANSFORM_REQ))
- return __smb_send_rqst(server, rqst);
+ return __smb_send_rqst(server, 1, rqst);
if (!server->ops->init_transform_rq ||
!server->ops->free_transform_rq) {
@@ -347,7 +391,7 @@ smb_send_rqst(struct TCP_Server_Info *server, struct smb_rqst *rqst, int flags)
if (rc)
return rc;
- rc = __smb_send_rqst(server, &cur_rqst);
+ rc = __smb_send_rqst(server, 1, &cur_rqst);
server->ops->free_transform_rq(&cur_rqst);
return rc;
}
@@ -365,7 +409,7 @@ smb_send(struct TCP_Server_Info *server, struct smb_hdr *smb_buffer,
iov[1].iov_base = (char *)smb_buffer + 4;
iov[1].iov_len = smb_buf_length;
- return __smb_send_rqst(server, &rqst);
+ return __smb_send_rqst(server, 1, &rqst);
}
static int
@@ -730,7 +774,6 @@ cifs_send_recv(const unsigned int xid, struct cifs_ses *ses,
* to the same server. We may make this configurable later or
* use ses->maxReq.
*/
-
rc = wait_for_free_request(ses->server, timeout, optype);
if (rc)
return rc;
@@ -766,8 +809,8 @@ cifs_send_recv(const unsigned int xid, struct cifs_ses *ses,
#ifdef CONFIG_CIFS_SMB311
if ((ses->status == CifsNew) || (optype & CIFS_NEG_OP))
- smb311_update_preauth_hash(ses, rqst->rq_iov+1,
- rqst->rq_nvec-1);
+ smb311_update_preauth_hash(ses, rqst->rq_iov,
+ rqst->rq_nvec);
#endif
if (timeout == CIFS_ASYNC_OP)
@@ -812,8 +855,8 @@ cifs_send_recv(const unsigned int xid, struct cifs_ses *ses,
#ifdef CONFIG_CIFS_SMB311
if ((ses->status == CifsNew) || (optype & CIFS_NEG_OP)) {
struct kvec iov = {
- .iov_base = buf,
- .iov_len = midQ->resp_buf_size
+ .iov_base = resp_iov->iov_base,
+ .iov_len = resp_iov->iov_len
};
smb311_update_preauth_hash(ses, &iov, 1);
}
@@ -872,49 +915,6 @@ SendReceive2(const unsigned int xid, struct cifs_ses *ses,
return rc;
}
-/* Like SendReceive2 but iov[0] does not contain an rfc1002 header */
-int
-smb2_send_recv(const unsigned int xid, struct cifs_ses *ses,
- struct kvec *iov, int n_vec, int *resp_buf_type /* ret */,
- const int flags, struct kvec *resp_iov)
-{
- struct smb_rqst rqst;
- struct kvec s_iov[CIFS_MAX_IOV_SIZE], *new_iov;
- int rc;
- int i;
- __u32 count;
- __be32 rfc1002_marker;
-
- if (n_vec + 1 > CIFS_MAX_IOV_SIZE) {
- new_iov = kmalloc_array(n_vec + 1, sizeof(struct kvec),
- GFP_KERNEL);
- if (!new_iov)
- return -ENOMEM;
- } else
- new_iov = s_iov;
-
- /* 1st iov is an RFC1002 Session Message length */
- memcpy(new_iov + 1, iov, (sizeof(struct kvec) * n_vec));
-
- count = 0;
- for (i = 1; i < n_vec + 1; i++)
- count += new_iov[i].iov_len;
-
- rfc1002_marker = cpu_to_be32(count);
-
- new_iov[0].iov_base = &rfc1002_marker;
- new_iov[0].iov_len = 4;
-
- memset(&rqst, 0, sizeof(struct smb_rqst));
- rqst.rq_iov = new_iov;
- rqst.rq_nvec = n_vec + 1;
-
- rc = cifs_send_recv(xid, ses, &rqst, resp_buf_type, flags, resp_iov);
- if (n_vec + 1 > CIFS_MAX_IOV_SIZE)
- kfree(new_iov);
- return rc;
-}
-
int
SendReceive(const unsigned int xid, struct cifs_ses *ses,
struct smb_hdr *in_buf, struct smb_hdr *out_buf,
diff --git a/fs/dcache.c b/fs/dcache.c
index 0e8e5de3c48a..ceb7b491d1b9 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -358,14 +358,11 @@ static void dentry_unlink_inode(struct dentry * dentry)
__releases(dentry->d_inode->i_lock)
{
struct inode *inode = dentry->d_inode;
- bool hashed = !d_unhashed(dentry);
- if (hashed)
- raw_write_seqcount_begin(&dentry->d_seq);
+ raw_write_seqcount_begin(&dentry->d_seq);
__d_clear_type_and_inode(dentry);
hlist_del_init(&dentry->d_u.d_alias);
- if (hashed)
- raw_write_seqcount_end(&dentry->d_seq);
+ raw_write_seqcount_end(&dentry->d_seq);
spin_unlock(&dentry->d_lock);
spin_unlock(&inode->i_lock);
if (!inode->i_nlink)
@@ -1932,10 +1929,12 @@ struct dentry *d_make_root(struct inode *root_inode)
if (root_inode) {
res = d_alloc_anon(root_inode->i_sb);
- if (res)
+ if (res) {
+ res->d_flags |= DCACHE_RCUACCESS;
d_instantiate(res, root_inode);
- else
+ } else {
iput(root_inode);
+ }
}
return res;
}
diff --git a/fs/eventfd.c b/fs/eventfd.c
index ceb1031f1cac..08d3bd602f73 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -101,20 +101,14 @@ static int eventfd_release(struct inode *inode, struct file *file)
return 0;
}
-static struct wait_queue_head *
-eventfd_get_poll_head(struct file *file, __poll_t events)
-{
- struct eventfd_ctx *ctx = file->private_data;
-
- return &ctx->wqh;
-}
-
-static __poll_t eventfd_poll_mask(struct file *file, __poll_t eventmask)
+static __poll_t eventfd_poll(struct file *file, poll_table *wait)
{
struct eventfd_ctx *ctx = file->private_data;
__poll_t events = 0;
u64 count;
+ poll_wait(file, &ctx->wqh, wait);
+
/*
* All writes to ctx->count occur within ctx->wqh.lock. This read
* can be done outside ctx->wqh.lock because we know that poll_wait
@@ -156,11 +150,11 @@ static __poll_t eventfd_poll_mask(struct file *file, __poll_t eventmask)
count = READ_ONCE(ctx->count);
if (count > 0)
- events |= (EPOLLIN & eventmask);
+ events |= EPOLLIN;
if (count == ULLONG_MAX)
events |= EPOLLERR;
if (ULLONG_MAX - 1 > count)
- events |= (EPOLLOUT & eventmask);
+ events |= EPOLLOUT;
return events;
}
@@ -311,8 +305,7 @@ static const struct file_operations eventfd_fops = {
.show_fdinfo = eventfd_show_fdinfo,
#endif
.release = eventfd_release,
- .get_poll_head = eventfd_get_poll_head,
- .poll_mask = eventfd_poll_mask,
+ .poll = eventfd_poll,
.read = eventfd_read,
.write = eventfd_write,
.llseek = noop_llseek,
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index ea4436f409fb..67db22fe99c5 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -922,18 +922,14 @@ static __poll_t ep_read_events_proc(struct eventpoll *ep, struct list_head *head
return 0;
}
-static struct wait_queue_head *ep_eventpoll_get_poll_head(struct file *file,
- __poll_t eventmask)
-{
- struct eventpoll *ep = file->private_data;
- return &ep->poll_wait;
-}
-
-static __poll_t ep_eventpoll_poll_mask(struct file *file, __poll_t eventmask)
+static __poll_t ep_eventpoll_poll(struct file *file, poll_table *wait)
{
struct eventpoll *ep = file->private_data;
int depth = 0;
+ /* Insert inside our poll wait queue */
+ poll_wait(file, &ep->poll_wait, wait);
+
/*
* Proceed to find out if wanted events are really available inside
* the ready list.
@@ -972,8 +968,7 @@ static const struct file_operations eventpoll_fops = {
.show_fdinfo = ep_show_fdinfo,
#endif
.release = ep_eventpoll_release,
- .get_poll_head = ep_eventpoll_get_poll_head,
- .poll_mask = ep_eventpoll_poll_mask,
+ .poll = ep_eventpoll_poll,
.llseek = noop_llseek,
};
diff --git a/fs/exec.c b/fs/exec.c
index 2d4e0075bd24..bdd0eacefdf5 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -290,15 +290,15 @@ static int __bprm_mm_init(struct linux_binprm *bprm)
struct vm_area_struct *vma = NULL;
struct mm_struct *mm = bprm->mm;
- bprm->vma = vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
+ bprm->vma = vma = vm_area_alloc(mm);
if (!vma)
return -ENOMEM;
+ vma_set_anonymous(vma);
if (down_write_killable(&mm->mmap_sem)) {
err = -EINTR;
goto err_free;
}
- vma->vm_mm = mm;
/*
* Place the stack at the largest stack address the architecture
@@ -311,7 +311,6 @@ static int __bprm_mm_init(struct linux_binprm *bprm)
vma->vm_start = vma->vm_end - PAGE_SIZE;
vma->vm_flags = VM_SOFTDIRTY | VM_STACK_FLAGS | VM_STACK_INCOMPLETE_SETUP;
vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
- INIT_LIST_HEAD(&vma->anon_vma_chain);
err = insert_vm_struct(mm, vma);
if (err)
@@ -326,7 +325,7 @@ err:
up_write(&mm->mmap_sem);
err_free:
bprm->vma = NULL;
- kmem_cache_free(vm_area_cachep, vma);
+ vm_area_free(vma);
return err;
}
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index cc40802ddfa8..00e759f05161 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -748,7 +748,6 @@ extern void ext2_free_blocks (struct inode *, unsigned long,
unsigned long);
extern unsigned long ext2_count_free_blocks (struct super_block *);
extern unsigned long ext2_count_dirs (struct super_block *);
-extern void ext2_check_blocks_bitmap (struct super_block *);
extern struct ext2_group_desc * ext2_get_group_desc(struct super_block * sb,
unsigned int block_group,
struct buffer_head ** bh);
@@ -771,7 +770,6 @@ extern void ext2_set_link(struct inode *, struct ext2_dir_entry_2 *, struct page
extern struct inode * ext2_new_inode (struct inode *, umode_t, const struct qstr *);
extern void ext2_free_inode (struct inode *);
extern unsigned long ext2_count_free_inodes (struct super_block *);
-extern void ext2_check_inodes_bitmap (struct super_block *);
extern unsigned long ext2_count_free (struct buffer_head *, unsigned);
/* inode.c */
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 25ab1274090f..8ff53f8da3bc 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -557,6 +557,9 @@ static int parse_options(char *options, struct super_block *sb,
set_opt (opts->s_mount_opt, NO_UID32);
break;
case Opt_nocheck:
+ ext2_msg(sb, KERN_WARNING,
+ "Option nocheck/check=none is deprecated and"
+ " will be removed in June 2020.");
clear_opt (opts->s_mount_opt, CHECK);
break;
case Opt_debug:
@@ -1335,9 +1338,6 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data)
new_opts.s_resgid = sbi->s_resgid;
spin_unlock(&sbi->s_lock);
- /*
- * Allow the "check" option to be passed as a remount option.
- */
if (!parse_options(data, sb, &new_opts))
return -EINVAL;
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index b00481c475cb..aa52d87985aa 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -184,7 +184,6 @@ static int ext4_init_block_bitmap(struct super_block *sb,
unsigned int bit, bit_max;
struct ext4_sb_info *sbi = EXT4_SB(sb);
ext4_fsblk_t start, tmp;
- int flex_bg = 0;
J_ASSERT_BH(bh, buffer_locked(bh));
@@ -207,22 +206,19 @@ static int ext4_init_block_bitmap(struct super_block *sb,
start = ext4_group_first_block_no(sb, block_group);
- if (ext4_has_feature_flex_bg(sb))
- flex_bg = 1;
-
/* Set bits for block and inode bitmaps, and inode table */
tmp = ext4_block_bitmap(sb, gdp);
- if (!flex_bg || ext4_block_in_group(sb, tmp, block_group))
+ if (ext4_block_in_group(sb, tmp, block_group))
ext4_set_bit(EXT4_B2C(sbi, tmp - start), bh->b_data);
tmp = ext4_inode_bitmap(sb, gdp);
- if (!flex_bg || ext4_block_in_group(sb, tmp, block_group))
+ if (ext4_block_in_group(sb, tmp, block_group))
ext4_set_bit(EXT4_B2C(sbi, tmp - start), bh->b_data);
tmp = ext4_inode_table(sb, gdp);
for (; tmp < ext4_inode_table(sb, gdp) +
sbi->s_itb_per_group; tmp++) {
- if (!flex_bg || ext4_block_in_group(sb, tmp, block_group))
+ if (ext4_block_in_group(sb, tmp, block_group))
ext4_set_bit(EXT4_B2C(sbi, tmp - start), bh->b_data);
}
@@ -372,6 +368,8 @@ static int ext4_validate_block_bitmap(struct super_block *sb,
return -EFSCORRUPTED;
ext4_lock_group(sb, block_group);
+ if (buffer_verified(bh))
+ goto verified;
if (unlikely(!ext4_block_bitmap_csum_verify(sb, block_group,
desc, bh))) {
ext4_unlock_group(sb, block_group);
@@ -390,6 +388,7 @@ static int ext4_validate_block_bitmap(struct super_block *sb,
return -EFSCORRUPTED;
}
set_buffer_verified(bh);
+verified:
ext4_unlock_group(sb, block_group);
return 0;
}
@@ -442,7 +441,16 @@ ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group)
goto verify;
}
ext4_lock_group(sb, block_group);
- if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
+ if (ext4_has_group_desc_csum(sb) &&
+ (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) {
+ if (block_group == 0) {
+ ext4_unlock_group(sb, block_group);
+ unlock_buffer(bh);
+ ext4_error(sb, "Block bitmap for bg 0 marked "
+ "uninitialized");
+ err = -EFSCORRUPTED;
+ goto out;
+ }
err = ext4_init_block_bitmap(sb, bh, block_group, desc);
set_bitmap_uptodate(bh);
set_buffer_uptodate(bh);
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 0b127853c584..7c7123f265c2 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1114,6 +1114,7 @@ struct ext4_inode_info {
#define EXT4_MOUNT_DIOREAD_NOLOCK 0x400000 /* Enable support for dio read nolocking */
#define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */
#define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */
+#define EXT4_MOUNT_WARN_ON_ERROR 0x2000000 /* Trigger WARN_ON on error */
#define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */
#define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */
#define EXT4_MOUNT_BLOCK_VALIDITY 0x20000000 /* Block validity checking */
@@ -1507,11 +1508,6 @@ static inline struct ext4_inode_info *EXT4_I(struct inode *inode)
static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
{
return ino == EXT4_ROOT_INO ||
- ino == EXT4_USR_QUOTA_INO ||
- ino == EXT4_GRP_QUOTA_INO ||
- ino == EXT4_BOOT_LOADER_INO ||
- ino == EXT4_JOURNAL_INO ||
- ino == EXT4_RESIZE_INO ||
(ino >= EXT4_FIRST_INO(sb) &&
ino <= le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count));
}
@@ -3018,9 +3014,6 @@ extern int ext4_inline_data_fiemap(struct inode *inode,
struct iomap;
extern int ext4_inline_data_iomap(struct inode *inode, struct iomap *iomap);
-extern int ext4_try_to_evict_inline_data(handle_t *handle,
- struct inode *inode,
- int needed);
extern int ext4_inline_data_truncate(struct inode *inode, int *has_inline);
extern int ext4_convert_inline_data(struct inode *inode);
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h
index 98fb0c119c68..adf6668b596f 100644
--- a/fs/ext4/ext4_extents.h
+++ b/fs/ext4/ext4_extents.h
@@ -91,6 +91,7 @@ struct ext4_extent_header {
};
#define EXT4_EXT_MAGIC cpu_to_le16(0xf30a)
+#define EXT4_MAX_EXTENT_DEPTH 5
#define EXT4_EXTENT_TAIL_OFFSET(hdr) \
(sizeof(struct ext4_extent_header) + \
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 0057fe3f248d..8ce6fd5b10dd 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -869,6 +869,12 @@ ext4_find_extent(struct inode *inode, ext4_lblk_t block,
eh = ext_inode_hdr(inode);
depth = ext_depth(inode);
+ if (depth < 0 || depth > EXT4_MAX_EXTENT_DEPTH) {
+ EXT4_ERROR_INODE(inode, "inode has invalid extent depth: %d",
+ depth);
+ ret = -EFSCORRUPTED;
+ goto err;
+ }
if (path) {
ext4_ext_drop_refs(path);
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index f525f909b559..f336cbc6e932 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -90,6 +90,8 @@ static int ext4_validate_inode_bitmap(struct super_block *sb,
return -EFSCORRUPTED;
ext4_lock_group(sb, block_group);
+ if (buffer_verified(bh))
+ goto verified;
blk = ext4_inode_bitmap(sb, desc);
if (!ext4_inode_bitmap_csum_verify(sb, block_group, desc, bh,
EXT4_INODES_PER_GROUP(sb) / 8)) {
@@ -101,6 +103,7 @@ static int ext4_validate_inode_bitmap(struct super_block *sb,
return -EFSBADCRC;
}
set_buffer_verified(bh);
+verified:
ext4_unlock_group(sb, block_group);
return 0;
}
@@ -150,7 +153,16 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
}
ext4_lock_group(sb, block_group);
- if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
+ if (ext4_has_group_desc_csum(sb) &&
+ (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT))) {
+ if (block_group == 0) {
+ ext4_unlock_group(sb, block_group);
+ unlock_buffer(bh);
+ ext4_error(sb, "Inode bitmap for bg 0 marked "
+ "uninitialized");
+ err = -EFSCORRUPTED;
+ goto out;
+ }
memset(bh->b_data, 0, (EXT4_INODES_PER_GROUP(sb) + 7) / 8);
ext4_mark_bitmap_end(EXT4_INODES_PER_GROUP(sb),
sb->s_blocksize * 8, bh->b_data);
@@ -994,7 +1006,8 @@ got:
/* recheck and clear flag under lock if we still need to */
ext4_lock_group(sb, group);
- if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
+ if (ext4_has_group_desc_csum(sb) &&
+ (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) {
gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
ext4_free_group_clusters_set(sb, gdp,
ext4_free_clusters_after_init(sb, group, gdp));
@@ -1375,7 +1388,10 @@ int ext4_init_inode_table(struct super_block *sb, ext4_group_t group,
ext4_itable_unused_count(sb, gdp)),
sbi->s_inodes_per_block);
- if ((used_blks < 0) || (used_blks > sbi->s_itb_per_group)) {
+ if ((used_blks < 0) || (used_blks > sbi->s_itb_per_group) ||
+ ((group == 0) && ((EXT4_INODES_PER_GROUP(sb) -
+ ext4_itable_unused_count(sb, gdp)) <
+ EXT4_FIRST_INO(sb)))) {
ext4_error(sb, "Something is wrong with group %u: "
"used itable blocks: %d; "
"itable unused count: %u",
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index 285ed1588730..3543fe80a3c4 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -437,6 +437,7 @@ static int ext4_destroy_inline_data_nolock(handle_t *handle,
memset((void *)ext4_raw_inode(&is.iloc)->i_block,
0, EXT4_MIN_INLINE_DATA_SIZE);
+ memset(ei->i_data, 0, EXT4_MIN_INLINE_DATA_SIZE);
if (ext4_has_feature_extents(inode->i_sb)) {
if (S_ISDIR(inode->i_mode) ||
@@ -681,6 +682,10 @@ int ext4_try_to_write_inline_data(struct address_space *mapping,
goto convert;
}
+ ret = ext4_journal_get_write_access(handle, iloc.bh);
+ if (ret)
+ goto out;
+
flags |= AOP_FLAG_NOFS;
page = grab_cache_page_write_begin(mapping, 0, flags);
@@ -709,7 +714,7 @@ int ext4_try_to_write_inline_data(struct address_space *mapping,
out_up_read:
up_read(&EXT4_I(inode)->xattr_sem);
out:
- if (handle)
+ if (handle && (ret != 1))
ext4_journal_stop(handle);
brelse(iloc.bh);
return ret;
@@ -751,6 +756,7 @@ int ext4_write_inline_data_end(struct inode *inode, loff_t pos, unsigned len,
ext4_write_unlock_xattr(inode, &no_expand);
brelse(iloc.bh);
+ mark_inode_dirty(inode);
out:
return copied;
}
@@ -886,18 +892,17 @@ retry_journal:
flags |= AOP_FLAG_NOFS;
if (ret == -ENOSPC) {
+ ext4_journal_stop(handle);
ret = ext4_da_convert_inline_data_to_extent(mapping,
inode,
flags,
fsdata);
- ext4_journal_stop(handle);
if (ret == -ENOSPC &&
ext4_should_retry_alloc(inode->i_sb, &retries))
goto retry_journal;
goto out;
}
-
page = grab_cache_page_write_begin(mapping, 0, flags);
if (!page) {
ret = -ENOMEM;
@@ -915,6 +920,9 @@ retry_journal:
if (ret < 0)
goto out_release_page;
}
+ ret = ext4_journal_get_write_access(handle, iloc.bh);
+ if (ret)
+ goto out_release_page;
up_read(&EXT4_I(inode)->xattr_sem);
*pagep = page;
@@ -935,7 +943,6 @@ int ext4_da_write_inline_data_end(struct inode *inode, loff_t pos,
unsigned len, unsigned copied,
struct page *page)
{
- int i_size_changed = 0;
int ret;
ret = ext4_write_inline_data_end(inode, pos, len, copied, page);
@@ -953,10 +960,8 @@ int ext4_da_write_inline_data_end(struct inode *inode, loff_t pos,
* But it's important to update i_size while still holding page lock:
* page writeout could otherwise come in and zero beyond i_size.
*/
- if (pos+copied > inode->i_size) {
+ if (pos+copied > inode->i_size)
i_size_write(inode, pos+copied);
- i_size_changed = 1;
- }
unlock_page(page);
put_page(page);
@@ -966,8 +971,7 @@ int ext4_da_write_inline_data_end(struct inode *inode, loff_t pos,
* ordering of page lock and transaction start for journaling
* filesystems.
*/
- if (i_size_changed)
- mark_inode_dirty(inode);
+ mark_inode_dirty(inode);
return copied;
}
@@ -1890,42 +1894,6 @@ out:
return (error < 0 ? error : 0);
}
-/*
- * Called during xattr set, and if we can sparse space 'needed',
- * just create the extent tree evict the data to the outer block.
- *
- * We use jbd2 instead of page cache to move data to the 1st block
- * so that the whole transaction can be committed as a whole and
- * the data isn't lost because of the delayed page cache write.
- */
-int ext4_try_to_evict_inline_data(handle_t *handle,
- struct inode *inode,
- int needed)
-{
- int error;
- struct ext4_xattr_entry *entry;
- struct ext4_inode *raw_inode;
- struct ext4_iloc iloc;
-
- error = ext4_get_inode_loc(inode, &iloc);
- if (error)
- return error;
-
- raw_inode = ext4_raw_inode(&iloc);
- entry = (struct ext4_xattr_entry *)((void *)raw_inode +
- EXT4_I(inode)->i_inline_off);
- if (EXT4_XATTR_LEN(entry->e_name_len) +
- EXT4_XATTR_SIZE(le32_to_cpu(entry->e_value_size)) < needed) {
- error = -ENOSPC;
- goto out;
- }
-
- error = ext4_convert_inline_data_nolock(handle, inode, &iloc);
-out:
- brelse(iloc.bh);
- return error;
-}
-
int ext4_inline_data_truncate(struct inode *inode, int *has_inline)
{
handle_t *handle;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 2ea07efbe016..4efe77286ecd 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -402,9 +402,9 @@ static int __check_block_validity(struct inode *inode, const char *func,
if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), map->m_pblk,
map->m_len)) {
ext4_error_inode(inode, func, line, map->m_pblk,
- "lblock %lu mapped to illegal pblock "
+ "lblock %lu mapped to illegal pblock %llu "
"(length %d)", (unsigned long) map->m_lblk,
- map->m_len);
+ map->m_pblk, map->m_len);
return -EFSCORRUPTED;
}
return 0;
@@ -1389,9 +1389,10 @@ static int ext4_write_end(struct file *file,
loff_t old_size = inode->i_size;
int ret = 0, ret2;
int i_size_changed = 0;
+ int inline_data = ext4_has_inline_data(inode);
trace_ext4_write_end(inode, pos, len, copied);
- if (ext4_has_inline_data(inode)) {
+ if (inline_data) {
ret = ext4_write_inline_data_end(inode, pos, len,
copied, page);
if (ret < 0) {
@@ -1419,7 +1420,7 @@ static int ext4_write_end(struct file *file,
* ordering of page lock and transaction start for journaling
* filesystems.
*/
- if (i_size_changed)
+ if (i_size_changed || inline_data)
ext4_mark_inode_dirty(handle, inode);
if (pos + len > inode->i_size && ext4_can_truncate(inode))
@@ -1493,6 +1494,7 @@ static int ext4_journalled_write_end(struct file *file,
int partial = 0;
unsigned from, to;
int size_changed = 0;
+ int inline_data = ext4_has_inline_data(inode);
trace_ext4_journalled_write_end(inode, pos, len, copied);
from = pos & (PAGE_SIZE - 1);
@@ -1500,7 +1502,7 @@ static int ext4_journalled_write_end(struct file *file,
BUG_ON(!ext4_handle_valid(handle));
- if (ext4_has_inline_data(inode)) {
+ if (inline_data) {
ret = ext4_write_inline_data_end(inode, pos, len,
copied, page);
if (ret < 0) {
@@ -1531,7 +1533,7 @@ static int ext4_journalled_write_end(struct file *file,
if (old_size < pos)
pagecache_isize_extended(inode, old_size, pos);
- if (size_changed) {
+ if (size_changed || inline_data) {
ret2 = ext4_mark_inode_dirty(handle, inode);
if (!ret)
ret = ret2;
@@ -2028,11 +2030,7 @@ static int __ext4_journalled_writepage(struct page *page,
}
if (inline_data) {
- BUFFER_TRACE(inode_bh, "get write access");
- ret = ext4_journal_get_write_access(handle, inode_bh);
-
- err = ext4_handle_dirty_metadata(handle, inode, inode_bh);
-
+ ret = ext4_mark_inode_dirty(handle, inode);
} else {
ret = ext4_walk_page_buffers(handle, page_bufs, 0, len, NULL,
do_journal_get_write_access);
@@ -4506,7 +4504,8 @@ static int __ext4_get_inode_loc(struct inode *inode,
int inodes_per_block, inode_offset;
iloc->bh = NULL;
- if (!ext4_valid_inum(sb, inode->i_ino))
+ if (inode->i_ino < EXT4_ROOT_INO ||
+ inode->i_ino > le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count))
return -EFSCORRUPTED;
iloc->block_group = (inode->i_ino - 1) / EXT4_INODES_PER_GROUP(sb);
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 6eae2b91aafa..f7ab34088162 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2423,7 +2423,8 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
* initialize bb_free to be able to skip
* empty groups without initialization
*/
- if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
+ if (ext4_has_group_desc_csum(sb) &&
+ (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) {
meta_group_info[i]->bb_free =
ext4_free_clusters_after_init(sb, group, desc);
} else {
@@ -2989,7 +2990,8 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
#endif
ext4_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start,
ac->ac_b_ex.fe_len);
- if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
+ if (ext4_has_group_desc_csum(sb) &&
+ (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) {
gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
ext4_free_group_clusters_set(sb, gdp,
ext4_free_clusters_after_init(sb,
diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c
index 27b9a76a0dfa..638ad4743477 100644
--- a/fs/ext4/mmp.c
+++ b/fs/ext4/mmp.c
@@ -186,11 +186,8 @@ static int kmmpd(void *data)
goto exit_thread;
}
- if (sb_rdonly(sb)) {
- ext4_warning(sb, "kmmpd being stopped since filesystem "
- "has been remounted as readonly.");
- goto exit_thread;
- }
+ if (sb_rdonly(sb))
+ break;
diff = jiffies - last_update_time;
if (diff < mmp_update_interval * HZ)
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 0c4c2201b3aa..b7f7922061be 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -405,6 +405,9 @@ static void ext4_journal_commit_callback(journal_t *journal, transaction_t *txn)
static void ext4_handle_error(struct super_block *sb)
{
+ if (test_opt(sb, WARN_ON_ERROR))
+ WARN_ON_ONCE(1);
+
if (sb_rdonly(sb))
return;
@@ -740,6 +743,9 @@ __acquires(bitlock)
va_end(args);
}
+ if (test_opt(sb, WARN_ON_ERROR))
+ WARN_ON_ONCE(1);
+
if (test_opt(sb, ERRORS_CONT)) {
ext4_commit_super(sb, 0);
return;
@@ -1371,7 +1377,8 @@ enum {
Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota,
Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err,
Opt_usrquota, Opt_grpquota, Opt_prjquota, Opt_i_version, Opt_dax,
- Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_mblk_io_submit,
+ Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_warn_on_error,
+ Opt_nowarn_on_error, Opt_mblk_io_submit,
Opt_lazytime, Opt_nolazytime, Opt_debug_want_extra_isize,
Opt_nomblk_io_submit, Opt_block_validity, Opt_noblock_validity,
Opt_inode_readahead_blks, Opt_journal_ioprio,
@@ -1438,6 +1445,8 @@ static const match_table_t tokens = {
{Opt_dax, "dax"},
{Opt_stripe, "stripe=%u"},
{Opt_delalloc, "delalloc"},
+ {Opt_warn_on_error, "warn_on_error"},
+ {Opt_nowarn_on_error, "nowarn_on_error"},
{Opt_lazytime, "lazytime"},
{Opt_nolazytime, "nolazytime"},
{Opt_debug_want_extra_isize, "debug_want_extra_isize=%u"},
@@ -1602,6 +1611,8 @@ static const struct mount_opts {
MOPT_EXT4_ONLY | MOPT_SET | MOPT_EXPLICIT},
{Opt_nodelalloc, EXT4_MOUNT_DELALLOC,
MOPT_EXT4_ONLY | MOPT_CLEAR},
+ {Opt_warn_on_error, EXT4_MOUNT_WARN_ON_ERROR, MOPT_SET},
+ {Opt_nowarn_on_error, EXT4_MOUNT_WARN_ON_ERROR, MOPT_CLEAR},
{Opt_nojournal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM,
MOPT_EXT4_ONLY | MOPT_CLEAR},
{Opt_journal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM,
@@ -2331,6 +2342,7 @@ static int ext4_check_descriptors(struct super_block *sb,
struct ext4_sb_info *sbi = EXT4_SB(sb);
ext4_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block);
ext4_fsblk_t last_block;
+ ext4_fsblk_t last_bg_block = sb_block + ext4_bg_num_gdb(sb, 0);
ext4_fsblk_t block_bitmap;
ext4_fsblk_t inode_bitmap;
ext4_fsblk_t inode_table;
@@ -2363,6 +2375,14 @@ static int ext4_check_descriptors(struct super_block *sb,
if (!sb_rdonly(sb))
return 0;
}
+ if (block_bitmap >= sb_block + 1 &&
+ block_bitmap <= last_bg_block) {
+ ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
+ "Block bitmap for group %u overlaps "
+ "block group descriptors", i);
+ if (!sb_rdonly(sb))
+ return 0;
+ }
if (block_bitmap < first_block || block_bitmap > last_block) {
ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
"Block bitmap for group %u not in group "
@@ -2377,6 +2397,14 @@ static int ext4_check_descriptors(struct super_block *sb,
if (!sb_rdonly(sb))
return 0;
}
+ if (inode_bitmap >= sb_block + 1 &&
+ inode_bitmap <= last_bg_block) {
+ ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
+ "Inode bitmap for group %u overlaps "
+ "block group descriptors", i);
+ if (!sb_rdonly(sb))
+ return 0;
+ }
if (inode_bitmap < first_block || inode_bitmap > last_block) {
ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
"Inode bitmap for group %u not in group "
@@ -2391,6 +2419,14 @@ static int ext4_check_descriptors(struct super_block *sb,
if (!sb_rdonly(sb))
return 0;
}
+ if (inode_table >= sb_block + 1 &&
+ inode_table <= last_bg_block) {
+ ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
+ "Inode table for group %u overlaps "
+ "block group descriptors", i);
+ if (!sb_rdonly(sb))
+ return 0;
+ }
if (inode_table < first_block ||
inode_table + sbi->s_itb_per_group - 1 > last_block) {
ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
@@ -3097,6 +3133,9 @@ static ext4_group_t ext4_has_uninit_itable(struct super_block *sb)
ext4_group_t group, ngroups = EXT4_SB(sb)->s_groups_count;
struct ext4_group_desc *gdp = NULL;
+ if (!ext4_has_group_desc_csum(sb))
+ return ngroups;
+
for (group = 0; group < ngroups; group++) {
gdp = ext4_get_group_desc(sb, group, NULL);
if (!gdp)
@@ -3742,6 +3781,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
le32_to_cpu(es->s_log_block_size));
goto failed_mount;
}
+ if (le32_to_cpu(es->s_log_cluster_size) >
+ (EXT4_MAX_CLUSTER_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) {
+ ext4_msg(sb, KERN_ERR,
+ "Invalid log cluster size: %u",
+ le32_to_cpu(es->s_log_cluster_size));
+ goto failed_mount;
+ }
if (le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) > (blocksize / 4)) {
ext4_msg(sb, KERN_ERR,
@@ -3806,6 +3852,11 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
} else {
sbi->s_inode_size = le16_to_cpu(es->s_inode_size);
sbi->s_first_ino = le32_to_cpu(es->s_first_ino);
+ if (sbi->s_first_ino < EXT4_GOOD_OLD_FIRST_INO) {
+ ext4_msg(sb, KERN_ERR, "invalid first ino: %u",
+ sbi->s_first_ino);
+ goto failed_mount;
+ }
if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) ||
(!is_power_of_2(sbi->s_inode_size)) ||
(sbi->s_inode_size > blocksize)) {
@@ -3882,13 +3933,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
"block size (%d)", clustersize, blocksize);
goto failed_mount;
}
- if (le32_to_cpu(es->s_log_cluster_size) >
- (EXT4_MAX_CLUSTER_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) {
- ext4_msg(sb, KERN_ERR,
- "Invalid log cluster size: %u",
- le32_to_cpu(es->s_log_cluster_size));
- goto failed_mount;
- }
sbi->s_cluster_bits = le32_to_cpu(es->s_log_cluster_size) -
le32_to_cpu(es->s_log_block_size);
sbi->s_clusters_per_group =
@@ -3909,10 +3953,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
}
} else {
if (clustersize != blocksize) {
- ext4_warning(sb, "fragment/cluster size (%d) != "
- "block size (%d)", clustersize,
- blocksize);
- clustersize = blocksize;
+ ext4_msg(sb, KERN_ERR,
+ "fragment/cluster size (%d) != "
+ "block size (%d)", clustersize, blocksize);
+ goto failed_mount;
}
if (sbi->s_blocks_per_group > blocksize * 8) {
ext4_msg(sb, KERN_ERR,
@@ -3966,6 +4010,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
ext4_blocks_count(es));
goto failed_mount;
}
+ if ((es->s_first_data_block == 0) && (es->s_log_block_size == 0) &&
+ (sbi->s_cluster_ratio == 1)) {
+ ext4_msg(sb, KERN_WARNING, "bad geometry: first data "
+ "block is 0 with a 1k block and cluster size");
+ goto failed_mount;
+ }
+
blocks_count = (ext4_blocks_count(es) -
le32_to_cpu(es->s_first_data_block) +
EXT4_BLOCKS_PER_GROUP(sb) - 1);
@@ -4001,6 +4052,14 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
ret = -ENOMEM;
goto failed_mount;
}
+ if (((u64)sbi->s_groups_count * sbi->s_inodes_per_group) !=
+ le32_to_cpu(es->s_inodes_count)) {
+ ext4_msg(sb, KERN_ERR, "inodes count not valid: %u vs %llu",
+ le32_to_cpu(es->s_inodes_count),
+ ((u64)sbi->s_groups_count * sbi->s_inodes_per_group));
+ ret = -EINVAL;
+ goto failed_mount;
+ }
bgl_lock_init(sbi->s_blockgroup_lock);
@@ -4020,14 +4079,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
goto failed_mount2;
}
}
+ sbi->s_gdb_count = db_count;
if (!ext4_check_descriptors(sb, logical_sb_block, &first_not_zeroed)) {
ext4_msg(sb, KERN_ERR, "group descriptors corrupted!");
ret = -EFSCORRUPTED;
goto failed_mount2;
}
- sbi->s_gdb_count = db_count;
-
timer_setup(&sbi->s_err_report, print_daily_error_info, 0);
/* Register extent status tree shrinker */
@@ -4736,6 +4794,14 @@ static int ext4_commit_super(struct super_block *sb, int sync)
if (!sbh || block_device_ejected(sb))
return error;
+
+ /*
+ * The superblock bh should be mapped, but it might not be if the
+ * device was hot-removed. Not much we can do but fail the I/O.
+ */
+ if (!buffer_mapped(sbh))
+ return error;
+
/*
* If the file system is mounted read-only, don't update the
* superblock write time. This avoids updating the superblock
@@ -5140,6 +5206,8 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
if (sbi->s_journal)
ext4_mark_recovery_complete(sb, es);
+ if (sbi->s_mmp_tsk)
+ kthread_stop(sbi->s_mmp_tsk);
} else {
/* Make sure we can mount this feature set readwrite */
if (ext4_has_feature_readonly(sb) ||
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index fc4ced59c565..723df14f4084 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -230,12 +230,12 @@ __ext4_xattr_check_block(struct inode *inode, struct buffer_head *bh,
{
int error = -EFSCORRUPTED;
- if (buffer_verified(bh))
- return 0;
-
if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) ||
BHDR(bh)->h_blocks != cpu_to_le32(1))
goto errout;
+ if (buffer_verified(bh))
+ return 0;
+
error = -EFSBADCRC;
if (!ext4_xattr_block_csum_verify(inode, bh))
goto errout;
@@ -1560,7 +1560,7 @@ static int ext4_xattr_set_entry(struct ext4_xattr_info *i,
handle_t *handle, struct inode *inode,
bool is_block)
{
- struct ext4_xattr_entry *last;
+ struct ext4_xattr_entry *last, *next;
struct ext4_xattr_entry *here = s->here;
size_t min_offs = s->end - s->base, name_len = strlen(i->name);
int in_inode = i->in_inode;
@@ -1595,7 +1595,13 @@ static int ext4_xattr_set_entry(struct ext4_xattr_info *i,
/* Compute min_offs and last. */
last = s->first;
- for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) {
+ for (; !IS_LAST_ENTRY(last); last = next) {
+ next = EXT4_XATTR_NEXT(last);
+ if ((void *)next >= s->end) {
+ EXT4_ERROR_INODE(inode, "corrupted xattr entries");
+ ret = -EFSCORRUPTED;
+ goto out;
+ }
if (!last->e_value_inum && last->e_value_size) {
size_t offs = le16_to_cpu(last->e_value_offs);
if (offs < min_offs)
@@ -2206,23 +2212,8 @@ int ext4_xattr_ibody_inline_set(handle_t *handle, struct inode *inode,
if (EXT4_I(inode)->i_extra_isize == 0)
return -ENOSPC;
error = ext4_xattr_set_entry(i, s, handle, inode, false /* is_block */);
- if (error) {
- if (error == -ENOSPC &&
- ext4_has_inline_data(inode)) {
- error = ext4_try_to_evict_inline_data(handle, inode,
- EXT4_XATTR_LEN(strlen(i->name) +
- EXT4_XATTR_SIZE(i->value_len)));
- if (error)
- return error;
- error = ext4_xattr_ibody_find(inode, i, is);
- if (error)
- return error;
- error = ext4_xattr_set_entry(i, s, handle, inode,
- false /* is_block */);
- }
- if (error)
- return error;
- }
+ if (error)
+ return error;
header = IHDR(inode, ext4_raw_inode(&is->iloc));
if (!IS_LAST_ENTRY(s->first)) {
header->h_magic = cpu_to_le32(EXT4_XATTR_MAGIC);
@@ -2651,6 +2642,11 @@ static int ext4_xattr_make_inode_space(handle_t *handle, struct inode *inode,
last = IFIRST(header);
/* Find the entry best suited to be pushed into EA block */
for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) {
+ /* never move system.data out of the inode */
+ if ((last->e_name_len == 4) &&
+ (last->e_name_index == EXT4_XATTR_INDEX_SYSTEM) &&
+ !memcmp(last->e_name, "data", 4))
+ continue;
total_size = EXT4_XATTR_LEN(last->e_name_len);
if (!last->e_value_inum)
total_size += EXT4_XATTR_SIZE(
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 065dc919a0ce..bfd589ea74c0 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -707,13 +707,21 @@ static void fat_set_state(struct super_block *sb,
brelse(bh);
}
+static void fat_reset_iocharset(struct fat_mount_options *opts)
+{
+ if (opts->iocharset != fat_default_iocharset) {
+ /* Note: opts->iocharset can be NULL here */
+ kfree(opts->iocharset);
+ opts->iocharset = fat_default_iocharset;
+ }
+}
+
static void delayed_free(struct rcu_head *p)
{
struct msdos_sb_info *sbi = container_of(p, struct msdos_sb_info, rcu);
unload_nls(sbi->nls_disk);
unload_nls(sbi->nls_io);
- if (sbi->options.iocharset != fat_default_iocharset)
- kfree(sbi->options.iocharset);
+ fat_reset_iocharset(&sbi->options);
kfree(sbi);
}
@@ -1132,7 +1140,7 @@ static int parse_options(struct super_block *sb, char *options, int is_vfat,
opts->fs_fmask = opts->fs_dmask = current_umask();
opts->allow_utime = -1;
opts->codepage = fat_default_codepage;
- opts->iocharset = fat_default_iocharset;
+ fat_reset_iocharset(opts);
if (is_vfat) {
opts->shortname = VFAT_SFN_DISPLAY_WINNT|VFAT_SFN_CREATE_WIN95;
opts->rodir = 0;
@@ -1289,8 +1297,7 @@ static int parse_options(struct super_block *sb, char *options, int is_vfat,
/* vfat specific */
case Opt_charset:
- if (opts->iocharset != fat_default_iocharset)
- kfree(opts->iocharset);
+ fat_reset_iocharset(opts);
iocharset = match_strdup(&args[0]);
if (!iocharset)
return -ENOMEM;
@@ -1881,8 +1888,7 @@ out_fail:
iput(fat_inode);
unload_nls(sbi->nls_io);
unload_nls(sbi->nls_disk);
- if (sbi->options.iocharset != fat_default_iocharset)
- kfree(sbi->options.iocharset);
+ fat_reset_iocharset(&sbi->options);
sb->s_fs_info = NULL;
kfree(sbi);
return error;
diff --git a/fs/fscache/cache.c b/fs/fscache/cache.c
index c184c5a356ff..cdcb376ef8df 100644
--- a/fs/fscache/cache.c
+++ b/fs/fscache/cache.c
@@ -220,6 +220,7 @@ int fscache_add_cache(struct fscache_cache *cache,
{
struct fscache_cache_tag *tag;
+ ASSERTCMP(ifsdef->cookie, ==, &fscache_fsdef_index);
BUG_ON(!cache->ops);
BUG_ON(!ifsdef);
@@ -248,7 +249,6 @@ int fscache_add_cache(struct fscache_cache *cache,
if (!cache->kobj)
goto error;
- ifsdef->cookie = &fscache_fsdef_index;
ifsdef->cache = cache;
cache->fsdef = ifsdef;
diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c
index 97137d7ec5ee..83bfe04456b6 100644
--- a/fs/fscache/cookie.c
+++ b/fs/fscache/cookie.c
@@ -516,6 +516,7 @@ static int fscache_alloc_object(struct fscache_cache *cache,
goto error;
}
+ ASSERTCMP(object->cookie, ==, cookie);
fscache_stat(&fscache_n_object_alloc);
object->debug_id = atomic_inc_return(&fscache_object_debug_id);
@@ -571,6 +572,8 @@ static int fscache_attach_object(struct fscache_cookie *cookie,
_enter("{%s},{OBJ%x}", cookie->def->name, object->debug_id);
+ ASSERTCMP(object->cookie, ==, cookie);
+
spin_lock(&cookie->lock);
/* there may be multiple initial creations of this object, but we only
@@ -610,9 +613,7 @@ static int fscache_attach_object(struct fscache_cookie *cookie,
spin_unlock(&cache->object_list_lock);
}
- /* attach to the cookie */
- object->cookie = cookie;
- fscache_cookie_get(cookie, fscache_cookie_get_attach_object);
+ /* Attach to the cookie. The object already has a ref on it. */
hlist_add_head(&object->cookie_link, &cookie->backing_objects);
fscache_objlist_add(object);
diff --git a/fs/fscache/object.c b/fs/fscache/object.c
index 20e0d0a4dc8c..9edc920f651f 100644
--- a/fs/fscache/object.c
+++ b/fs/fscache/object.c
@@ -327,6 +327,7 @@ void fscache_object_init(struct fscache_object *object,
object->store_limit_l = 0;
object->cache = cache;
object->cookie = cookie;
+ fscache_cookie_get(cookie, fscache_cookie_get_attach_object);
object->parent = NULL;
#ifdef CONFIG_FSCACHE_OBJECT_LIST
RB_CLEAR_NODE(&object->objlist_link);
diff --git a/fs/fscache/operation.c b/fs/fscache/operation.c
index e30c5975ea58..8d265790374c 100644
--- a/fs/fscache/operation.c
+++ b/fs/fscache/operation.c
@@ -70,7 +70,8 @@ void fscache_enqueue_operation(struct fscache_operation *op)
ASSERT(op->processor != NULL);
ASSERT(fscache_object_is_available(op->object));
ASSERTCMP(atomic_read(&op->usage), >, 0);
- ASSERTCMP(op->state, ==, FSCACHE_OP_ST_IN_PROGRESS);
+ ASSERTIFCMP(op->state != FSCACHE_OP_ST_IN_PROGRESS,
+ op->state, ==, FSCACHE_OP_ST_CANCELLED);
fscache_stat(&fscache_n_op_enqueue);
switch (op->flags & FSCACHE_OP_TYPE) {
@@ -499,7 +500,8 @@ void fscache_put_operation(struct fscache_operation *op)
struct fscache_cache *cache;
_enter("{OBJ%x OP%x,%d}",
- op->object->debug_id, op->debug_id, atomic_read(&op->usage));
+ op->object ? op->object->debug_id : 0,
+ op->debug_id, atomic_read(&op->usage));
ASSERTCMP(atomic_read(&op->usage), >, 0);
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index d508c7844681..40d4c66c7751 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -411,6 +411,7 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
bool truncate_op = (lend == LLONG_MAX);
memset(&pseudo_vma, 0, sizeof(struct vm_area_struct));
+ vma_init(&pseudo_vma, current->mm);
pseudo_vma.vm_flags = (VM_HUGETLB | VM_MAYSHARE | VM_SHARED);
pagevec_init(&pvec);
next = start;
@@ -595,6 +596,7 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
* as input to create an allocation policy.
*/
memset(&pseudo_vma, 0, sizeof(struct vm_area_struct));
+ vma_init(&pseudo_vma, mm);
pseudo_vma.vm_flags = (VM_HUGETLB | VM_MAYSHARE | VM_SHARED);
pseudo_vma.vm_file = file;
diff --git a/fs/inode.c b/fs/inode.c
index 2c300e981796..8c86c809ca17 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1999,8 +1999,14 @@ void inode_init_owner(struct inode *inode, const struct inode *dir,
inode->i_uid = current_fsuid();
if (dir && dir->i_mode & S_ISGID) {
inode->i_gid = dir->i_gid;
+
+ /* Directories are special, and always inherit S_ISGID */
if (S_ISDIR(mode))
mode |= S_ISGID;
+ else if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP) &&
+ !in_group_p(inode->i_gid) &&
+ !capable_wrt_inode_uidgid(dir, CAP_FSETID))
+ mode &= ~S_ISGID;
} else
inode->i_gid = current_fsgid();
inode->i_mode = mode;
diff --git a/fs/internal.h b/fs/internal.h
index 980d005b21b4..5645b4ebf494 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -127,7 +127,6 @@ int do_fchownat(int dfd, const char __user *filename, uid_t user, gid_t group,
extern int open_check_o_direct(struct file *f);
extern int vfs_open(const struct path *, struct file *, const struct cred *);
-extern struct file *filp_clone_open(struct file *);
/*
* inode.c
diff --git a/fs/iomap.c b/fs/iomap.c
index 77397b5a96ef..0d0bd8845586 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -1443,7 +1443,7 @@ iomap_bmap(struct address_space *mapping, sector_t bno,
const struct iomap_ops *ops)
{
struct inode *inode = mapping->host;
- loff_t pos = bno >> inode->i_blkbits;
+ loff_t pos = bno << inode->i_blkbits;
unsigned blocksize = i_blocksize(inode);
if (filemap_write_and_wait(mapping))
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 51dd68e67b0f..c0b66a7a795b 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -1361,6 +1361,13 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
if (jh->b_transaction == transaction &&
jh->b_jlist != BJ_Metadata) {
jbd_lock_bh_state(bh);
+ if (jh->b_transaction == transaction &&
+ jh->b_jlist != BJ_Metadata)
+ pr_err("JBD2: assertion failure: h_type=%u "
+ "h_line_no=%u block_no=%llu jlist=%u\n",
+ handle->h_type, handle->h_line_no,
+ (unsigned long long) bh->b_blocknr,
+ jh->b_jlist);
J_ASSERT_JH(jh, jh->b_transaction != transaction ||
jh->b_jlist == BJ_Metadata);
jbd_unlock_bh_state(bh);
@@ -1380,11 +1387,11 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
* of the transaction. This needs to be done
* once a transaction -bzzz
*/
- jh->b_modified = 1;
if (handle->h_buffer_credits <= 0) {
ret = -ENOSPC;
goto out_unlock_bh;
}
+ jh->b_modified = 1;
handle->h_buffer_credits--;
}
diff --git a/fs/jfs/jfs_dinode.h b/fs/jfs/jfs_dinode.h
index 395c4c0d0f06..1682a87c00b2 100644
--- a/fs/jfs/jfs_dinode.h
+++ b/fs/jfs/jfs_dinode.h
@@ -115,6 +115,13 @@ struct dinode {
dxd_t _dxd; /* 16: */
union {
__le32 _rdev; /* 4: */
+ /*
+ * The fast symlink area
+ * is expected to overflow
+ * into _inlineea when
+ * needed (which will clear
+ * INLINEEA).
+ */
u8 _fastsymlink[128];
} _u;
u8 _inlineea[128];
diff --git a/fs/jfs/jfs_incore.h b/fs/jfs/jfs_incore.h
index 1f26d1910409..9940a1e04cbf 100644
--- a/fs/jfs/jfs_incore.h
+++ b/fs/jfs/jfs_incore.h
@@ -87,6 +87,7 @@ struct jfs_inode_info {
struct {
unchar _unused[16]; /* 16: */
dxd_t _dxd; /* 16: */
+ /* _inline may overflow into _inline_ea when needed */
unchar _inline[128]; /* 128: inline symlink */
/* _inline_ea may overlay the last part of
* file._xtroot if maxentry = XTROOTINITSLOT
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 1b9264fd54b6..f08571433aba 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -967,8 +967,7 @@ static int __init init_jfs_fs(void)
jfs_inode_cachep =
kmem_cache_create_usercopy("jfs_ip", sizeof(struct jfs_inode_info),
0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD|SLAB_ACCOUNT,
- offsetof(struct jfs_inode_info, i_inline),
- sizeof_field(struct jfs_inode_info, i_inline),
+ offsetof(struct jfs_inode_info, i_inline), IDATASIZE,
init_once);
if (jfs_inode_cachep == NULL)
return -ENOMEM;
diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c
index c60f3d32ee91..a6797986b625 100644
--- a/fs/jfs/xattr.c
+++ b/fs/jfs/xattr.c
@@ -491,15 +491,17 @@ static int ea_get(struct inode *inode, struct ea_buffer *ea_buf, int min_size)
if (size > PSIZE) {
/*
* To keep the rest of the code simple. Allocate a
- * contiguous buffer to work with
+ * contiguous buffer to work with. Make the buffer large
+ * enough to make use of the whole extent.
*/
- ea_buf->xattr = kmalloc(size, GFP_KERNEL);
+ ea_buf->max_size = (size + sb->s_blocksize - 1) &
+ ~(sb->s_blocksize - 1);
+
+ ea_buf->xattr = kmalloc(ea_buf->max_size, GFP_KERNEL);
if (ea_buf->xattr == NULL)
return -ENOMEM;
ea_buf->flag = EA_MALLOC;
- ea_buf->max_size = (size + sb->s_blocksize - 1) &
- ~(sb->s_blocksize - 1);
if (ea_size == 0)
return 0;
diff --git a/fs/namespace.c b/fs/namespace.c
index 8ddd14806799..bd2f4c68506a 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -659,12 +659,21 @@ int __legitimize_mnt(struct vfsmount *bastard, unsigned seq)
return 0;
mnt = real_mount(bastard);
mnt_add_count(mnt, 1);
+ smp_mb(); // see mntput_no_expire()
if (likely(!read_seqretry(&mount_lock, seq)))
return 0;
if (bastard->mnt_flags & MNT_SYNC_UMOUNT) {
mnt_add_count(mnt, -1);
return 1;
}
+ lock_mount_hash();
+ if (unlikely(bastard->mnt_flags & MNT_DOOMED)) {
+ mnt_add_count(mnt, -1);
+ unlock_mount_hash();
+ return 1;
+ }
+ unlock_mount_hash();
+ /* caller will mntput() */
return -1;
}
@@ -1195,12 +1204,27 @@ static DECLARE_DELAYED_WORK(delayed_mntput_work, delayed_mntput);
static void mntput_no_expire(struct mount *mnt)
{
rcu_read_lock();
- mnt_add_count(mnt, -1);
- if (likely(mnt->mnt_ns)) { /* shouldn't be the last one */
+ if (likely(READ_ONCE(mnt->mnt_ns))) {
+ /*
+ * Since we don't do lock_mount_hash() here,
+ * ->mnt_ns can change under us. However, if it's
+ * non-NULL, then there's a reference that won't
+ * be dropped until after an RCU delay done after
+ * turning ->mnt_ns NULL. So if we observe it
+ * non-NULL under rcu_read_lock(), the reference
+ * we are dropping is not the final one.
+ */
+ mnt_add_count(mnt, -1);
rcu_read_unlock();
return;
}
lock_mount_hash();
+ /*
+ * make sure that if __legitimize_mnt() has not seen us grab
+ * mount_lock, we'll see their refcount increment here.
+ */
+ smp_mb();
+ mnt_add_count(mnt, -1);
if (mnt_get_count(mnt)) {
rcu_read_unlock();
unlock_mount_hash();
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index bbd0465535eb..f033f3a69a3b 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -883,8 +883,10 @@ struct inode *nfs_delegation_find_inode(struct nfs_client *clp,
rcu_read_lock();
list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
res = nfs_delegation_find_inode_server(server, fhandle);
- if (res != ERR_PTR(-ENOENT))
+ if (res != ERR_PTR(-ENOENT)) {
+ rcu_read_unlock();
return res;
+ }
}
rcu_read_unlock();
return ERR_PTR(-ENOENT);
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index d4a07acad598..8f003792ccde 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -1243,17 +1243,18 @@ static int ff_layout_read_done_cb(struct rpc_task *task,
hdr->ds_clp, hdr->lseg,
hdr->pgio_mirror_idx);
+ clear_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags);
+ clear_bit(NFS_IOHDR_RESEND_MDS, &hdr->flags);
switch (err) {
case -NFS4ERR_RESET_TO_PNFS:
if (ff_layout_choose_best_ds_for_read(hdr->lseg,
hdr->pgio_mirror_idx + 1,
&hdr->pgio_mirror_idx))
goto out_eagain;
- ff_layout_read_record_layoutstats_done(task, hdr);
- pnfs_read_resend_pnfs(hdr);
+ set_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags);
return task->tk_status;
case -NFS4ERR_RESET_TO_MDS:
- ff_layout_reset_read(hdr);
+ set_bit(NFS_IOHDR_RESEND_MDS, &hdr->flags);
return task->tk_status;
case -EAGAIN:
goto out_eagain;
@@ -1403,6 +1404,10 @@ static void ff_layout_read_release(void *data)
struct nfs_pgio_header *hdr = data;
ff_layout_read_record_layoutstats_done(&hdr->task, hdr);
+ if (test_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags))
+ pnfs_read_resend_pnfs(hdr);
+ else if (test_bit(NFS_IOHDR_RESEND_MDS, &hdr->flags))
+ ff_layout_reset_read(hdr);
pnfs_generic_rw_release(data);
}
@@ -1423,12 +1428,14 @@ static int ff_layout_write_done_cb(struct rpc_task *task,
hdr->ds_clp, hdr->lseg,
hdr->pgio_mirror_idx);
+ clear_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags);
+ clear_bit(NFS_IOHDR_RESEND_MDS, &hdr->flags);
switch (err) {
case -NFS4ERR_RESET_TO_PNFS:
- ff_layout_reset_write(hdr, true);
+ set_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags);
return task->tk_status;
case -NFS4ERR_RESET_TO_MDS:
- ff_layout_reset_write(hdr, false);
+ set_bit(NFS_IOHDR_RESEND_MDS, &hdr->flags);
return task->tk_status;
case -EAGAIN:
return -EAGAIN;
@@ -1575,6 +1582,10 @@ static void ff_layout_write_release(void *data)
struct nfs_pgio_header *hdr = data;
ff_layout_write_record_layoutstats_done(&hdr->task, hdr);
+ if (test_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags))
+ ff_layout_reset_write(hdr, true);
+ else if (test_bit(NFS_IOHDR_RESEND_MDS, &hdr->flags))
+ ff_layout_reset_write(hdr, false);
pnfs_generic_rw_release(data);
}
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index ed45090e4df6..f6c4ccd693f4 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -3294,6 +3294,7 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
struct nfs4_closedata *calldata = data;
struct nfs4_state *state = calldata->state;
struct inode *inode = calldata->inode;
+ struct pnfs_layout_hdr *lo;
bool is_rdonly, is_wronly, is_rdwr;
int call_close = 0;
@@ -3337,6 +3338,12 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
goto out_wait;
}
+ lo = calldata->arg.lr_args ? calldata->arg.lr_args->layout : NULL;
+ if (lo && !pnfs_layout_is_valid(lo)) {
+ calldata->arg.lr_args = NULL;
+ calldata->res.lr_res = NULL;
+ }
+
if (calldata->arg.fmode == 0)
task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE];
@@ -5972,12 +5979,19 @@ static void nfs4_delegreturn_release(void *calldata)
static void nfs4_delegreturn_prepare(struct rpc_task *task, void *data)
{
struct nfs4_delegreturndata *d_data;
+ struct pnfs_layout_hdr *lo;
d_data = (struct nfs4_delegreturndata *)data;
if (!d_data->lr.roc && nfs4_wait_on_layoutreturn(d_data->inode, task))
return;
+ lo = d_data->args.lr_args ? d_data->args.lr_args->layout : NULL;
+ if (lo && !pnfs_layout_is_valid(lo)) {
+ d_data->args.lr_args = NULL;
+ d_data->res.lr_res = NULL;
+ }
+
nfs4_setup_sequence(d_data->res.server->nfs_client,
&d_data->args.seq_args,
&d_data->res.seq_res,
@@ -6452,34 +6466,34 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata)
if (data->arg.new_lock && !data->cancelled) {
data->fl.fl_flags &= ~(FL_SLEEP | FL_ACCESS);
if (locks_lock_inode_wait(lsp->ls_state->inode, &data->fl) < 0)
- break;
+ goto out_restart;
}
-
if (data->arg.new_lock_owner != 0) {
nfs_confirm_seqid(&lsp->ls_seqid, 0);
nfs4_stateid_copy(&lsp->ls_stateid, &data->res.stateid);
set_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags);
- goto out_done;
- } else if (nfs4_update_lock_stateid(lsp, &data->res.stateid))
- goto out_done;
-
+ } else if (!nfs4_update_lock_stateid(lsp, &data->res.stateid))
+ goto out_restart;
break;
case -NFS4ERR_BAD_STATEID:
case -NFS4ERR_OLD_STATEID:
case -NFS4ERR_STALE_STATEID:
case -NFS4ERR_EXPIRED:
if (data->arg.new_lock_owner != 0) {
- if (nfs4_stateid_match(&data->arg.open_stateid,
+ if (!nfs4_stateid_match(&data->arg.open_stateid,
&lsp->ls_state->open_stateid))
- goto out_done;
- } else if (nfs4_stateid_match(&data->arg.lock_stateid,
+ goto out_restart;
+ } else if (!nfs4_stateid_match(&data->arg.lock_stateid,
&lsp->ls_stateid))
- goto out_done;
+ goto out_restart;
}
- if (!data->cancelled)
- rpc_restart_call_prepare(task);
out_done:
dprintk("%s: done, ret = %d!\n", __func__, data->rpc_status);
+ return;
+out_restart:
+ if (!data->cancelled)
+ rpc_restart_call_prepare(task);
+ goto out_done;
}
static void nfs4_lock_release(void *calldata)
@@ -6488,7 +6502,7 @@ static void nfs4_lock_release(void *calldata)
dprintk("%s: begin!\n", __func__);
nfs_free_seqid(data->arg.open_seqid);
- if (data->cancelled) {
+ if (data->cancelled && data->rpc_status == 0) {
struct rpc_task *task;
task = nfs4_do_unlck(&data->fl, data->ctx, data->lsp,
data->arg.lock_seqid);
@@ -8650,6 +8664,8 @@ nfs4_layoutget_handle_exception(struct rpc_task *task,
dprintk("--> %s tk_status => %d\n", __func__, -task->tk_status);
+ nfs4_sequence_free_slot(&lgp->res.seq_res);
+
switch (nfs4err) {
case 0:
goto out;
@@ -8714,7 +8730,6 @@ nfs4_layoutget_handle_exception(struct rpc_task *task,
goto out;
}
- nfs4_sequence_free_slot(&lgp->res.seq_res);
err = nfs4_handle_exception(server, nfs4err, exception);
if (!status) {
if (exception->retry)
@@ -8786,20 +8801,22 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, long *timeout)
if (IS_ERR(task))
return ERR_CAST(task);
status = rpc_wait_for_completion_task(task);
- if (status == 0) {
+ if (status != 0)
+ goto out;
+
+ /* if layoutp->len is 0, nfs4_layoutget_prepare called rpc_exit */
+ if (task->tk_status < 0 || lgp->res.layoutp->len == 0) {
status = nfs4_layoutget_handle_exception(task, lgp, &exception);
*timeout = exception.timeout;
- }
-
+ } else
+ lseg = pnfs_layout_process(lgp);
+out:
trace_nfs4_layoutget(lgp->args.ctx,
&lgp->args.range,
&lgp->res.range,
&lgp->res.stateid,
status);
- /* if layoutp->len is 0, nfs4_layoutget_prepare called rpc_exit */
- if (status == 0 && lgp->res.layoutp->len)
- lseg = pnfs_layout_process(lgp);
rpc_put_task(task);
dprintk("<-- %s status=%d\n", __func__, status);
if (status)
@@ -8817,6 +8834,8 @@ nfs4_layoutreturn_prepare(struct rpc_task *task, void *calldata)
&lrp->args.seq_args,
&lrp->res.seq_res,
task);
+ if (!pnfs_layout_is_valid(lrp->args.layout))
+ rpc_exit(task, 0);
}
static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata)
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index a8f5e6b16749..3fe81424337d 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -801,6 +801,11 @@ static inline void nfs4_lgopen_release(struct nfs4_layoutget *lgp)
{
}
+static inline bool pnfs_layout_is_valid(const struct pnfs_layout_hdr *lo)
+{
+ return false;
+}
+
#endif /* CONFIG_NFS_V4_1 */
#if IS_ENABLED(CONFIG_NFS_V4_2)
diff --git a/fs/pipe.c b/fs/pipe.c
index bb0840e234f3..39d6f431da83 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -509,22 +509,19 @@ static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
}
}
-static struct wait_queue_head *
-pipe_get_poll_head(struct file *filp, __poll_t events)
-{
- struct pipe_inode_info *pipe = filp->private_data;
-
- return &pipe->wait;
-}
-
/* No kernel lock held - fine */
-static __poll_t pipe_poll_mask(struct file *filp, __poll_t events)
+static __poll_t
+pipe_poll(struct file *filp, poll_table *wait)
{
+ __poll_t mask;
struct pipe_inode_info *pipe = filp->private_data;
- int nrbufs = pipe->nrbufs;
- __poll_t mask = 0;
+ int nrbufs;
+
+ poll_wait(filp, &pipe->wait, wait);
/* Reading only -- no need for acquiring the semaphore. */
+ nrbufs = pipe->nrbufs;
+ mask = 0;
if (filp->f_mode & FMODE_READ) {
mask = (nrbufs > 0) ? EPOLLIN | EPOLLRDNORM : 0;
if (!pipe->writers && filp->f_version != pipe->w_counter)
@@ -1023,8 +1020,7 @@ const struct file_operations pipefifo_fops = {
.llseek = no_llseek,
.read_iter = pipe_read,
.write_iter = pipe_write,
- .get_poll_head = pipe_get_poll_head,
- .poll_mask = pipe_poll_mask,
+ .poll = pipe_poll,
.unlocked_ioctl = pipe_ioctl,
.release = pipe_release,
.fasync = pipe_fasync,
diff --git a/fs/proc/base.c b/fs/proc/base.c
index b6572944efc3..aaffc0c30216 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -235,6 +235,10 @@ static ssize_t get_mm_cmdline(struct mm_struct *mm, char __user *buf,
if (env_start != arg_end || env_start >= env_end)
env_start = env_end = arg_end;
+ /* .. and limit it to a maximum of one page of slop */
+ if (env_end >= arg_end + PAGE_SIZE)
+ env_end = arg_end + PAGE_SIZE - 1;
+
/* We're not going to care if "*ppos" has high bits set */
pos = arg_start + *ppos;
@@ -254,10 +258,19 @@ static ssize_t get_mm_cmdline(struct mm_struct *mm, char __user *buf,
while (count) {
int got;
size_t size = min_t(size_t, PAGE_SIZE, count);
+ long offset;
- got = access_remote_vm(mm, pos, page, size, FOLL_ANON);
- if (got <= 0)
+ /*
+ * Are we already starting past the official end?
+ * We always include the last byte that is *supposed*
+ * to be NUL
+ */
+ offset = (pos >= arg_end) ? pos - arg_end + 1 : 0;
+
+ got = access_remote_vm(mm, pos - offset, page, size + offset, FOLL_ANON);
+ if (got <= offset)
break;
+ got -= offset;
/* Don't walk past a NUL character once you hit arg_end */
if (pos + got >= arg_end) {
@@ -276,12 +289,17 @@ static ssize_t get_mm_cmdline(struct mm_struct *mm, char __user *buf,
n = arg_end - pos - 1;
/* Cut off at first NUL after 'n' */
- got = n + strnlen(page+n, got-n);
- if (!got)
+ got = n + strnlen(page+n, offset+got-n);
+ if (got < offset)
break;
+ got -= offset;
+
+ /* Include the NUL if it existed */
+ if (got < size)
+ got++;
}
- got -= copy_to_user(buf, page, got);
+ got -= copy_to_user(buf, page+offset, got);
if (unlikely(!got)) {
if (!len)
len = -EFAULT;
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 6ac1c92997ea..bb1c1625b158 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -564,11 +564,20 @@ static int proc_seq_open(struct inode *inode, struct file *file)
return seq_open(file, de->seq_ops);
}
+static int proc_seq_release(struct inode *inode, struct file *file)
+{
+ struct proc_dir_entry *de = PDE(inode);
+
+ if (de->state_size)
+ return seq_release_private(inode, file);
+ return seq_release(inode, file);
+}
+
static const struct file_operations proc_seq_fops = {
.open = proc_seq_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = seq_release,
+ .release = proc_seq_release,
};
struct proc_dir_entry *proc_create_seq_private(const char *name, umode_t mode,
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index e9679016271f..dfd73a4616ce 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -831,7 +831,8 @@ static int show_smap(struct seq_file *m, void *v, int is_pid)
SEQ_PUT_DEC(" kB\nSwap: ", mss->swap);
SEQ_PUT_DEC(" kB\nSwapPss: ",
mss->swap_pss >> PSS_SHIFT);
- SEQ_PUT_DEC(" kB\nLocked: ", mss->pss >> PSS_SHIFT);
+ SEQ_PUT_DEC(" kB\nLocked: ",
+ mss->pss_locked >> PSS_SHIFT);
seq_puts(m, " kB\n");
}
if (!rollup_mode) {
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index d88231e3b2be..fc20e06c56ba 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -711,21 +711,18 @@ EXPORT_SYMBOL(dquot_quota_sync);
static unsigned long
dqcache_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
{
- struct list_head *head;
struct dquot *dquot;
unsigned long freed = 0;
spin_lock(&dq_list_lock);
- head = free_dquots.prev;
- while (head != &free_dquots && sc->nr_to_scan) {
- dquot = list_entry(head, struct dquot, dq_free);
+ while (!list_empty(&free_dquots) && sc->nr_to_scan) {
+ dquot = list_first_entry(&free_dquots, struct dquot, dq_free);
remove_dquot_hash(dquot);
remove_free_dquot(dquot);
remove_inuse(dquot);
do_destroy_dquot(dquot);
sc->nr_to_scan--;
freed++;
- head = free_dquots.prev;
}
spin_unlock(&dq_list_lock);
return freed;
diff --git a/fs/reiserfs/prints.c b/fs/reiserfs/prints.c
index 7e288d97adcb..9fed1c05f1f4 100644
--- a/fs/reiserfs/prints.c
+++ b/fs/reiserfs/prints.c
@@ -76,83 +76,99 @@ static char *le_type(struct reiserfs_key *key)
}
/* %k */
-static void sprintf_le_key(char *buf, struct reiserfs_key *key)
+static int scnprintf_le_key(char *buf, size_t size, struct reiserfs_key *key)
{
if (key)
- sprintf(buf, "[%d %d %s %s]", le32_to_cpu(key->k_dir_id),
- le32_to_cpu(key->k_objectid), le_offset(key),
- le_type(key));
+ return scnprintf(buf, size, "[%d %d %s %s]",
+ le32_to_cpu(key->k_dir_id),
+ le32_to_cpu(key->k_objectid), le_offset(key),
+ le_type(key));
else
- sprintf(buf, "[NULL]");
+ return scnprintf(buf, size, "[NULL]");
}
/* %K */
-static void sprintf_cpu_key(char *buf, struct cpu_key *key)
+static int scnprintf_cpu_key(char *buf, size_t size, struct cpu_key *key)
{
if (key)
- sprintf(buf, "[%d %d %s %s]", key->on_disk_key.k_dir_id,
- key->on_disk_key.k_objectid, reiserfs_cpu_offset(key),
- cpu_type(key));
+ return scnprintf(buf, size, "[%d %d %s %s]",
+ key->on_disk_key.k_dir_id,
+ key->on_disk_key.k_objectid,
+ reiserfs_cpu_offset(key), cpu_type(key));
else
- sprintf(buf, "[NULL]");
+ return scnprintf(buf, size, "[NULL]");
}
-static void sprintf_de_head(char *buf, struct reiserfs_de_head *deh)
+static int scnprintf_de_head(char *buf, size_t size,
+ struct reiserfs_de_head *deh)
{
if (deh)
- sprintf(buf,
- "[offset=%d dir_id=%d objectid=%d location=%d state=%04x]",
- deh_offset(deh), deh_dir_id(deh), deh_objectid(deh),
- deh_location(deh), deh_state(deh));
+ return scnprintf(buf, size,
+ "[offset=%d dir_id=%d objectid=%d location=%d state=%04x]",
+ deh_offset(deh), deh_dir_id(deh),
+ deh_objectid(deh), deh_location(deh),
+ deh_state(deh));
else
- sprintf(buf, "[NULL]");
+ return scnprintf(buf, size, "[NULL]");
}
-static void sprintf_item_head(char *buf, struct item_head *ih)
+static int scnprintf_item_head(char *buf, size_t size, struct item_head *ih)
{
if (ih) {
- strcpy(buf,
- (ih_version(ih) == KEY_FORMAT_3_6) ? "*3.6* " : "*3.5*");
- sprintf_le_key(buf + strlen(buf), &(ih->ih_key));
- sprintf(buf + strlen(buf), ", item_len %d, item_location %d, "
- "free_space(entry_count) %d",
- ih_item_len(ih), ih_location(ih), ih_free_space(ih));
+ char *p = buf;
+ char * const end = buf + size;
+
+ p += scnprintf(p, end - p, "%s",
+ (ih_version(ih) == KEY_FORMAT_3_6) ?
+ "*3.6* " : "*3.5*");
+
+ p += scnprintf_le_key(p, end - p, &ih->ih_key);
+
+ p += scnprintf(p, end - p,
+ ", item_len %d, item_location %d, free_space(entry_count) %d",
+ ih_item_len(ih), ih_location(ih),
+ ih_free_space(ih));
+ return p - buf;
} else
- sprintf(buf, "[NULL]");
+ return scnprintf(buf, size, "[NULL]");
}
-static void sprintf_direntry(char *buf, struct reiserfs_dir_entry *de)
+static int scnprintf_direntry(char *buf, size_t size,
+ struct reiserfs_dir_entry *de)
{
char name[20];
memcpy(name, de->de_name, de->de_namelen > 19 ? 19 : de->de_namelen);
name[de->de_namelen > 19 ? 19 : de->de_namelen] = 0;
- sprintf(buf, "\"%s\"==>[%d %d]", name, de->de_dir_id, de->de_objectid);
+ return scnprintf(buf, size, "\"%s\"==>[%d %d]",
+ name, de->de_dir_id, de->de_objectid);
}
-static void sprintf_block_head(char *buf, struct buffer_head *bh)
+static int scnprintf_block_head(char *buf, size_t size, struct buffer_head *bh)
{
- sprintf(buf, "level=%d, nr_items=%d, free_space=%d rdkey ",
- B_LEVEL(bh), B_NR_ITEMS(bh), B_FREE_SPACE(bh));
+ return scnprintf(buf, size,
+ "level=%d, nr_items=%d, free_space=%d rdkey ",
+ B_LEVEL(bh), B_NR_ITEMS(bh), B_FREE_SPACE(bh));
}
-static void sprintf_buffer_head(char *buf, struct buffer_head *bh)
+static int scnprintf_buffer_head(char *buf, size_t size, struct buffer_head *bh)
{
- sprintf(buf,
- "dev %pg, size %zd, blocknr %llu, count %d, state 0x%lx, page %p, (%s, %s, %s)",
- bh->b_bdev, bh->b_size,
- (unsigned long long)bh->b_blocknr, atomic_read(&(bh->b_count)),
- bh->b_state, bh->b_page,
- buffer_uptodate(bh) ? "UPTODATE" : "!UPTODATE",
- buffer_dirty(bh) ? "DIRTY" : "CLEAN",
- buffer_locked(bh) ? "LOCKED" : "UNLOCKED");
+ return scnprintf(buf, size,
+ "dev %pg, size %zd, blocknr %llu, count %d, state 0x%lx, page %p, (%s, %s, %s)",
+ bh->b_bdev, bh->b_size,
+ (unsigned long long)bh->b_blocknr,
+ atomic_read(&(bh->b_count)),
+ bh->b_state, bh->b_page,
+ buffer_uptodate(bh) ? "UPTODATE" : "!UPTODATE",
+ buffer_dirty(bh) ? "DIRTY" : "CLEAN",
+ buffer_locked(bh) ? "LOCKED" : "UNLOCKED");
}
-static void sprintf_disk_child(char *buf, struct disk_child *dc)
+static int scnprintf_disk_child(char *buf, size_t size, struct disk_child *dc)
{
- sprintf(buf, "[dc_number=%d, dc_size=%u]", dc_block_number(dc),
- dc_size(dc));
+ return scnprintf(buf, size, "[dc_number=%d, dc_size=%u]",
+ dc_block_number(dc), dc_size(dc));
}
static char *is_there_reiserfs_struct(char *fmt, int *what)
@@ -189,55 +205,60 @@ static void prepare_error_buf(const char *fmt, va_list args)
char *fmt1 = fmt_buf;
char *k;
char *p = error_buf;
+ char * const end = &error_buf[sizeof(error_buf)];
int what;
spin_lock(&error_lock);
- strcpy(fmt1, fmt);
+ if (WARN_ON(strscpy(fmt_buf, fmt, sizeof(fmt_buf)) < 0)) {
+ strscpy(error_buf, "format string too long", end - error_buf);
+ goto out_unlock;
+ }
while ((k = is_there_reiserfs_struct(fmt1, &what)) != NULL) {
*k = 0;
- p += vsprintf(p, fmt1, args);
+ p += vscnprintf(p, end - p, fmt1, args);
switch (what) {
case 'k':
- sprintf_le_key(p, va_arg(args, struct reiserfs_key *));
+ p += scnprintf_le_key(p, end - p,
+ va_arg(args, struct reiserfs_key *));
break;
case 'K':
- sprintf_cpu_key(p, va_arg(args, struct cpu_key *));
+ p += scnprintf_cpu_key(p, end - p,
+ va_arg(args, struct cpu_key *));
break;
case 'h':
- sprintf_item_head(p, va_arg(args, struct item_head *));
+ p += scnprintf_item_head(p, end - p,
+ va_arg(args, struct item_head *));
break;
case 't':
- sprintf_direntry(p,
- va_arg(args,
- struct reiserfs_dir_entry *));
+ p += scnprintf_direntry(p, end - p,
+ va_arg(args, struct reiserfs_dir_entry *));
break;
case 'y':
- sprintf_disk_child(p,
- va_arg(args, struct disk_child *));
+ p += scnprintf_disk_child(p, end - p,
+ va_arg(args, struct disk_child *));
break;
case 'z':
- sprintf_block_head(p,
- va_arg(args, struct buffer_head *));
+ p += scnprintf_block_head(p, end - p,
+ va_arg(args, struct buffer_head *));
break;
case 'b':
- sprintf_buffer_head(p,
- va_arg(args, struct buffer_head *));
+ p += scnprintf_buffer_head(p, end - p,
+ va_arg(args, struct buffer_head *));
break;
case 'a':
- sprintf_de_head(p,
- va_arg(args,
- struct reiserfs_de_head *));
+ p += scnprintf_de_head(p, end - p,
+ va_arg(args, struct reiserfs_de_head *));
break;
}
- p += strlen(p);
fmt1 = k + 2;
}
- vsprintf(p, fmt1, args);
+ p += vscnprintf(p, end - p, fmt1, args);
+out_unlock:
spin_unlock(&error_lock);
}
diff --git a/fs/select.c b/fs/select.c
index 317891ff8165..4a6b6e4b21cb 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -34,29 +34,6 @@
#include <linux/uaccess.h>
-__poll_t vfs_poll(struct file *file, struct poll_table_struct *pt)
-{
- if (file->f_op->poll) {
- return file->f_op->poll(file, pt);
- } else if (file_has_poll_mask(file)) {
- unsigned int events = poll_requested_events(pt);
- struct wait_queue_head *head;
-
- if (pt && pt->_qproc) {
- head = file->f_op->get_poll_head(file, events);
- if (!head)
- return DEFAULT_POLLMASK;
- if (IS_ERR(head))
- return EPOLLERR;
- pt->_qproc(file, head, pt);
- }
-
- return file->f_op->poll_mask(file, events);
- } else {
- return DEFAULT_POLLMASK;
- }
-}
-EXPORT_SYMBOL_GPL(vfs_poll);
/*
* Estimate expected accuracy in ns from a timeval.
diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c
index 2751476e6b6e..f098b9f1c396 100644
--- a/fs/squashfs/block.c
+++ b/fs/squashfs/block.c
@@ -167,6 +167,8 @@ int squashfs_read_data(struct super_block *sb, u64 index, int length,
}
if (compressed) {
+ if (!msblk->stream)
+ goto read_failure;
length = squashfs_decompress(msblk, bh, b, offset, length,
output);
if (length < 0)
diff --git a/fs/squashfs/cache.c b/fs/squashfs/cache.c
index 23813c078cc9..0839efa720b3 100644
--- a/fs/squashfs/cache.c
+++ b/fs/squashfs/cache.c
@@ -350,6 +350,9 @@ int squashfs_read_metadata(struct super_block *sb, void *buffer,
TRACE("Entered squashfs_read_metadata [%llx:%x]\n", *block, *offset);
+ if (unlikely(length < 0))
+ return -EIO;
+
while (length) {
entry = squashfs_cache_get(sb, msblk->block_cache, *block, 0);
if (entry->error) {
diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c
index 13d80947bf9e..f1c1430ae721 100644
--- a/fs/squashfs/file.c
+++ b/fs/squashfs/file.c
@@ -194,7 +194,11 @@ static long long read_indexes(struct super_block *sb, int n,
}
for (i = 0; i < blocks; i++) {
- int size = le32_to_cpu(blist[i]);
+ int size = squashfs_block_size(blist[i]);
+ if (size < 0) {
+ err = size;
+ goto failure;
+ }
block += SQUASHFS_COMPRESSED_SIZE_BLOCK(size);
}
n -= blocks;
@@ -367,7 +371,24 @@ static int read_blocklist(struct inode *inode, int index, u64 *block)
sizeof(size));
if (res < 0)
return res;
- return le32_to_cpu(size);
+ return squashfs_block_size(size);
+}
+
+void squashfs_fill_page(struct page *page, struct squashfs_cache_entry *buffer, int offset, int avail)
+{
+ int copied;
+ void *pageaddr;
+
+ pageaddr = kmap_atomic(page);
+ copied = squashfs_copy_data(pageaddr, buffer, offset, avail);
+ memset(pageaddr + copied, 0, PAGE_SIZE - copied);
+ kunmap_atomic(pageaddr);
+
+ flush_dcache_page(page);
+ if (copied == avail)
+ SetPageUptodate(page);
+ else
+ SetPageError(page);
}
/* Copy data into page cache */
@@ -376,7 +397,6 @@ void squashfs_copy_cache(struct page *page, struct squashfs_cache_entry *buffer,
{
struct inode *inode = page->mapping->host;
struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
- void *pageaddr;
int i, mask = (1 << (msblk->block_log - PAGE_SHIFT)) - 1;
int start_index = page->index & ~mask, end_index = start_index | mask;
@@ -402,12 +422,7 @@ void squashfs_copy_cache(struct page *page, struct squashfs_cache_entry *buffer,
if (PageUptodate(push_page))
goto skip_page;
- pageaddr = kmap_atomic(push_page);
- squashfs_copy_data(pageaddr, buffer, offset, avail);
- memset(pageaddr + avail, 0, PAGE_SIZE - avail);
- kunmap_atomic(pageaddr);
- flush_dcache_page(push_page);
- SetPageUptodate(push_page);
+ squashfs_fill_page(push_page, buffer, offset, avail);
skip_page:
unlock_page(push_page);
if (i != page->index)
@@ -416,10 +431,9 @@ skip_page:
}
/* Read datablock stored packed inside a fragment (tail-end packed block) */
-static int squashfs_readpage_fragment(struct page *page)
+static int squashfs_readpage_fragment(struct page *page, int expected)
{
struct inode *inode = page->mapping->host;
- struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
struct squashfs_cache_entry *buffer = squashfs_get_fragment(inode->i_sb,
squashfs_i(inode)->fragment_block,
squashfs_i(inode)->fragment_size);
@@ -430,23 +444,16 @@ static int squashfs_readpage_fragment(struct page *page)
squashfs_i(inode)->fragment_block,
squashfs_i(inode)->fragment_size);
else
- squashfs_copy_cache(page, buffer, i_size_read(inode) &
- (msblk->block_size - 1),
+ squashfs_copy_cache(page, buffer, expected,
squashfs_i(inode)->fragment_offset);
squashfs_cache_put(buffer);
return res;
}
-static int squashfs_readpage_sparse(struct page *page, int index, int file_end)
+static int squashfs_readpage_sparse(struct page *page, int expected)
{
- struct inode *inode = page->mapping->host;
- struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
- int bytes = index == file_end ?
- (i_size_read(inode) & (msblk->block_size - 1)) :
- msblk->block_size;
-
- squashfs_copy_cache(page, NULL, bytes, 0);
+ squashfs_copy_cache(page, NULL, expected, 0);
return 0;
}
@@ -456,6 +463,9 @@ static int squashfs_readpage(struct file *file, struct page *page)
struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
int index = page->index >> (msblk->block_log - PAGE_SHIFT);
int file_end = i_size_read(inode) >> msblk->block_log;
+ int expected = index == file_end ?
+ (i_size_read(inode) & (msblk->block_size - 1)) :
+ msblk->block_size;
int res;
void *pageaddr;
@@ -474,11 +484,11 @@ static int squashfs_readpage(struct file *file, struct page *page)
goto error_out;
if (bsize == 0)
- res = squashfs_readpage_sparse(page, index, file_end);
+ res = squashfs_readpage_sparse(page, expected);
else
- res = squashfs_readpage_block(page, block, bsize);
+ res = squashfs_readpage_block(page, block, bsize, expected);
} else
- res = squashfs_readpage_fragment(page);
+ res = squashfs_readpage_fragment(page, expected);
if (!res)
return 0;
diff --git a/fs/squashfs/file_cache.c b/fs/squashfs/file_cache.c
index f2310d2a2019..a9ba8d96776a 100644
--- a/fs/squashfs/file_cache.c
+++ b/fs/squashfs/file_cache.c
@@ -20,7 +20,7 @@
#include "squashfs.h"
/* Read separately compressed datablock and memcopy into page cache */
-int squashfs_readpage_block(struct page *page, u64 block, int bsize)
+int squashfs_readpage_block(struct page *page, u64 block, int bsize, int expected)
{
struct inode *i = page->mapping->host;
struct squashfs_cache_entry *buffer = squashfs_get_datablock(i->i_sb,
@@ -31,7 +31,7 @@ int squashfs_readpage_block(struct page *page, u64 block, int bsize)
ERROR("Unable to read page, block %llx, size %x\n", block,
bsize);
else
- squashfs_copy_cache(page, buffer, buffer->length, 0);
+ squashfs_copy_cache(page, buffer, expected, 0);
squashfs_cache_put(buffer);
return res;
diff --git a/fs/squashfs/file_direct.c b/fs/squashfs/file_direct.c
index cb485d8e0e91..80db1b86a27c 100644
--- a/fs/squashfs/file_direct.c
+++ b/fs/squashfs/file_direct.c
@@ -21,10 +21,11 @@
#include "page_actor.h"
static int squashfs_read_cache(struct page *target_page, u64 block, int bsize,
- int pages, struct page **page);
+ int pages, struct page **page, int bytes);
/* Read separately compressed datablock directly into page cache */
-int squashfs_readpage_block(struct page *target_page, u64 block, int bsize)
+int squashfs_readpage_block(struct page *target_page, u64 block, int bsize,
+ int expected)
{
struct inode *inode = target_page->mapping->host;
@@ -83,7 +84,7 @@ int squashfs_readpage_block(struct page *target_page, u64 block, int bsize)
* using an intermediate buffer.
*/
res = squashfs_read_cache(target_page, block, bsize, pages,
- page);
+ page, expected);
if (res < 0)
goto mark_errored;
@@ -95,6 +96,11 @@ int squashfs_readpage_block(struct page *target_page, u64 block, int bsize)
if (res < 0)
goto mark_errored;
+ if (res != expected) {
+ res = -EIO;
+ goto mark_errored;
+ }
+
/* Last page may have trailing bytes not filled */
bytes = res % PAGE_SIZE;
if (bytes) {
@@ -138,13 +144,12 @@ out:
static int squashfs_read_cache(struct page *target_page, u64 block, int bsize,
- int pages, struct page **page)
+ int pages, struct page **page, int bytes)
{
struct inode *i = target_page->mapping->host;
struct squashfs_cache_entry *buffer = squashfs_get_datablock(i->i_sb,
block, bsize);
- int bytes = buffer->length, res = buffer->error, n, offset = 0;
- void *pageaddr;
+ int res = buffer->error, n, offset = 0;
if (res) {
ERROR("Unable to read page, block %llx, size %x\n", block,
@@ -159,12 +164,7 @@ static int squashfs_read_cache(struct page *target_page, u64 block, int bsize,
if (page[n] == NULL)
continue;
- pageaddr = kmap_atomic(page[n]);
- squashfs_copy_data(pageaddr, buffer, offset, avail);
- memset(pageaddr + avail, 0, PAGE_SIZE - avail);
- kunmap_atomic(pageaddr);
- flush_dcache_page(page[n]);
- SetPageUptodate(page[n]);
+ squashfs_fill_page(page[n], buffer, offset, avail);
unlock_page(page[n]);
if (page[n] != target_page)
put_page(page[n]);
diff --git a/fs/squashfs/fragment.c b/fs/squashfs/fragment.c
index 0ed6edbc5c71..0681feab4a84 100644
--- a/fs/squashfs/fragment.c
+++ b/fs/squashfs/fragment.c
@@ -49,11 +49,16 @@ int squashfs_frag_lookup(struct super_block *sb, unsigned int fragment,
u64 *fragment_block)
{
struct squashfs_sb_info *msblk = sb->s_fs_info;
- int block = SQUASHFS_FRAGMENT_INDEX(fragment);
- int offset = SQUASHFS_FRAGMENT_INDEX_OFFSET(fragment);
- u64 start_block = le64_to_cpu(msblk->fragment_index[block]);
+ int block, offset, size;
struct squashfs_fragment_entry fragment_entry;
- int size;
+ u64 start_block;
+
+ if (fragment >= msblk->fragments)
+ return -EIO;
+ block = SQUASHFS_FRAGMENT_INDEX(fragment);
+ offset = SQUASHFS_FRAGMENT_INDEX_OFFSET(fragment);
+
+ start_block = le64_to_cpu(msblk->fragment_index[block]);
size = squashfs_read_metadata(sb, &fragment_entry, &start_block,
&offset, sizeof(fragment_entry));
@@ -61,9 +66,7 @@ int squashfs_frag_lookup(struct super_block *sb, unsigned int fragment,
return size;
*fragment_block = le64_to_cpu(fragment_entry.start_block);
- size = le32_to_cpu(fragment_entry.size);
-
- return size;
+ return squashfs_block_size(fragment_entry.size);
}
diff --git a/fs/squashfs/squashfs.h b/fs/squashfs/squashfs.h
index 887d6d270080..f89f8a74c6ce 100644
--- a/fs/squashfs/squashfs.h
+++ b/fs/squashfs/squashfs.h
@@ -67,11 +67,12 @@ extern __le64 *squashfs_read_fragment_index_table(struct super_block *,
u64, u64, unsigned int);
/* file.c */
+void squashfs_fill_page(struct page *, struct squashfs_cache_entry *, int, int);
void squashfs_copy_cache(struct page *, struct squashfs_cache_entry *, int,
int);
/* file_xxx.c */
-extern int squashfs_readpage_block(struct page *, u64, int);
+extern int squashfs_readpage_block(struct page *, u64, int, int);
/* id.c */
extern int squashfs_get_id(struct super_block *, unsigned int, unsigned int *);
diff --git a/fs/squashfs/squashfs_fs.h b/fs/squashfs/squashfs_fs.h
index 24d12fd14177..4e6853f084d0 100644
--- a/fs/squashfs/squashfs_fs.h
+++ b/fs/squashfs/squashfs_fs.h
@@ -129,6 +129,12 @@
#define SQUASHFS_COMPRESSED_BLOCK(B) (!((B) & SQUASHFS_COMPRESSED_BIT_BLOCK))
+static inline int squashfs_block_size(__le32 raw)
+{
+ u32 size = le32_to_cpu(raw);
+ return (size >> 25) ? -EIO : size;
+}
+
/*
* Inode number ops. Inodes consist of a compressed block number, and an
* uncompressed offset within that block
diff --git a/fs/squashfs/squashfs_fs_sb.h b/fs/squashfs/squashfs_fs_sb.h
index 1da565cb50c3..ef69c31947bf 100644
--- a/fs/squashfs/squashfs_fs_sb.h
+++ b/fs/squashfs/squashfs_fs_sb.h
@@ -75,6 +75,7 @@ struct squashfs_sb_info {
unsigned short block_log;
long long bytes_used;
unsigned int inodes;
+ unsigned int fragments;
int xattr_ids;
};
#endif
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
index 8a73b97217c8..40e657386fa5 100644
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c
@@ -175,6 +175,7 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent)
msblk->inode_table = le64_to_cpu(sblk->inode_table_start);
msblk->directory_table = le64_to_cpu(sblk->directory_table_start);
msblk->inodes = le32_to_cpu(sblk->inodes);
+ msblk->fragments = le32_to_cpu(sblk->fragments);
flags = le16_to_cpu(sblk->flags);
TRACE("Found valid superblock on %pg\n", sb->s_bdev);
@@ -185,7 +186,7 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent)
TRACE("Filesystem size %lld bytes\n", msblk->bytes_used);
TRACE("Block size %d\n", msblk->block_size);
TRACE("Number of inodes %d\n", msblk->inodes);
- TRACE("Number of fragments %d\n", le32_to_cpu(sblk->fragments));
+ TRACE("Number of fragments %d\n", msblk->fragments);
TRACE("Number of ids %d\n", le16_to_cpu(sblk->no_ids));
TRACE("sblk->inode_table_start %llx\n", msblk->inode_table);
TRACE("sblk->directory_table_start %llx\n", msblk->directory_table);
@@ -272,7 +273,7 @@ allocate_id_index_table:
sb->s_export_op = &squashfs_export_ops;
handle_fragments:
- fragments = le32_to_cpu(sblk->fragments);
+ fragments = msblk->fragments;
if (fragments == 0)
goto check_directory_table;
diff --git a/fs/timerfd.c b/fs/timerfd.c
index d84a2bee4f82..cdad49da3ff7 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -226,20 +226,21 @@ static int timerfd_release(struct inode *inode, struct file *file)
kfree_rcu(ctx, rcu);
return 0;
}
-
-static struct wait_queue_head *timerfd_get_poll_head(struct file *file,
- __poll_t eventmask)
+
+static __poll_t timerfd_poll(struct file *file, poll_table *wait)
{
struct timerfd_ctx *ctx = file->private_data;
+ __poll_t events = 0;
+ unsigned long flags;
- return &ctx->wqh;
-}
+ poll_wait(file, &ctx->wqh, wait);
-static __poll_t timerfd_poll_mask(struct file *file, __poll_t eventmask)
-{
- struct timerfd_ctx *ctx = file->private_data;
+ spin_lock_irqsave(&ctx->wqh.lock, flags);
+ if (ctx->ticks)
+ events |= EPOLLIN;
+ spin_unlock_irqrestore(&ctx->wqh.lock, flags);
- return ctx->ticks ? EPOLLIN : 0;
+ return events;
}
static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count,
@@ -363,8 +364,7 @@ static long timerfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg
static const struct file_operations timerfd_fops = {
.release = timerfd_release,
- .get_poll_head = timerfd_get_poll_head,
- .poll_mask = timerfd_poll_mask,
+ .poll = timerfd_poll,
.read = timerfd_read,
.llseek = noop_llseek,
.show_fdinfo = timerfd_show,
diff --git a/fs/udf/balloc.c b/fs/udf/balloc.c
index 1b961b1d9699..fcda0fc97b90 100644
--- a/fs/udf/balloc.c
+++ b/fs/udf/balloc.c
@@ -533,8 +533,7 @@ static int udf_table_prealloc_blocks(struct super_block *sb,
udf_write_aext(table, &epos, &eloc,
(etype << 30) | elen, 1);
} else
- udf_delete_aext(table, epos, eloc,
- (etype << 30) | elen);
+ udf_delete_aext(table, epos);
} else {
alloc_count = 0;
}
@@ -630,7 +629,7 @@ static udf_pblk_t udf_table_new_block(struct super_block *sb,
if (goal_elen)
udf_write_aext(table, &goal_epos, &goal_eloc, goal_elen, 1);
else
- udf_delete_aext(table, goal_epos, goal_eloc, goal_elen);
+ udf_delete_aext(table, goal_epos);
brelse(goal_epos.bh);
udf_add_free_space(sb, partition, -1);
diff --git a/fs/udf/directory.c b/fs/udf/directory.c
index 0a98a2369738..d9523013096f 100644
--- a/fs/udf/directory.c
+++ b/fs/udf/directory.c
@@ -141,10 +141,7 @@ struct fileIdentDesc *udf_fileident_read(struct inode *dir, loff_t *nf_pos,
fibh->ebh->b_data,
sizeof(struct fileIdentDesc) + fibh->soffset);
- fi_len = (sizeof(struct fileIdentDesc) +
- cfi->lengthFileIdent +
- le16_to_cpu(cfi->lengthOfImpUse) + 3) & ~3;
-
+ fi_len = udf_dir_entry_len(cfi);
*nf_pos += fi_len - (fibh->eoffset - fibh->soffset);
fibh->eoffset = fibh->soffset + fi_len;
} else {
@@ -152,6 +149,9 @@ struct fileIdentDesc *udf_fileident_read(struct inode *dir, loff_t *nf_pos,
sizeof(struct fileIdentDesc));
}
}
+ /* Got last entry outside of dir size - fs is corrupted! */
+ if (*nf_pos > dir->i_size)
+ return NULL;
return fi;
}
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 7f39d17352c9..9915a58fbabd 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -1147,8 +1147,7 @@ static void udf_update_extents(struct inode *inode, struct kernel_long_ad *laarr
if (startnum > endnum) {
for (i = 0; i < (startnum - endnum); i++)
- udf_delete_aext(inode, *epos, laarr[i].extLocation,
- laarr[i].extLength);
+ udf_delete_aext(inode, *epos);
} else if (startnum < endnum) {
for (i = 0; i < (endnum - startnum); i++) {
udf_insert_aext(inode, *epos, laarr[i].extLocation,
@@ -2176,14 +2175,15 @@ static int8_t udf_insert_aext(struct inode *inode, struct extent_position epos,
return (nelen >> 30);
}
-int8_t udf_delete_aext(struct inode *inode, struct extent_position epos,
- struct kernel_lb_addr eloc, uint32_t elen)
+int8_t udf_delete_aext(struct inode *inode, struct extent_position epos)
{
struct extent_position oepos;
int adsize;
int8_t etype;
struct allocExtDesc *aed;
struct udf_inode_info *iinfo;
+ struct kernel_lb_addr eloc;
+ uint32_t elen;
if (epos.bh) {
get_bh(epos.bh);
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index c586026508db..06f37ddd2997 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -351,8 +351,6 @@ static struct fileIdentDesc *udf_add_entry(struct inode *dir,
loff_t f_pos;
loff_t size = udf_ext0_offset(dir) + dir->i_size;
int nfidlen;
- uint8_t lfi;
- uint16_t liu;
udf_pblk_t block;
struct kernel_lb_addr eloc;
uint32_t elen = 0;
@@ -383,7 +381,7 @@ static struct fileIdentDesc *udf_add_entry(struct inode *dir,
namelen = 0;
}
- nfidlen = (sizeof(struct fileIdentDesc) + namelen + 3) & ~3;
+ nfidlen = ALIGN(sizeof(struct fileIdentDesc) + namelen, UDF_NAME_PAD);
f_pos = udf_ext0_offset(dir);
@@ -424,12 +422,8 @@ static struct fileIdentDesc *udf_add_entry(struct inode *dir,
goto out_err;
}
- liu = le16_to_cpu(cfi->lengthOfImpUse);
- lfi = cfi->lengthFileIdent;
-
if ((cfi->fileCharacteristics & FID_FILE_CHAR_DELETED) != 0) {
- if (((sizeof(struct fileIdentDesc) +
- liu + lfi + 3) & ~3) == nfidlen) {
+ if (udf_dir_entry_len(cfi) == nfidlen) {
cfi->descTag.tagSerialNum = cpu_to_le16(1);
cfi->fileVersionNum = cpu_to_le16(1);
cfi->fileCharacteristics = 0;
@@ -1201,9 +1195,7 @@ static int udf_rename(struct inode *old_dir, struct dentry *old_dentry,
if (dir_fi) {
dir_fi->icb.extLocation = cpu_to_lelb(UDF_I(new_dir)->i_location);
- udf_update_tag((char *)dir_fi,
- (sizeof(struct fileIdentDesc) +
- le16_to_cpu(dir_fi->lengthOfImpUse) + 3) & ~3);
+ udf_update_tag((char *)dir_fi, udf_dir_entry_len(dir_fi));
if (old_iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB)
mark_inode_dirty(old_inode);
else
diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h
index bae311b59400..84c47dde4d26 100644
--- a/fs/udf/udfdecl.h
+++ b/fs/udf/udfdecl.h
@@ -132,6 +132,12 @@ struct inode *udf_find_metadata_inode_efe(struct super_block *sb,
extern int udf_write_fi(struct inode *inode, struct fileIdentDesc *,
struct fileIdentDesc *, struct udf_fileident_bh *,
uint8_t *, uint8_t *);
+static inline unsigned int udf_dir_entry_len(struct fileIdentDesc *cfi)
+{
+ return ALIGN(sizeof(struct fileIdentDesc) +
+ le16_to_cpu(cfi->lengthOfImpUse) + cfi->lengthFileIdent,
+ UDF_NAME_PAD);
+}
/* file.c */
extern long udf_ioctl(struct file *, unsigned int, unsigned long);
@@ -167,8 +173,7 @@ extern int udf_add_aext(struct inode *, struct extent_position *,
struct kernel_lb_addr *, uint32_t, int);
extern void udf_write_aext(struct inode *, struct extent_position *,
struct kernel_lb_addr *, uint32_t, int);
-extern int8_t udf_delete_aext(struct inode *, struct extent_position,
- struct kernel_lb_addr, uint32_t);
+extern int8_t udf_delete_aext(struct inode *, struct extent_position);
extern int8_t udf_next_aext(struct inode *, struct extent_position *,
struct kernel_lb_addr *, uint32_t *, int);
extern int8_t udf_current_aext(struct inode *, struct extent_position *,
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 123bf7d516fc..bad9cea37f12 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -222,24 +222,26 @@ static inline bool userfaultfd_huge_must_wait(struct userfaultfd_ctx *ctx,
unsigned long reason)
{
struct mm_struct *mm = ctx->mm;
- pte_t *pte;
+ pte_t *ptep, pte;
bool ret = true;
VM_BUG_ON(!rwsem_is_locked(&mm->mmap_sem));
- pte = huge_pte_offset(mm, address, vma_mmu_pagesize(vma));
- if (!pte)
+ ptep = huge_pte_offset(mm, address, vma_mmu_pagesize(vma));
+
+ if (!ptep)
goto out;
ret = false;
+ pte = huge_ptep_get(ptep);
/*
* Lockless access: we're in a wait_event so it's ok if it
* changes under us.
*/
- if (huge_pte_none(*pte))
+ if (huge_pte_none(pte))
ret = true;
- if (!huge_pte_write(*pte) && (reason & VM_UFFD_WP))
+ if (!huge_pte_write(pte) && (reason & VM_UFFD_WP))
ret = true;
out:
return ret;
@@ -631,8 +633,10 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
/* the various vma->vm_userfaultfd_ctx still points to it */
down_write(&mm->mmap_sem);
for (vma = mm->mmap; vma; vma = vma->vm_next)
- if (vma->vm_userfaultfd_ctx.ctx == release_new_ctx)
+ if (vma->vm_userfaultfd_ctx.ctx == release_new_ctx) {
vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
+ vma->vm_flags &= ~(VM_UFFD_WP | VM_UFFD_MISSING);
+ }
up_write(&mm->mmap_sem);
userfaultfd_ctx_put(release_new_ctx);
diff --git a/fs/xfs/libxfs/xfs_ag_resv.c b/fs/xfs/libxfs/xfs_ag_resv.c
index 84db76e0e3e3..fecd187fcf2c 100644
--- a/fs/xfs/libxfs/xfs_ag_resv.c
+++ b/fs/xfs/libxfs/xfs_ag_resv.c
@@ -157,6 +157,7 @@ __xfs_ag_resv_free(
error = xfs_mod_fdblocks(pag->pag_mount, oldresv, true);
resv->ar_reserved = 0;
resv->ar_asked = 0;
+ resv->ar_orig_reserved = 0;
if (error)
trace_xfs_ag_resv_free_error(pag->pag_mount, pag->pag_agno,
@@ -189,13 +190,34 @@ __xfs_ag_resv_init(
struct xfs_mount *mp = pag->pag_mount;
struct xfs_ag_resv *resv;
int error;
- xfs_extlen_t reserved;
+ xfs_extlen_t hidden_space;
if (used > ask)
ask = used;
- reserved = ask - used;
- error = xfs_mod_fdblocks(mp, -(int64_t)reserved, true);
+ switch (type) {
+ case XFS_AG_RESV_RMAPBT:
+ /*
+ * Space taken by the rmapbt is not subtracted from fdblocks
+ * because the rmapbt lives in the free space. Here we must
+ * subtract the entire reservation from fdblocks so that we
+ * always have blocks available for rmapbt expansion.
+ */
+ hidden_space = ask;
+ break;
+ case XFS_AG_RESV_METADATA:
+ /*
+ * Space taken by all other metadata btrees are accounted
+ * on-disk as used space. We therefore only hide the space
+ * that is reserved but not used by the trees.
+ */
+ hidden_space = ask - used;
+ break;
+ default:
+ ASSERT(0);
+ return -EINVAL;
+ }
+ error = xfs_mod_fdblocks(mp, -(int64_t)hidden_space, true);
if (error) {
trace_xfs_ag_resv_init_error(pag->pag_mount, pag->pag_agno,
error, _RET_IP_);
@@ -216,7 +238,8 @@ __xfs_ag_resv_init(
resv = xfs_perag_resv(pag, type);
resv->ar_asked = ask;
- resv->ar_reserved = resv->ar_orig_reserved = reserved;
+ resv->ar_orig_reserved = hidden_space;
+ resv->ar_reserved = ask - used;
trace_xfs_ag_resv_init(pag, type, ask);
return 0;
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index eef466260d43..75dbdc14c45f 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -223,12 +223,13 @@ xfs_alloc_get_rec(
error = xfs_btree_get_rec(cur, &rec, stat);
if (error || !(*stat))
return error;
- if (rec->alloc.ar_blockcount == 0)
- goto out_bad_rec;
*bno = be32_to_cpu(rec->alloc.ar_startblock);
*len = be32_to_cpu(rec->alloc.ar_blockcount);
+ if (*len == 0)
+ goto out_bad_rec;
+
/* check for valid extent range, including overflow */
if (!xfs_verify_agbno(mp, agno, *bno))
goto out_bad_rec;
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 01628f0c9a0c..7205268b30bc 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -5780,6 +5780,32 @@ del_cursor:
return error;
}
+/* Make sure we won't be right-shifting an extent past the maximum bound. */
+int
+xfs_bmap_can_insert_extents(
+ struct xfs_inode *ip,
+ xfs_fileoff_t off,
+ xfs_fileoff_t shift)
+{
+ struct xfs_bmbt_irec got;
+ int is_empty;
+ int error = 0;
+
+ ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
+
+ if (XFS_FORCED_SHUTDOWN(ip->i_mount))
+ return -EIO;
+
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
+ error = xfs_bmap_last_extent(NULL, ip, XFS_DATA_FORK, &got, &is_empty);
+ if (!error && !is_empty && got.br_startoff >= off &&
+ ((got.br_startoff + shift) & BMBT_STARTOFF_MASK) < got.br_startoff)
+ error = -EINVAL;
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+
+ return error;
+}
+
int
xfs_bmap_insert_extents(
struct xfs_trans *tp,
diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h
index 99dddbd0fcc6..9b49ddf99c41 100644
--- a/fs/xfs/libxfs/xfs_bmap.h
+++ b/fs/xfs/libxfs/xfs_bmap.h
@@ -227,6 +227,8 @@ int xfs_bmap_collapse_extents(struct xfs_trans *tp, struct xfs_inode *ip,
xfs_fileoff_t *next_fsb, xfs_fileoff_t offset_shift_fsb,
bool *done, xfs_fsblock_t *firstblock,
struct xfs_defer_ops *dfops);
+int xfs_bmap_can_insert_extents(struct xfs_inode *ip, xfs_fileoff_t off,
+ xfs_fileoff_t shift);
int xfs_bmap_insert_extents(struct xfs_trans *tp, struct xfs_inode *ip,
xfs_fileoff_t *next_fsb, xfs_fileoff_t offset_shift_fsb,
bool *done, xfs_fileoff_t stop_fsb, xfs_fsblock_t *firstblock,
diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index 1c5a8aaf2bfc..059bc44c27e8 100644
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -962,6 +962,9 @@ typedef enum xfs_dinode_fmt {
XFS_DFORK_DSIZE(dip, mp) : \
XFS_DFORK_ASIZE(dip, mp))
+#define XFS_DFORK_MAXEXT(dip, mp, w) \
+ (XFS_DFORK_SIZE(dip, mp, w) / sizeof(struct xfs_bmbt_rec))
+
/*
* Return pointers to the data or attribute forks.
*/
@@ -1526,6 +1529,8 @@ typedef struct xfs_bmdr_block {
#define BMBT_STARTBLOCK_BITLEN 52
#define BMBT_BLOCKCOUNT_BITLEN 21
+#define BMBT_STARTOFF_MASK ((1ULL << BMBT_STARTOFF_BITLEN) - 1)
+
typedef struct xfs_bmbt_rec {
__be64 l0, l1;
} xfs_bmbt_rec_t;
diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c
index d38d724534c4..30d1d60f1d46 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.c
+++ b/fs/xfs/libxfs/xfs_inode_buf.c
@@ -374,6 +374,47 @@ xfs_log_dinode_to_disk(
}
}
+static xfs_failaddr_t
+xfs_dinode_verify_fork(
+ struct xfs_dinode *dip,
+ struct xfs_mount *mp,
+ int whichfork)
+{
+ uint32_t di_nextents = XFS_DFORK_NEXTENTS(dip, whichfork);
+
+ switch (XFS_DFORK_FORMAT(dip, whichfork)) {
+ case XFS_DINODE_FMT_LOCAL:
+ /*
+ * no local regular files yet
+ */
+ if (whichfork == XFS_DATA_FORK) {
+ if (S_ISREG(be16_to_cpu(dip->di_mode)))
+ return __this_address;
+ if (be64_to_cpu(dip->di_size) >
+ XFS_DFORK_SIZE(dip, mp, whichfork))
+ return __this_address;
+ }
+ if (di_nextents)
+ return __this_address;
+ break;
+ case XFS_DINODE_FMT_EXTENTS:
+ if (di_nextents > XFS_DFORK_MAXEXT(dip, mp, whichfork))
+ return __this_address;
+ break;
+ case XFS_DINODE_FMT_BTREE:
+ if (whichfork == XFS_ATTR_FORK) {
+ if (di_nextents > MAXAEXTNUM)
+ return __this_address;
+ } else if (di_nextents > MAXEXTNUM) {
+ return __this_address;
+ }
+ break;
+ default:
+ return __this_address;
+ }
+ return NULL;
+}
+
xfs_failaddr_t
xfs_dinode_verify(
struct xfs_mount *mp,
@@ -441,24 +482,9 @@ xfs_dinode_verify(
case S_IFREG:
case S_IFLNK:
case S_IFDIR:
- switch (dip->di_format) {
- case XFS_DINODE_FMT_LOCAL:
- /*
- * no local regular files yet
- */
- if (S_ISREG(mode))
- return __this_address;
- if (di_size > XFS_DFORK_DSIZE(dip, mp))
- return __this_address;
- if (dip->di_nextents)
- return __this_address;
- /* fall through */
- case XFS_DINODE_FMT_EXTENTS:
- case XFS_DINODE_FMT_BTREE:
- break;
- default:
- return __this_address;
- }
+ fa = xfs_dinode_verify_fork(dip, mp, XFS_DATA_FORK);
+ if (fa)
+ return fa;
break;
case 0:
/* Uninitialized inode ok. */
@@ -468,17 +494,9 @@ xfs_dinode_verify(
}
if (XFS_DFORK_Q(dip)) {
- switch (dip->di_aformat) {
- case XFS_DINODE_FMT_LOCAL:
- if (dip->di_anextents)
- return __this_address;
- /* fall through */
- case XFS_DINODE_FMT_EXTENTS:
- case XFS_DINODE_FMT_BTREE:
- break;
- default:
- return __this_address;
- }
+ fa = xfs_dinode_verify_fork(dip, mp, XFS_ATTR_FORK);
+ if (fa)
+ return fa;
} else {
/*
* If there is no fork offset, this may be a freshly-made inode
@@ -713,7 +731,8 @@ xfs_inode_validate_extsize(
if ((hint_flag || inherit_flag) && extsize == 0)
return __this_address;
- if (!(hint_flag || inherit_flag) && extsize != 0)
+ /* free inodes get flags set to zero but extsize remains */
+ if (mode && !(hint_flag || inherit_flag) && extsize != 0)
return __this_address;
if (extsize_bytes % blocksize_bytes)
@@ -759,7 +778,8 @@ xfs_inode_validate_cowextsize(
if (hint_flag && cowextsize == 0)
return __this_address;
- if (!hint_flag && cowextsize != 0)
+ /* free inodes get flags set to zero but cowextsize remains */
+ if (mode && !hint_flag && cowextsize != 0)
return __this_address;
if (hint_flag && rt_flag)
diff --git a/fs/xfs/libxfs/xfs_rtbitmap.c b/fs/xfs/libxfs/xfs_rtbitmap.c
index 65fc4ed2e9a1..b228c821bae6 100644
--- a/fs/xfs/libxfs/xfs_rtbitmap.c
+++ b/fs/xfs/libxfs/xfs_rtbitmap.c
@@ -1029,8 +1029,8 @@ xfs_rtalloc_query_range(
if (low_rec->ar_startext >= mp->m_sb.sb_rextents ||
low_rec->ar_startext == high_rec->ar_startext)
return 0;
- if (high_rec->ar_startext >= mp->m_sb.sb_rextents)
- high_rec->ar_startext = mp->m_sb.sb_rextents - 1;
+ if (high_rec->ar_startext > mp->m_sb.sb_rextents)
+ high_rec->ar_startext = mp->m_sb.sb_rextents;
/* Iterate the bitmap, looking for discrepancies. */
rtstart = low_rec->ar_startext;
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index c35009a86699..83b1e8c6c18f 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -685,12 +685,10 @@ out_unlock_iolock:
}
/*
- * dead simple method of punching delalyed allocation blocks from a range in
- * the inode. Walks a block at a time so will be slow, but is only executed in
- * rare error cases so the overhead is not critical. This will always punch out
- * both the start and end blocks, even if the ranges only partially overlap
- * them, so it is up to the caller to ensure that partial blocks are not
- * passed in.
+ * Dead simple method of punching delalyed allocation blocks from a range in
+ * the inode. This will always punch out both the start and end blocks, even
+ * if the ranges only partially overlap them, so it is up to the caller to
+ * ensure that partial blocks are not passed in.
*/
int
xfs_bmap_punch_delalloc_range(
@@ -698,63 +696,44 @@ xfs_bmap_punch_delalloc_range(
xfs_fileoff_t start_fsb,
xfs_fileoff_t length)
{
- xfs_fileoff_t remaining = length;
+ struct xfs_ifork *ifp = &ip->i_df;
+ xfs_fileoff_t end_fsb = start_fsb + length;
+ struct xfs_bmbt_irec got, del;
+ struct xfs_iext_cursor icur;
int error = 0;
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
- do {
- int done;
- xfs_bmbt_irec_t imap;
- int nimaps = 1;
- xfs_fsblock_t firstblock;
- struct xfs_defer_ops dfops;
+ if (!(ifp->if_flags & XFS_IFEXTENTS)) {
+ error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK);
+ if (error)
+ return error;
+ }
- /*
- * Map the range first and check that it is a delalloc extent
- * before trying to unmap the range. Otherwise we will be
- * trying to remove a real extent (which requires a
- * transaction) or a hole, which is probably a bad idea...
- */
- error = xfs_bmapi_read(ip, start_fsb, 1, &imap, &nimaps,
- XFS_BMAPI_ENTIRE);
+ if (!xfs_iext_lookup_extent_before(ip, ifp, &end_fsb, &icur, &got))
+ return 0;
- if (error) {
- /* something screwed, just bail */
- if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
- xfs_alert(ip->i_mount,
- "Failed delalloc mapping lookup ino %lld fsb %lld.",
- ip->i_ino, start_fsb);
- }
- break;
- }
- if (!nimaps) {
- /* nothing there */
- goto next_block;
- }
- if (imap.br_startblock != DELAYSTARTBLOCK) {
- /* been converted, ignore */
- goto next_block;
- }
- WARN_ON(imap.br_blockcount == 0);
+ while (got.br_startoff + got.br_blockcount > start_fsb) {
+ del = got;
+ xfs_trim_extent(&del, start_fsb, length);
/*
- * Note: while we initialise the firstblock/dfops pair, they
- * should never be used because blocks should never be
- * allocated or freed for a delalloc extent and hence we need
- * don't cancel or finish them after the xfs_bunmapi() call.
+ * A delete can push the cursor forward. Step back to the
+ * previous extent on non-delalloc or extents outside the
+ * target range.
*/
- xfs_defer_init(&dfops, &firstblock);
- error = xfs_bunmapi(NULL, ip, start_fsb, 1, 0, 1, &firstblock,
- &dfops, &done);
- if (error)
- break;
+ if (!del.br_blockcount ||
+ !isnullstartblock(del.br_startblock)) {
+ if (!xfs_iext_prev_extent(ifp, &icur, &got))
+ break;
+ continue;
+ }
- ASSERT(!xfs_defer_has_unfinished_work(&dfops));
-next_block:
- start_fsb++;
- remaining--;
- } while(remaining > 0);
+ error = xfs_bmap_del_extent_delay(ip, XFS_DATA_FORK, &icur,
+ &got, &del);
+ if (error || !xfs_iext_get_extent(ifp, &icur, &got))
+ break;
+ }
return error;
}
@@ -1208,7 +1187,22 @@ xfs_free_file_space(
return 0;
if (offset + len > XFS_ISIZE(ip))
len = XFS_ISIZE(ip) - offset;
- return iomap_zero_range(VFS_I(ip), offset, len, NULL, &xfs_iomap_ops);
+ error = iomap_zero_range(VFS_I(ip), offset, len, NULL, &xfs_iomap_ops);
+ if (error)
+ return error;
+
+ /*
+ * If we zeroed right up to EOF and EOF straddles a page boundary we
+ * must make sure that the post-EOF area is also zeroed because the
+ * page could be mmap'd and iomap_zero_range doesn't do that for us.
+ * Writeback of the eof page will do this, albeit clumsily.
+ */
+ if (offset + len >= XFS_ISIZE(ip) && ((offset + len) & PAGE_MASK)) {
+ error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
+ (offset + len) & ~PAGE_MASK, LLONG_MAX);
+ }
+
+ return error;
}
/*
@@ -1404,6 +1398,10 @@ xfs_insert_file_space(
trace_xfs_insert_file_space(ip);
+ error = xfs_bmap_can_insert_extents(ip, stop_fsb, shift_fsb);
+ if (error)
+ return error;
+
error = xfs_prepare_shift(ip, offset);
if (error)
return error;
diff --git a/fs/xfs/xfs_fsmap.c b/fs/xfs/xfs_fsmap.c
index c34fa9c342f2..c7157bc48bd1 100644
--- a/fs/xfs/xfs_fsmap.c
+++ b/fs/xfs/xfs_fsmap.c
@@ -513,8 +513,8 @@ xfs_getfsmap_rtdev_rtbitmap_query(
struct xfs_trans *tp,
struct xfs_getfsmap_info *info)
{
- struct xfs_rtalloc_rec alow;
- struct xfs_rtalloc_rec ahigh;
+ struct xfs_rtalloc_rec alow = { 0 };
+ struct xfs_rtalloc_rec ahigh = { 0 };
int error;
xfs_ilock(tp->t_mountp->m_rbmip, XFS_ILOCK_SHARED);
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index a7afcad6b711..3f2bd6032cf8 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -387,7 +387,7 @@ xfs_reserve_blocks(
do {
free = percpu_counter_sum(&mp->m_fdblocks) -
mp->m_alloc_set_aside;
- if (!free)
+ if (free <= 0)
break;
delta = request - mp->m_resblks;
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 7a96c4e0ab5c..5df4de666cc1 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -3236,7 +3236,6 @@ xfs_iflush_cluster(
struct xfs_inode *cip;
int nr_found;
int clcount = 0;
- int bufwasdelwri;
int i;
pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
@@ -3360,37 +3359,22 @@ cluster_corrupt_out:
* inode buffer and shut down the filesystem.
*/
rcu_read_unlock();
- /*
- * Clean up the buffer. If it was delwri, just release it --
- * brelse can handle it with no problems. If not, shut down the
- * filesystem before releasing the buffer.
- */
- bufwasdelwri = (bp->b_flags & _XBF_DELWRI_Q);
- if (bufwasdelwri)
- xfs_buf_relse(bp);
-
xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
- if (!bufwasdelwri) {
- /*
- * Just like incore_relse: if we have b_iodone functions,
- * mark the buffer as an error and call them. Otherwise
- * mark it as stale and brelse.
- */
- if (bp->b_iodone) {
- bp->b_flags &= ~XBF_DONE;
- xfs_buf_stale(bp);
- xfs_buf_ioerror(bp, -EIO);
- xfs_buf_ioend(bp);
- } else {
- xfs_buf_stale(bp);
- xfs_buf_relse(bp);
- }
- }
-
/*
- * Unlocks the flush lock
+ * We'll always have an inode attached to the buffer for completion
+ * process by the time we are called from xfs_iflush(). Hence we have
+ * always need to do IO completion processing to abort the inodes
+ * attached to the buffer. handle them just like the shutdown case in
+ * xfs_buf_submit().
*/
+ ASSERT(bp->b_iodone);
+ bp->b_flags &= ~XBF_DONE;
+ xfs_buf_stale(bp);
+ xfs_buf_ioerror(bp, -EIO);
+ xfs_buf_ioend(bp);
+
+ /* abort the corrupt inode, as it was not attached to the buffer */
xfs_iflush_abort(cip, false);
kmem_free(cilist);
xfs_perag_put(pag);
@@ -3486,12 +3470,17 @@ xfs_iflush(
xfs_log_force(mp, 0);
/*
- * inode clustering:
- * see if other inodes can be gathered into this write
+ * inode clustering: try to gather other inodes into this write
+ *
+ * Note: Any error during clustering will result in the filesystem
+ * being shut down and completion callbacks run on the cluster buffer.
+ * As we have already flushed and attached this inode to the buffer,
+ * it has already been aborted and released by xfs_iflush_cluster() and
+ * so we have no further error handling to do here.
*/
error = xfs_iflush_cluster(ip, bp);
if (error)
- goto cluster_corrupt_out;
+ return error;
*bpp = bp;
return 0;
@@ -3500,12 +3489,8 @@ corrupt_out:
if (bp)
xfs_buf_relse(bp);
xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
-cluster_corrupt_out:
- error = -EFSCORRUPTED;
abort_out:
- /*
- * Unlocks the flush lock
- */
+ /* abort the corrupt inode, as it was not attached to the buffer */
xfs_iflush_abort(ip, false);
return error;
}
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 49f5492eed3b..55876dd02f0c 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -963,12 +963,13 @@ xfs_ilock_for_iomap(
unsigned *lockmode)
{
unsigned mode = XFS_ILOCK_SHARED;
+ bool is_write = flags & (IOMAP_WRITE | IOMAP_ZERO);
/*
* COW writes may allocate delalloc space or convert unwritten COW
* extents, so we need to make sure to take the lock exclusively here.
*/
- if (xfs_is_reflink_inode(ip) && (flags & (IOMAP_WRITE | IOMAP_ZERO))) {
+ if (xfs_is_reflink_inode(ip) && is_write) {
/*
* FIXME: It could still overwrite on unshared extents and not
* need allocation.
@@ -989,6 +990,7 @@ xfs_ilock_for_iomap(
mode = XFS_ILOCK_EXCL;
}
+relock:
if (flags & IOMAP_NOWAIT) {
if (!xfs_ilock_nowait(ip, mode))
return -EAGAIN;
@@ -996,6 +998,17 @@ xfs_ilock_for_iomap(
xfs_ilock(ip, mode);
}
+ /*
+ * The reflink iflag could have changed since the earlier unlocked
+ * check, so if we got ILOCK_SHARED for a write and but we're now a
+ * reflink inode we have to switch to ILOCK_EXCL and relock.
+ */
+ if (mode == XFS_ILOCK_SHARED && is_write && xfs_is_reflink_inode(ip)) {
+ xfs_iunlock(ip, mode);
+ mode = XFS_ILOCK_EXCL;
+ goto relock;
+ }
+
*lockmode = mode;
return 0;
}
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index e040af120b69..524f543c5b82 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -258,7 +258,12 @@ xfs_trans_alloc(
if (!(flags & XFS_TRANS_NO_WRITECOUNT))
sb_start_intwrite(mp->m_super);
- WARN_ON(mp->m_super->s_writers.frozen == SB_FREEZE_COMPLETE);
+ /*
+ * Zero-reservation ("empty") transactions can't modify anything, so
+ * they're allowed to run while we're frozen.
+ */
+ WARN_ON(resp->tr_logres > 0 &&
+ mp->m_super->s_writers.frozen == SB_FREEZE_COMPLETE);
atomic_inc(&mp->m_active_trans);
tp = kmem_zone_zalloc(xfs_trans_zone,