summaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorTakashi Iwai <tiwai@suse.de>2019-02-08 14:20:32 +0100
committerTakashi Iwai <tiwai@suse.de>2019-02-08 14:20:32 +0100
commitd02cac152c97dffcb0cdd91e09b54fd6e2cca63d (patch)
tree68e4c6bd842703009f3edbf8f0e0e9326e4b2fad /fs
parent36e4617c01153757cde9e5fcd375a75a8f8425c3 (diff)
parenta50e32694fbcdbf55875095258b9398e2eabd71f (diff)
downloadlinux-d02cac152c97dffcb0cdd91e09b54fd6e2cca63d.tar.bz2
Merge tag 'asoc-v5.1' of https://git.kernel.org/pub/scm/linux/kernel/git/broonie/sound into for-next
ASoC: Updates for v5.1 Lots and lots of new drivers so far, a highlight being the MediaTek BTCVSD which is a driver for a Bluetooth radio chip - the first such driver we've had upstream. Hopefully we will soon also see a baseband with an upstream driver! - Support for only powering up channels that are actively being used. - Quite a few improvements to simplify the generic card drivers, especially the merge of the SCU cards into the main generic drivers. - Lots of fixes for probing on Intel systems, trying to rationalize things to look more standard from a framework point of view. - New drivers for Asahi Kasei Microdevices AK4497, Cirrus Logic CS4341, Google ChromeOS embedded controllers, Ingenic JZ4725B, MediaTek BTCVSD, MT8183 and MT6358, NXP MICFIL, Rockchip RK3328, Spreadtrum DMA controllers, Qualcomm WCD9335, Xilinx S/PDIF and PCM formatters.
Diffstat (limited to 'fs')
-rw-r--r--fs/afs/flock.c4
-rw-r--r--fs/afs/inode.c3
-rw-r--r--fs/afs/protocol_yfs.h11
-rw-r--r--fs/afs/rxrpc.c53
-rw-r--r--fs/afs/server_list.c4
-rw-r--r--fs/afs/yfsclient.c2
-rw-r--r--fs/block_dev.c28
-rw-r--r--fs/btrfs/ctree.c16
-rw-r--r--fs/btrfs/ctree.h7
-rw-r--r--fs/btrfs/disk-io.c12
-rw-r--r--fs/btrfs/extent-tree.c21
-rw-r--r--fs/btrfs/inode.c5
-rw-r--r--fs/btrfs/ioctl.c49
-rw-r--r--fs/btrfs/volumes.c12
-rw-r--r--fs/ceph/addr.c5
-rw-r--r--fs/ceph/caps.c2
-rw-r--r--fs/ceph/quota.c13
-rw-r--r--fs/ceph/super.c4
-rw-r--r--fs/cifs/cifs_debug.c1
-rw-r--r--fs/cifs/cifsfs.h2
-rw-r--r--fs/cifs/cifsglob.h20
-rw-r--r--fs/cifs/cifssmb.c65
-rw-r--r--fs/cifs/connect.c28
-rw-r--r--fs/cifs/dfs_cache.c1
-rw-r--r--fs/cifs/file.c45
-rw-r--r--fs/cifs/inode.c10
-rw-r--r--fs/cifs/smb2file.c8
-rw-r--r--fs/cifs/smb2inode.c17
-rw-r--r--fs/cifs/smb2misc.c7
-rw-r--r--fs/cifs/smb2ops.c68
-rw-r--r--fs/cifs/smb2pdu.c54
-rw-r--r--fs/cifs/trace.c10
-rw-r--r--fs/cifs/trace.h10
-rw-r--r--fs/cifs/transport.c113
-rw-r--r--fs/direct-io.c5
-rw-r--r--fs/fs-writeback.c40
-rw-r--r--fs/hugetlbfs/inode.c61
-rw-r--r--fs/nfs/nfs4file.c8
-rw-r--r--fs/notify/inotify/inotify_user.c6
-rw-r--r--fs/pstore/ram.c12
-rw-r--r--fs/sysfs/dir.c3
-rw-r--r--fs/sysfs/file.c6
-rw-r--r--fs/sysfs/group.c3
-rw-r--r--fs/sysfs/symlink.c3
44 files changed, 611 insertions, 246 deletions
diff --git a/fs/afs/flock.c b/fs/afs/flock.c
index 0568fd986821..e432bd27a2e7 100644
--- a/fs/afs/flock.c
+++ b/fs/afs/flock.c
@@ -208,7 +208,7 @@ again:
/* The new front of the queue now owns the state variables. */
next = list_entry(vnode->pending_locks.next,
struct file_lock, fl_u.afs.link);
- vnode->lock_key = afs_file_key(next->fl_file);
+ vnode->lock_key = key_get(afs_file_key(next->fl_file));
vnode->lock_type = (next->fl_type == F_RDLCK) ? AFS_LOCK_READ : AFS_LOCK_WRITE;
vnode->lock_state = AFS_VNODE_LOCK_WAITING_FOR_CB;
goto again;
@@ -413,7 +413,7 @@ static void afs_dequeue_lock(struct afs_vnode *vnode, struct file_lock *fl)
/* The new front of the queue now owns the state variables. */
next = list_entry(vnode->pending_locks.next,
struct file_lock, fl_u.afs.link);
- vnode->lock_key = afs_file_key(next->fl_file);
+ vnode->lock_key = key_get(afs_file_key(next->fl_file));
vnode->lock_type = (next->fl_type == F_RDLCK) ? AFS_LOCK_READ : AFS_LOCK_WRITE;
vnode->lock_state = AFS_VNODE_LOCK_WAITING_FOR_CB;
afs_lock_may_be_available(vnode);
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index 6b17d3620414..1a4ce07fb406 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -414,7 +414,6 @@ int afs_validate(struct afs_vnode *vnode, struct key *key)
} else if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) {
valid = true;
} else {
- vnode->cb_s_break = vnode->cb_interest->server->cb_s_break;
vnode->cb_v_break = vnode->volume->cb_v_break;
valid = false;
}
@@ -546,6 +545,8 @@ void afs_evict_inode(struct inode *inode)
#endif
afs_put_permits(rcu_access_pointer(vnode->permit_cache));
+ key_put(vnode->lock_key);
+ vnode->lock_key = NULL;
_leave("");
}
diff --git a/fs/afs/protocol_yfs.h b/fs/afs/protocol_yfs.h
index 07bc10f076aa..d443e2bfa094 100644
--- a/fs/afs/protocol_yfs.h
+++ b/fs/afs/protocol_yfs.h
@@ -161,3 +161,14 @@ struct yfs_xdr_YFSStoreVolumeStatus {
struct yfs_xdr_u64 max_quota;
struct yfs_xdr_u64 file_quota;
} __packed;
+
+enum yfs_lock_type {
+ yfs_LockNone = -1,
+ yfs_LockRead = 0,
+ yfs_LockWrite = 1,
+ yfs_LockExtend = 2,
+ yfs_LockRelease = 3,
+ yfs_LockMandatoryRead = 0x100,
+ yfs_LockMandatoryWrite = 0x101,
+ yfs_LockMandatoryExtend = 0x102,
+};
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index a7b44863d502..2c588f9bbbda 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -23,6 +23,7 @@ struct workqueue_struct *afs_async_calls;
static void afs_wake_up_call_waiter(struct sock *, struct rxrpc_call *, unsigned long);
static long afs_wait_for_call_to_complete(struct afs_call *, struct afs_addr_cursor *);
static void afs_wake_up_async_call(struct sock *, struct rxrpc_call *, unsigned long);
+static void afs_delete_async_call(struct work_struct *);
static void afs_process_async_call(struct work_struct *);
static void afs_rx_new_call(struct sock *, struct rxrpc_call *, unsigned long);
static void afs_rx_discard_new_call(struct rxrpc_call *, unsigned long);
@@ -203,20 +204,26 @@ void afs_put_call(struct afs_call *call)
}
}
+static struct afs_call *afs_get_call(struct afs_call *call,
+ enum afs_call_trace why)
+{
+ int u = atomic_inc_return(&call->usage);
+
+ trace_afs_call(call, why, u,
+ atomic_read(&call->net->nr_outstanding_calls),
+ __builtin_return_address(0));
+ return call;
+}
+
/*
* Queue the call for actual work.
*/
static void afs_queue_call_work(struct afs_call *call)
{
if (call->type->work) {
- int u = atomic_inc_return(&call->usage);
-
- trace_afs_call(call, afs_call_trace_work, u,
- atomic_read(&call->net->nr_outstanding_calls),
- __builtin_return_address(0));
-
INIT_WORK(&call->work, call->type->work);
+ afs_get_call(call, afs_call_trace_work);
if (!queue_work(afs_wq, &call->work))
afs_put_call(call);
}
@@ -398,6 +405,12 @@ long afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call,
}
}
+ /* If the call is going to be asynchronous, we need an extra ref for
+ * the call to hold itself so the caller need not hang on to its ref.
+ */
+ if (call->async)
+ afs_get_call(call, afs_call_trace_get);
+
/* create a call */
rxcall = rxrpc_kernel_begin_call(call->net->socket, srx, call->key,
(unsigned long)call,
@@ -438,15 +451,17 @@ long afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call,
goto error_do_abort;
}
- /* at this point, an async call may no longer exist as it may have
- * already completed */
- if (call->async)
+ /* Note that at this point, we may have received the reply or an abort
+ * - and an asynchronous call may already have completed.
+ */
+ if (call->async) {
+ afs_put_call(call);
return -EINPROGRESS;
+ }
return afs_wait_for_call_to_complete(call, ac);
error_do_abort:
- call->state = AFS_CALL_COMPLETE;
if (ret != -ECONNABORTED) {
rxrpc_kernel_abort_call(call->net->socket, rxcall,
RX_USER_ABORT, ret, "KSD");
@@ -463,8 +478,24 @@ error_do_abort:
error_kill_call:
if (call->type->done)
call->type->done(call);
- afs_put_call(call);
+
+ /* We need to dispose of the extra ref we grabbed for an async call.
+ * The call, however, might be queued on afs_async_calls and we need to
+ * make sure we don't get any more notifications that might requeue it.
+ */
+ if (call->rxcall) {
+ rxrpc_kernel_end_call(call->net->socket, call->rxcall);
+ call->rxcall = NULL;
+ }
+ if (call->async) {
+ if (cancel_work_sync(&call->async_work))
+ afs_put_call(call);
+ afs_put_call(call);
+ }
+
ac->error = ret;
+ call->state = AFS_CALL_COMPLETE;
+ afs_put_call(call);
_leave(" = %d", ret);
return ret;
}
diff --git a/fs/afs/server_list.c b/fs/afs/server_list.c
index 95d0761cdb34..155dc14caef9 100644
--- a/fs/afs/server_list.c
+++ b/fs/afs/server_list.c
@@ -42,9 +42,7 @@ struct afs_server_list *afs_alloc_server_list(struct afs_cell *cell,
if (vldb->fs_mask[i] & type_mask)
nr_servers++;
- slist = kzalloc(sizeof(struct afs_server_list) +
- sizeof(struct afs_server_entry) * nr_servers,
- GFP_KERNEL);
+ slist = kzalloc(struct_size(slist, servers, nr_servers), GFP_KERNEL);
if (!slist)
goto error;
diff --git a/fs/afs/yfsclient.c b/fs/afs/yfsclient.c
index 12658c1363ae..5aa57929e8c2 100644
--- a/fs/afs/yfsclient.c
+++ b/fs/afs/yfsclient.c
@@ -803,7 +803,7 @@ int yfs_fs_create_file(struct afs_fs_cursor *fc,
bp = xdr_encode_YFSFid(bp, &vnode->fid);
bp = xdr_encode_string(bp, name, namesz);
bp = xdr_encode_YFSStoreStatus_mode(bp, mode);
- bp = xdr_encode_u32(bp, 0); /* ViceLockType */
+ bp = xdr_encode_u32(bp, yfs_LockNone); /* ViceLockType */
yfs_check_req(call, bp);
afs_use_fs_server(call, fc->cbi);
diff --git a/fs/block_dev.c b/fs/block_dev.c
index c546cdce77e6..58a4c1217fa8 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -104,6 +104,20 @@ void invalidate_bdev(struct block_device *bdev)
}
EXPORT_SYMBOL(invalidate_bdev);
+static void set_init_blocksize(struct block_device *bdev)
+{
+ unsigned bsize = bdev_logical_block_size(bdev);
+ loff_t size = i_size_read(bdev->bd_inode);
+
+ while (bsize < PAGE_SIZE) {
+ if (size & bsize)
+ break;
+ bsize <<= 1;
+ }
+ bdev->bd_block_size = bsize;
+ bdev->bd_inode->i_blkbits = blksize_bits(bsize);
+}
+
int set_blocksize(struct block_device *bdev, int size)
{
/* Size must be a power of two, and between 512 and PAGE_SIZE */
@@ -1431,18 +1445,9 @@ EXPORT_SYMBOL(check_disk_change);
void bd_set_size(struct block_device *bdev, loff_t size)
{
- unsigned bsize = bdev_logical_block_size(bdev);
-
inode_lock(bdev->bd_inode);
i_size_write(bdev->bd_inode, size);
inode_unlock(bdev->bd_inode);
- while (bsize < PAGE_SIZE) {
- if (size & bsize)
- break;
- bsize <<= 1;
- }
- bdev->bd_block_size = bsize;
- bdev->bd_inode->i_blkbits = blksize_bits(bsize);
}
EXPORT_SYMBOL(bd_set_size);
@@ -1519,8 +1524,10 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
}
}
- if (!ret)
+ if (!ret) {
bd_set_size(bdev,(loff_t)get_capacity(disk)<<9);
+ set_init_blocksize(bdev);
+ }
/*
* If the device is invalidated, rescan partition
@@ -1555,6 +1562,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
goto out_clear;
}
bd_set_size(bdev, (loff_t)bdev->bd_part->nr_sects << 9);
+ set_init_blocksize(bdev);
}
if (bdev->bd_bdi == &noop_backing_dev_info)
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index d92462fe66c8..f64aad613727 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -1016,19 +1016,21 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
parent_start = parent->start;
/*
- * If we are COWing a node/leaf from the extent, chunk or device trees,
- * make sure that we do not finish block group creation of pending block
- * groups. We do this to avoid a deadlock.
+ * If we are COWing a node/leaf from the extent, chunk, device or free
+ * space trees, make sure that we do not finish block group creation of
+ * pending block groups. We do this to avoid a deadlock.
* COWing can result in allocation of a new chunk, and flushing pending
* block groups (btrfs_create_pending_block_groups()) can be triggered
* when finishing allocation of a new chunk. Creation of a pending block
- * group modifies the extent, chunk and device trees, therefore we could
- * deadlock with ourselves since we are holding a lock on an extent
- * buffer that btrfs_create_pending_block_groups() may try to COW later.
+ * group modifies the extent, chunk, device and free space trees,
+ * therefore we could deadlock with ourselves since we are holding a
+ * lock on an extent buffer that btrfs_create_pending_block_groups() may
+ * try to COW later.
*/
if (root == fs_info->extent_root ||
root == fs_info->chunk_root ||
- root == fs_info->dev_root)
+ root == fs_info->dev_root ||
+ root == fs_info->free_space_root)
trans->can_flush_pending_bgs = false;
cow = btrfs_alloc_tree_block(trans, root, parent_start,
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 0a68cf7032f5..7a2a2621f0d9 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -35,6 +35,7 @@
struct btrfs_trans_handle;
struct btrfs_transaction;
struct btrfs_pending_snapshot;
+struct btrfs_delayed_ref_root;
extern struct kmem_cache *btrfs_trans_handle_cachep;
extern struct kmem_cache *btrfs_bit_radix_cachep;
extern struct kmem_cache *btrfs_path_cachep;
@@ -786,6 +787,9 @@ enum {
* main phase. The fs_info::balance_ctl is initialized.
*/
BTRFS_FS_BALANCE_RUNNING,
+
+ /* Indicate that the cleaner thread is awake and doing something. */
+ BTRFS_FS_CLEANER_RUNNING,
};
struct btrfs_fs_info {
@@ -2661,6 +2665,9 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
unsigned long count);
int btrfs_async_run_delayed_refs(struct btrfs_fs_info *fs_info,
unsigned long count, u64 transid, int wait);
+void btrfs_cleanup_ref_head_accounting(struct btrfs_fs_info *fs_info,
+ struct btrfs_delayed_ref_root *delayed_refs,
+ struct btrfs_delayed_ref_head *head);
int btrfs_lookup_data_extent(struct btrfs_fs_info *fs_info, u64 start, u64 len);
int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 bytenr,
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 8da2f380d3c0..6a2a2a951705 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1682,6 +1682,8 @@ static int cleaner_kthread(void *arg)
while (1) {
again = 0;
+ set_bit(BTRFS_FS_CLEANER_RUNNING, &fs_info->flags);
+
/* Make the cleaner go to sleep early. */
if (btrfs_need_cleaner_sleep(fs_info))
goto sleep;
@@ -1728,6 +1730,7 @@ static int cleaner_kthread(void *arg)
*/
btrfs_delete_unused_bgs(fs_info);
sleep:
+ clear_bit(BTRFS_FS_CLEANER_RUNNING, &fs_info->flags);
if (kthread_should_park())
kthread_parkme();
if (kthread_should_stop())
@@ -4201,6 +4204,14 @@ static void btrfs_destroy_all_ordered_extents(struct btrfs_fs_info *fs_info)
spin_lock(&fs_info->ordered_root_lock);
}
spin_unlock(&fs_info->ordered_root_lock);
+
+ /*
+ * We need this here because if we've been flipped read-only we won't
+ * get sync() from the umount, so we need to make sure any ordered
+ * extents that haven't had their dirty pages IO start writeout yet
+ * actually get run and error out properly.
+ */
+ btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, (u64)-1);
}
static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
@@ -4265,6 +4276,7 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
if (pin_bytes)
btrfs_pin_extent(fs_info, head->bytenr,
head->num_bytes, 1);
+ btrfs_cleanup_ref_head_accounting(fs_info, delayed_refs, head);
btrfs_put_delayed_ref_head(head);
cond_resched();
spin_lock(&delayed_refs->lock);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index b15afeae16df..d81035b7ea7d 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2456,12 +2456,10 @@ static int run_and_cleanup_extent_op(struct btrfs_trans_handle *trans,
return ret ? ret : 1;
}
-static void cleanup_ref_head_accounting(struct btrfs_trans_handle *trans,
- struct btrfs_delayed_ref_head *head)
+void btrfs_cleanup_ref_head_accounting(struct btrfs_fs_info *fs_info,
+ struct btrfs_delayed_ref_root *delayed_refs,
+ struct btrfs_delayed_ref_head *head)
{
- struct btrfs_fs_info *fs_info = trans->fs_info;
- struct btrfs_delayed_ref_root *delayed_refs =
- &trans->transaction->delayed_refs;
int nr_items = 1; /* Dropping this ref head update. */
if (head->total_ref_mod < 0) {
@@ -2544,7 +2542,7 @@ static int cleanup_ref_head(struct btrfs_trans_handle *trans,
}
}
- cleanup_ref_head_accounting(trans, head);
+ btrfs_cleanup_ref_head_accounting(fs_info, delayed_refs, head);
trace_run_delayed_ref_head(fs_info, head, 0);
btrfs_delayed_ref_unlock(head);
@@ -4954,6 +4952,15 @@ static void flush_space(struct btrfs_fs_info *fs_info,
ret = 0;
break;
case COMMIT_TRANS:
+ /*
+ * If we have pending delayed iputs then we could free up a
+ * bunch of pinned space, so make sure we run the iputs before
+ * we do our pinned bytes check below.
+ */
+ mutex_lock(&fs_info->cleaner_delayed_iput_mutex);
+ btrfs_run_delayed_iputs(fs_info);
+ mutex_unlock(&fs_info->cleaner_delayed_iput_mutex);
+
ret = may_commit_transaction(fs_info, space_info);
break;
default:
@@ -7188,7 +7195,7 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
if (head->must_insert_reserved)
ret = 1;
- cleanup_ref_head_accounting(trans, head);
+ btrfs_cleanup_ref_head_accounting(trans->fs_info, delayed_refs, head);
mutex_unlock(&head->mutex);
btrfs_put_delayed_ref_head(head);
return ret;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 43eb4535319d..5c349667c761 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3129,9 +3129,6 @@ out:
/* once for the tree */
btrfs_put_ordered_extent(ordered_extent);
- /* Try to release some metadata so we don't get an OOM but don't wait */
- btrfs_btree_balance_dirty_nodelay(fs_info);
-
return ret;
}
@@ -3254,6 +3251,8 @@ void btrfs_add_delayed_iput(struct inode *inode)
ASSERT(list_empty(&binode->delayed_iput));
list_add_tail(&binode->delayed_iput, &fs_info->delayed_iputs);
spin_unlock(&fs_info->delayed_iput_lock);
+ if (!test_bit(BTRFS_FS_CLEANER_RUNNING, &fs_info->flags))
+ wake_up_process(fs_info->cleaner_kthread);
}
void btrfs_run_delayed_iputs(struct btrfs_fs_info *fs_info)
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index fab9443f6a42..9c8e1734429c 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -3221,6 +3221,26 @@ static void btrfs_double_inode_lock(struct inode *inode1, struct inode *inode2)
inode_lock_nested(inode2, I_MUTEX_CHILD);
}
+static void btrfs_double_extent_unlock(struct inode *inode1, u64 loff1,
+ struct inode *inode2, u64 loff2, u64 len)
+{
+ unlock_extent(&BTRFS_I(inode1)->io_tree, loff1, loff1 + len - 1);
+ unlock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1);
+}
+
+static void btrfs_double_extent_lock(struct inode *inode1, u64 loff1,
+ struct inode *inode2, u64 loff2, u64 len)
+{
+ if (inode1 < inode2) {
+ swap(inode1, inode2);
+ swap(loff1, loff2);
+ } else if (inode1 == inode2 && loff2 < loff1) {
+ swap(loff1, loff2);
+ }
+ lock_extent(&BTRFS_I(inode1)->io_tree, loff1, loff1 + len - 1);
+ lock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1);
+}
+
static int btrfs_extent_same_range(struct inode *src, u64 loff, u64 olen,
struct inode *dst, u64 dst_loff)
{
@@ -3242,11 +3262,12 @@ static int btrfs_extent_same_range(struct inode *src, u64 loff, u64 olen,
return -EINVAL;
/*
- * Lock destination range to serialize with concurrent readpages().
+ * Lock destination range to serialize with concurrent readpages() and
+ * source range to serialize with relocation.
*/
- lock_extent(&BTRFS_I(dst)->io_tree, dst_loff, dst_loff + len - 1);
+ btrfs_double_extent_lock(src, loff, dst, dst_loff, len);
ret = btrfs_clone(src, dst, loff, olen, len, dst_loff, 1);
- unlock_extent(&BTRFS_I(dst)->io_tree, dst_loff, dst_loff + len - 1);
+ btrfs_double_extent_unlock(src, loff, dst, dst_loff, len);
return ret;
}
@@ -3905,17 +3926,33 @@ static noinline int btrfs_clone_files(struct file *file, struct file *file_src,
len = ALIGN(src->i_size, bs) - off;
if (destoff > inode->i_size) {
+ const u64 wb_start = ALIGN_DOWN(inode->i_size, bs);
+
ret = btrfs_cont_expand(inode, inode->i_size, destoff);
if (ret)
return ret;
+ /*
+ * We may have truncated the last block if the inode's size is
+ * not sector size aligned, so we need to wait for writeback to
+ * complete before proceeding further, otherwise we can race
+ * with cloning and attempt to increment a reference to an
+ * extent that no longer exists (writeback completed right after
+ * we found the previous extent covering eof and before we
+ * attempted to increment its reference count).
+ */
+ ret = btrfs_wait_ordered_range(inode, wb_start,
+ destoff - wb_start);
+ if (ret)
+ return ret;
}
/*
- * Lock destination range to serialize with concurrent readpages().
+ * Lock destination range to serialize with concurrent readpages() and
+ * source range to serialize with relocation.
*/
- lock_extent(&BTRFS_I(inode)->io_tree, destoff, destoff + len - 1);
+ btrfs_double_extent_lock(src, off, inode, destoff, len);
ret = btrfs_clone(src, inode, off, olen, len, destoff, 0);
- unlock_extent(&BTRFS_I(inode)->io_tree, destoff, destoff + len - 1);
+ btrfs_double_extent_unlock(src, off, inode, destoff, len);
/*
* Truncate page cache pages so that future reads will see the cloned
* data immediately and not the previous data.
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 2576b1a379c9..3e4f8f88353e 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -7825,6 +7825,18 @@ static int verify_one_dev_extent(struct btrfs_fs_info *fs_info,
ret = -EUCLEAN;
goto out;
}
+
+ /* It's possible this device is a dummy for seed device */
+ if (dev->disk_total_bytes == 0) {
+ dev = find_device(fs_info->fs_devices->seed, devid, NULL);
+ if (!dev) {
+ btrfs_err(fs_info, "failed to find seed devid %llu",
+ devid);
+ ret = -EUCLEAN;
+ goto out;
+ }
+ }
+
if (physical_offset + physical_len > dev->disk_total_bytes) {
btrfs_err(fs_info,
"dev extent devid %llu physical offset %llu len %llu is beyond device boundary %llu",
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 5d0c05e288cc..a47c541f8006 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -1494,10 +1494,7 @@ static vm_fault_t ceph_filemap_fault(struct vm_fault *vmf)
if (err < 0 || off >= i_size_read(inode)) {
unlock_page(page);
put_page(page);
- if (err == -ENOMEM)
- ret = VM_FAULT_OOM;
- else
- ret = VM_FAULT_SIGBUS;
+ ret = vmf_error(err);
goto out_inline;
}
if (err < PAGE_SIZE)
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 94c026bba2c2..bba28a5034ba 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -1035,6 +1035,8 @@ static void drop_inode_snap_realm(struct ceph_inode_info *ci)
list_del_init(&ci->i_snap_realm_item);
ci->i_snap_realm_counter++;
ci->i_snap_realm = NULL;
+ if (realm->ino == ci->i_vino.ino)
+ realm->inode = NULL;
spin_unlock(&realm->inodes_with_caps_lock);
ceph_put_snap_realm(ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc,
realm);
diff --git a/fs/ceph/quota.c b/fs/ceph/quota.c
index 03f4d24db8fe..9455d3aef0c3 100644
--- a/fs/ceph/quota.c
+++ b/fs/ceph/quota.c
@@ -3,19 +3,6 @@
* quota.c - CephFS quota
*
* Copyright (C) 2017-2018 SUSE
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/statfs.h>
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 4e9a7cc488da..da2cd8e89062 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -530,7 +530,7 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root)
seq_putc(m, ',');
pos = m->count;
- ret = ceph_print_client_options(m, fsc->client);
+ ret = ceph_print_client_options(m, fsc->client, false);
if (ret)
return ret;
@@ -640,7 +640,7 @@ static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt,
opt = NULL; /* fsc->client now owns this */
fsc->client->extra_mon_dispatch = extra_mon_dispatch;
- fsc->client->osdc.abort_on_full = true;
+ ceph_set_opt(fsc->client, ABORT_ON_FULL);
if (!fsopt->mds_namespace) {
ceph_monc_want_map(&fsc->client->monc, CEPH_SUB_MDSMAP,
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index 593fb422d0f3..e92a2fee3c57 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -252,6 +252,7 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
seq_printf(m, ",ACL");
#endif
seq_putc(m, '\n');
+ seq_printf(m, "CIFSMaxBufSize: %d\n", CIFSMaxBufSize);
seq_printf(m, "Active VFS Requests: %d\n", GlobalTotalActiveXid);
seq_printf(m, "Servers:");
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 26776eddd85d..d1f9c2f3f575 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -150,5 +150,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
extern const struct export_operations cifs_export_ops;
#endif /* CONFIG_CIFS_NFSD_EXPORT */
-#define CIFS_VERSION "2.15"
+#define CIFS_VERSION "2.16"
#endif /* _CIFSFS_H */
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 01ded7038b19..94dbdbe5be34 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -1438,6 +1438,7 @@ struct mid_q_entry {
int mid_state; /* wish this were enum but can not pass to wait_event */
unsigned int mid_flags;
__le16 command; /* smb command code */
+ unsigned int optype; /* operation type */
bool large_buf:1; /* if valid response, is pointer to large buf */
bool multiRsp:1; /* multiple trans2 responses for one request */
bool multiEnd:1; /* both received */
@@ -1574,6 +1575,25 @@ static inline void free_dfs_info_array(struct dfs_info3_param *param,
kfree(param);
}
+static inline bool is_interrupt_error(int error)
+{
+ switch (error) {
+ case -EINTR:
+ case -ERESTARTSYS:
+ case -ERESTARTNOHAND:
+ case -ERESTARTNOINTR:
+ return true;
+ }
+ return false;
+}
+
+static inline bool is_retryable_error(int error)
+{
+ if (is_interrupt_error(error) || error == -EAGAIN)
+ return true;
+ return false;
+}
+
#define MID_FREE 0
#define MID_REQUEST_ALLOCATED 1
#define MID_REQUEST_SUBMITTED 2
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index b1f49c1c543a..bb54ccf8481c 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -128,24 +128,31 @@ static int __cifs_reconnect_tcon(const struct nls_table *nlsc,
int rc;
struct dfs_cache_tgt_list tl;
struct dfs_cache_tgt_iterator *it = NULL;
- char tree[MAX_TREE_SIZE + 1];
+ char *tree;
const char *tcp_host;
size_t tcp_host_len;
const char *dfs_host;
size_t dfs_host_len;
+ tree = kzalloc(MAX_TREE_SIZE, GFP_KERNEL);
+ if (!tree)
+ return -ENOMEM;
+
if (tcon->ipc) {
- snprintf(tree, sizeof(tree), "\\\\%s\\IPC$",
+ snprintf(tree, MAX_TREE_SIZE, "\\\\%s\\IPC$",
tcon->ses->server->hostname);
- return CIFSTCon(0, tcon->ses, tree, tcon, nlsc);
+ rc = CIFSTCon(0, tcon->ses, tree, tcon, nlsc);
+ goto out;
}
- if (!tcon->dfs_path)
- return CIFSTCon(0, tcon->ses, tcon->treeName, tcon, nlsc);
+ if (!tcon->dfs_path) {
+ rc = CIFSTCon(0, tcon->ses, tcon->treeName, tcon, nlsc);
+ goto out;
+ }
rc = dfs_cache_noreq_find(tcon->dfs_path + 1, NULL, &tl);
if (rc)
- return rc;
+ goto out;
extract_unc_hostname(tcon->ses->server->hostname, &tcp_host,
&tcp_host_len);
@@ -165,7 +172,7 @@ static int __cifs_reconnect_tcon(const struct nls_table *nlsc,
continue;
}
- snprintf(tree, sizeof(tree), "\\%s", tgt);
+ snprintf(tree, MAX_TREE_SIZE, "\\%s", tgt);
rc = CIFSTCon(0, tcon->ses, tree, tcon, nlsc);
if (!rc)
@@ -182,6 +189,8 @@ static int __cifs_reconnect_tcon(const struct nls_table *nlsc,
rc = -ENOENT;
}
dfs_cache_free_tgts(&tl);
+out:
+ kfree(tree);
return rc;
}
#else
@@ -1540,18 +1549,26 @@ cifs_discard_remaining_data(struct TCP_Server_Info *server)
}
static int
-cifs_readv_discard(struct TCP_Server_Info *server, struct mid_q_entry *mid)
+__cifs_readv_discard(struct TCP_Server_Info *server, struct mid_q_entry *mid,
+ bool malformed)
{
int length;
- struct cifs_readdata *rdata = mid->callback_data;
length = cifs_discard_remaining_data(server);
- dequeue_mid(mid, rdata->result);
+ dequeue_mid(mid, malformed);
mid->resp_buf = server->smallbuf;
server->smallbuf = NULL;
return length;
}
+static int
+cifs_readv_discard(struct TCP_Server_Info *server, struct mid_q_entry *mid)
+{
+ struct cifs_readdata *rdata = mid->callback_data;
+
+ return __cifs_readv_discard(server, mid, rdata->result);
+}
+
int
cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid)
{
@@ -1593,12 +1610,23 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid)
return -1;
}
+ /* set up first two iov for signature check and to get credits */
+ rdata->iov[0].iov_base = buf;
+ rdata->iov[0].iov_len = 4;
+ rdata->iov[1].iov_base = buf + 4;
+ rdata->iov[1].iov_len = server->total_read - 4;
+ cifs_dbg(FYI, "0: iov_base=%p iov_len=%zu\n",
+ rdata->iov[0].iov_base, rdata->iov[0].iov_len);
+ cifs_dbg(FYI, "1: iov_base=%p iov_len=%zu\n",
+ rdata->iov[1].iov_base, rdata->iov[1].iov_len);
+
/* Was the SMB read successful? */
rdata->result = server->ops->map_error(buf, false);
if (rdata->result != 0) {
cifs_dbg(FYI, "%s: server returned error %d\n",
__func__, rdata->result);
- return cifs_readv_discard(server, mid);
+ /* normal error on read response */
+ return __cifs_readv_discard(server, mid, false);
}
/* Is there enough to get to the rest of the READ_RSP header? */
@@ -1642,14 +1670,6 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid)
server->total_read += length;
}
- /* set up first iov for signature check */
- rdata->iov[0].iov_base = buf;
- rdata->iov[0].iov_len = 4;
- rdata->iov[1].iov_base = buf + 4;
- rdata->iov[1].iov_len = server->total_read - 4;
- cifs_dbg(FYI, "0: iov_base=%p iov_len=%u\n",
- rdata->iov[0].iov_base, server->total_read);
-
/* how much data is in the response? */
#ifdef CONFIG_CIFS_SMB_DIRECT
use_rdma_mr = rdata->mr;
@@ -2114,7 +2134,7 @@ cifs_writev_requeue(struct cifs_writedata *wdata)
for (j = 0; j < nr_pages; j++) {
unlock_page(wdata2->pages[j]);
- if (rc != 0 && rc != -EAGAIN) {
+ if (rc != 0 && !is_retryable_error(rc)) {
SetPageError(wdata2->pages[j]);
end_page_writeback(wdata2->pages[j]);
put_page(wdata2->pages[j]);
@@ -2123,7 +2143,7 @@ cifs_writev_requeue(struct cifs_writedata *wdata)
if (rc) {
kref_put(&wdata2->refcount, cifs_writedata_release);
- if (rc == -EAGAIN)
+ if (is_retryable_error(rc))
continue;
break;
}
@@ -2132,7 +2152,8 @@ cifs_writev_requeue(struct cifs_writedata *wdata)
i += nr_pages;
} while (i < wdata->nr_pages);
- mapping_set_error(inode->i_mapping, rc);
+ if (rc != 0 && !is_retryable_error(rc))
+ mapping_set_error(inode->i_mapping, rc);
kref_put(&wdata->refcount, cifs_writedata_release);
}
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index f66529679ca2..8463c940e0e5 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -433,9 +433,10 @@ static void reconn_inval_dfs_target(struct TCP_Server_Info *server,
kfree(server->hostname);
server->hostname = extract_hostname(name);
- if (!server->hostname) {
- cifs_dbg(FYI, "%s: failed to extract hostname from target: %d\n",
- __func__, -ENOMEM);
+ if (IS_ERR(server->hostname)) {
+ cifs_dbg(FYI,
+ "%s: failed to extract hostname from target: %ld\n",
+ __func__, PTR_ERR(server->hostname));
}
}
@@ -719,6 +720,21 @@ server_unresponsive(struct TCP_Server_Info *server)
return false;
}
+static inline bool
+zero_credits(struct TCP_Server_Info *server)
+{
+ int val;
+
+ spin_lock(&server->req_lock);
+ val = server->credits + server->echo_credits + server->oplock_credits;
+ if (server->in_flight == 0 && val == 0) {
+ spin_unlock(&server->req_lock);
+ return true;
+ }
+ spin_unlock(&server->req_lock);
+ return false;
+}
+
static int
cifs_readv_from_socket(struct TCP_Server_Info *server, struct msghdr *smb_msg)
{
@@ -731,6 +747,12 @@ cifs_readv_from_socket(struct TCP_Server_Info *server, struct msghdr *smb_msg)
for (total_read = 0; msg_data_left(smb_msg); total_read += length) {
try_to_freeze();
+ /* reconnect if no credits and no requests in flight */
+ if (zero_credits(server)) {
+ cifs_reconnect(server);
+ return -ECONNABORTED;
+ }
+
if (server_unresponsive(server))
return -ECONNABORTED;
if (cifs_rdma_enabled(server) && server->smbd_conn)
diff --git a/fs/cifs/dfs_cache.c b/fs/cifs/dfs_cache.c
index cd63c4a70875..09b7d0d4f6e4 100644
--- a/fs/cifs/dfs_cache.c
+++ b/fs/cifs/dfs_cache.c
@@ -776,6 +776,7 @@ static int get_tgt_list(const struct dfs_cache_entry *ce,
it->it_name = kstrndup(t->t_name, strlen(t->t_name),
GFP_KERNEL);
if (!it->it_name) {
+ kfree(it);
rc = -ENOMEM;
goto err_free_it;
}
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index e3e3a7550205..2c7689f3998d 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -733,7 +733,8 @@ reopen_success:
if (can_flush) {
rc = filemap_write_and_wait(inode->i_mapping);
- mapping_set_error(inode->i_mapping, rc);
+ if (!is_interrupt_error(rc))
+ mapping_set_error(inode->i_mapping, rc);
if (tcon->unix_ext)
rc = cifs_get_inode_info_unix(&inode, full_path,
@@ -1132,14 +1133,18 @@ cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
/*
* Accessing maxBuf is racy with cifs_reconnect - need to store value
- * and check it for zero before using.
+ * and check it before using.
*/
max_buf = tcon->ses->server->maxBuf;
- if (!max_buf) {
+ if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) {
free_xid(xid);
return -EINVAL;
}
+ BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
+ PAGE_SIZE);
+ max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
+ PAGE_SIZE);
max_num = (max_buf - sizeof(struct smb_hdr)) /
sizeof(LOCKING_ANDX_RANGE);
buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
@@ -1472,12 +1477,16 @@ cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
/*
* Accessing maxBuf is racy with cifs_reconnect - need to store value
- * and check it for zero before using.
+ * and check it before using.
*/
max_buf = tcon->ses->server->maxBuf;
- if (!max_buf)
+ if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE)))
return -EINVAL;
+ BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
+ PAGE_SIZE);
+ max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
+ PAGE_SIZE);
max_num = (max_buf - sizeof(struct smb_hdr)) /
sizeof(LOCKING_ANDX_RANGE);
buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
@@ -2110,6 +2119,7 @@ static int cifs_writepages(struct address_space *mapping,
pgoff_t end, index;
struct cifs_writedata *wdata;
int rc = 0;
+ int saved_rc = 0;
unsigned int xid;
/*
@@ -2138,8 +2148,10 @@ retry:
rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
&wsize, &credits);
- if (rc)
+ if (rc != 0) {
+ done = true;
break;
+ }
tofind = min((wsize / PAGE_SIZE) - 1, end - index) + 1;
@@ -2147,6 +2159,7 @@ retry:
&found_pages);
if (!wdata) {
rc = -ENOMEM;
+ done = true;
add_credits_and_wake_if(server, credits, 0);
break;
}
@@ -2175,7 +2188,7 @@ retry:
if (rc != 0) {
add_credits_and_wake_if(server, wdata->credits, 0);
for (i = 0; i < nr_pages; ++i) {
- if (rc == -EAGAIN)
+ if (is_retryable_error(rc))
redirty_page_for_writepage(wbc,
wdata->pages[i]);
else
@@ -2183,7 +2196,7 @@ retry:
end_page_writeback(wdata->pages[i]);
put_page(wdata->pages[i]);
}
- if (rc != -EAGAIN)
+ if (!is_retryable_error(rc))
mapping_set_error(mapping, rc);
}
kref_put(&wdata->refcount, cifs_writedata_release);
@@ -2193,6 +2206,15 @@ retry:
continue;
}
+ /* Return immediately if we received a signal during writing */
+ if (is_interrupt_error(rc)) {
+ done = true;
+ break;
+ }
+
+ if (rc != 0 && saved_rc == 0)
+ saved_rc = rc;
+
wbc->nr_to_write -= nr_pages;
if (wbc->nr_to_write <= 0)
done = true;
@@ -2210,6 +2232,9 @@ retry:
goto retry;
}
+ if (saved_rc != 0)
+ rc = saved_rc;
+
if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
mapping->writeback_index = index;
@@ -2242,8 +2267,8 @@ cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
set_page_writeback(page);
retry_write:
rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
- if (rc == -EAGAIN) {
- if (wbc->sync_mode == WB_SYNC_ALL)
+ if (is_retryable_error(rc)) {
+ if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN)
goto retry_write;
redirty_page_for_writepage(wbc, page);
} else if (rc != 0) {
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 13fb59aadebc..478003644916 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -2257,6 +2257,11 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs)
* the flush returns error?
*/
rc = filemap_write_and_wait(inode->i_mapping);
+ if (is_interrupt_error(rc)) {
+ rc = -ERESTARTSYS;
+ goto out;
+ }
+
mapping_set_error(inode->i_mapping, rc);
rc = 0;
@@ -2400,6 +2405,11 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs)
* the flush returns error?
*/
rc = filemap_write_and_wait(inode->i_mapping);
+ if (is_interrupt_error(rc)) {
+ rc = -ERESTARTSYS;
+ goto cifs_setattr_exit;
+ }
+
mapping_set_error(inode->i_mapping, rc);
rc = 0;
diff --git a/fs/cifs/smb2file.c b/fs/cifs/smb2file.c
index 4ed10dd086e6..b204e84b87fb 100644
--- a/fs/cifs/smb2file.c
+++ b/fs/cifs/smb2file.c
@@ -122,12 +122,14 @@ smb2_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
/*
* Accessing maxBuf is racy with cifs_reconnect - need to store value
- * and check it for zero before using.
+ * and check it before using.
*/
max_buf = tcon->ses->server->maxBuf;
- if (!max_buf)
+ if (max_buf < sizeof(struct smb2_lock_element))
return -EINVAL;
+ BUILD_BUG_ON(sizeof(struct smb2_lock_element) > PAGE_SIZE);
+ max_buf = min_t(unsigned int, max_buf, PAGE_SIZE);
max_num = max_buf / sizeof(struct smb2_lock_element);
buf = kcalloc(max_num, sizeof(struct smb2_lock_element), GFP_KERNEL);
if (!buf)
@@ -264,6 +266,8 @@ smb2_push_mandatory_locks(struct cifsFileInfo *cfile)
return -EINVAL;
}
+ BUILD_BUG_ON(sizeof(struct smb2_lock_element) > PAGE_SIZE);
+ max_buf = min_t(unsigned int, max_buf, PAGE_SIZE);
max_num = max_buf / sizeof(struct smb2_lock_element);
buf = kcalloc(max_num, sizeof(struct smb2_lock_element), GFP_KERNEL);
if (!buf) {
diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c
index f14533da3a93..01a76bccdb8d 100644
--- a/fs/cifs/smb2inode.c
+++ b/fs/cifs/smb2inode.c
@@ -293,6 +293,8 @@ smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon,
int rc;
struct smb2_file_all_info *smb2_data;
__u32 create_options = 0;
+ struct cifs_fid fid;
+ bool no_cached_open = tcon->nohandlecache;
*adjust_tz = false;
*symlink = false;
@@ -301,6 +303,21 @@ smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon,
GFP_KERNEL);
if (smb2_data == NULL)
return -ENOMEM;
+
+ /* If it is a root and its handle is cached then use it */
+ if (!strlen(full_path) && !no_cached_open) {
+ rc = open_shroot(xid, tcon, &fid);
+ if (rc)
+ goto out;
+ rc = SMB2_query_info(xid, tcon, fid.persistent_fid,
+ fid.volatile_fid, smb2_data);
+ close_shroot(&tcon->crfid);
+ if (rc)
+ goto out;
+ move_smb2_info_to_cifs(data, smb2_data);
+ goto out;
+ }
+
if (backup_cred(cifs_sb))
create_options |= CREATE_OPEN_BACKUP_INTENT;
diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c
index 6a9c47541c53..7b8b58fb4d3f 100644
--- a/fs/cifs/smb2misc.c
+++ b/fs/cifs/smb2misc.c
@@ -648,6 +648,13 @@ smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server)
if (rsp->sync_hdr.Command != SMB2_OPLOCK_BREAK)
return false;
+ if (rsp->sync_hdr.CreditRequest) {
+ spin_lock(&server->req_lock);
+ server->credits += le16_to_cpu(rsp->sync_hdr.CreditRequest);
+ spin_unlock(&server->req_lock);
+ wake_up(&server->request_q);
+ }
+
if (rsp->StructureSize !=
smb2_rsp_struct_sizes[SMB2_OPLOCK_BREAK_HE]) {
if (le16_to_cpu(rsp->StructureSize) == 44)
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index cf7eb891804f..153238fc4fa9 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -34,6 +34,7 @@
#include "cifs_ioctl.h"
#include "smbdirect.h"
+/* Change credits for different ops and return the total number of credits */
static int
change_conf(struct TCP_Server_Info *server)
{
@@ -41,17 +42,15 @@ change_conf(struct TCP_Server_Info *server)
server->oplock_credits = server->echo_credits = 0;
switch (server->credits) {
case 0:
- return -1;
+ return 0;
case 1:
server->echoes = false;
server->oplocks = false;
- cifs_dbg(VFS, "disabling echoes and oplocks\n");
break;
case 2:
server->echoes = true;
server->oplocks = false;
server->echo_credits = 1;
- cifs_dbg(FYI, "disabling oplocks\n");
break;
default:
server->echoes = true;
@@ -64,14 +63,15 @@ change_conf(struct TCP_Server_Info *server)
server->echo_credits = 1;
}
server->credits -= server->echo_credits + server->oplock_credits;
- return 0;
+ return server->credits + server->echo_credits + server->oplock_credits;
}
static void
smb2_add_credits(struct TCP_Server_Info *server, const unsigned int add,
const int optype)
{
- int *val, rc = 0;
+ int *val, rc = -1;
+
spin_lock(&server->req_lock);
val = server->ops->get_credits_field(server, optype);
@@ -101,8 +101,26 @@ smb2_add_credits(struct TCP_Server_Info *server, const unsigned int add,
}
spin_unlock(&server->req_lock);
wake_up(&server->request_q);
- if (rc)
- cifs_reconnect(server);
+
+ if (server->tcpStatus == CifsNeedReconnect)
+ return;
+
+ switch (rc) {
+ case -1:
+ /* change_conf hasn't been executed */
+ break;
+ case 0:
+ cifs_dbg(VFS, "Possible client or server bug - zero credits\n");
+ break;
+ case 1:
+ cifs_dbg(VFS, "disabling echoes and oplocks\n");
+ break;
+ case 2:
+ cifs_dbg(FYI, "disabling oplocks\n");
+ break;
+ default:
+ cifs_dbg(FYI, "add %u credits total=%d\n", add, rc);
+ }
}
static void
@@ -136,7 +154,11 @@ smb2_get_credits(struct mid_q_entry *mid)
{
struct smb2_sync_hdr *shdr = (struct smb2_sync_hdr *)mid->resp_buf;
- return le16_to_cpu(shdr->CreditRequest);
+ if (mid->mid_state == MID_RESPONSE_RECEIVED
+ || mid->mid_state == MID_RESPONSE_MALFORMED)
+ return le16_to_cpu(shdr->CreditRequest);
+
+ return 0;
}
static int
@@ -165,14 +187,14 @@ smb2_wait_mtu_credits(struct TCP_Server_Info *server, unsigned int size,
scredits = server->credits;
/* can deadlock with reopen */
- if (scredits == 1) {
+ if (scredits <= 8) {
*num = SMB2_MAX_BUFFER_SIZE;
*credits = 0;
break;
}
- /* leave one credit for a possible reopen */
- scredits--;
+ /* leave some credits for reopen and other ops */
+ scredits -= 8;
*num = min_t(unsigned int, size,
scredits * SMB2_MAX_BUFFER_SIZE);
@@ -3189,11 +3211,23 @@ handle_read_data(struct TCP_Server_Info *server, struct mid_q_entry *mid,
server->ops->is_status_pending(buf, server, 0))
return -1;
- rdata->result = server->ops->map_error(buf, false);
+ /* set up first two iov to get credits */
+ rdata->iov[0].iov_base = buf;
+ rdata->iov[0].iov_len = 4;
+ rdata->iov[1].iov_base = buf + 4;
+ rdata->iov[1].iov_len =
+ min_t(unsigned int, buf_len, server->vals->read_rsp_size) - 4;
+ cifs_dbg(FYI, "0: iov_base=%p iov_len=%zu\n",
+ rdata->iov[0].iov_base, rdata->iov[0].iov_len);
+ cifs_dbg(FYI, "1: iov_base=%p iov_len=%zu\n",
+ rdata->iov[1].iov_base, rdata->iov[1].iov_len);
+
+ rdata->result = server->ops->map_error(buf, true);
if (rdata->result != 0) {
cifs_dbg(FYI, "%s: server returned error %d\n",
__func__, rdata->result);
- dequeue_mid(mid, rdata->result);
+ /* normal error on read response */
+ dequeue_mid(mid, false);
return 0;
}
@@ -3266,14 +3300,6 @@ handle_read_data(struct TCP_Server_Info *server, struct mid_q_entry *mid,
return 0;
}
- /* set up first iov for signature check */
- rdata->iov[0].iov_base = buf;
- rdata->iov[0].iov_len = 4;
- rdata->iov[1].iov_base = buf + 4;
- rdata->iov[1].iov_len = server->vals->read_rsp_size - 4;
- cifs_dbg(FYI, "0: iov_base=%p iov_len=%zu\n",
- rdata->iov[0].iov_base, server->vals->read_rsp_size);
-
length = rdata->copy_into_pages(server, rdata, &iter);
kfree(bvec);
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index e57f6aa1d638..2ff209ec4fab 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -162,24 +162,31 @@ static int __smb2_reconnect(const struct nls_table *nlsc,
int rc;
struct dfs_cache_tgt_list tl;
struct dfs_cache_tgt_iterator *it = NULL;
- char tree[MAX_TREE_SIZE + 1];
+ char *tree;
const char *tcp_host;
size_t tcp_host_len;
const char *dfs_host;
size_t dfs_host_len;
+ tree = kzalloc(MAX_TREE_SIZE, GFP_KERNEL);
+ if (!tree)
+ return -ENOMEM;
+
if (tcon->ipc) {
- snprintf(tree, sizeof(tree), "\\\\%s\\IPC$",
+ snprintf(tree, MAX_TREE_SIZE, "\\\\%s\\IPC$",
tcon->ses->server->hostname);
- return SMB2_tcon(0, tcon->ses, tree, tcon, nlsc);
+ rc = SMB2_tcon(0, tcon->ses, tree, tcon, nlsc);
+ goto out;
}
- if (!tcon->dfs_path)
- return SMB2_tcon(0, tcon->ses, tcon->treeName, tcon, nlsc);
+ if (!tcon->dfs_path) {
+ rc = SMB2_tcon(0, tcon->ses, tcon->treeName, tcon, nlsc);
+ goto out;
+ }
rc = dfs_cache_noreq_find(tcon->dfs_path + 1, NULL, &tl);
if (rc)
- return rc;
+ goto out;
extract_unc_hostname(tcon->ses->server->hostname, &tcp_host,
&tcp_host_len);
@@ -199,7 +206,7 @@ static int __smb2_reconnect(const struct nls_table *nlsc,
continue;
}
- snprintf(tree, sizeof(tree), "\\%s", tgt);
+ snprintf(tree, MAX_TREE_SIZE, "\\%s", tgt);
rc = SMB2_tcon(0, tcon->ses, tree, tcon, nlsc);
if (!rc)
@@ -216,6 +223,8 @@ static int __smb2_reconnect(const struct nls_table *nlsc,
rc = -ENOENT;
}
dfs_cache_free_tgts(&tl);
+out:
+ kfree(tree);
return rc;
}
#else
@@ -2807,6 +2816,7 @@ query_info(const unsigned int xid, struct cifs_tcon *tcon,
int resp_buftype = CIFS_NO_BUFFER;
struct cifs_ses *ses = tcon->ses;
int flags = 0;
+ bool allocated = false;
cifs_dbg(FYI, "Query Info\n");
@@ -2846,14 +2856,21 @@ query_info(const unsigned int xid, struct cifs_tcon *tcon,
"Error %d allocating memory for acl\n",
rc);
*dlen = 0;
+ rc = -ENOMEM;
goto qinf_exit;
}
+ allocated = true;
}
}
rc = smb2_validate_and_copy_iov(le16_to_cpu(rsp->OutputBufferOffset),
le32_to_cpu(rsp->OutputBufferLength),
&rsp_iov, min_len, *data);
+ if (rc && allocated) {
+ kfree(*data);
+ *data = NULL;
+ *dlen = 0;
+ }
qinf_exit:
SMB2_query_info_free(&rqst);
@@ -2907,9 +2924,10 @@ smb2_echo_callback(struct mid_q_entry *mid)
{
struct TCP_Server_Info *server = mid->callback_data;
struct smb2_echo_rsp *rsp = (struct smb2_echo_rsp *)mid->resp_buf;
- unsigned int credits_received = 1;
+ unsigned int credits_received = 0;
- if (mid->mid_state == MID_RESPONSE_RECEIVED)
+ if (mid->mid_state == MID_RESPONSE_RECEIVED
+ || mid->mid_state == MID_RESPONSE_MALFORMED)
credits_received = le16_to_cpu(rsp->sync_hdr.CreditRequest);
DeleteMidQEntry(mid);
@@ -3166,7 +3184,7 @@ smb2_readv_callback(struct mid_q_entry *mid)
struct TCP_Server_Info *server = tcon->ses->server;
struct smb2_sync_hdr *shdr =
(struct smb2_sync_hdr *)rdata->iov[0].iov_base;
- unsigned int credits_received = 1;
+ unsigned int credits_received = 0;
struct smb_rqst rqst = { .rq_iov = rdata->iov,
.rq_nvec = 2,
.rq_pages = rdata->pages,
@@ -3205,6 +3223,9 @@ smb2_readv_callback(struct mid_q_entry *mid)
task_io_account_read(rdata->got_bytes);
cifs_stats_bytes_read(tcon, rdata->got_bytes);
break;
+ case MID_RESPONSE_MALFORMED:
+ credits_received = le16_to_cpu(shdr->CreditRequest);
+ /* fall through */
default:
if (rdata->result != -ENODATA)
rdata->result = -EIO;
@@ -3278,12 +3299,14 @@ smb2_async_readv(struct cifs_readdata *rdata)
if (rdata->credits) {
shdr->CreditCharge = cpu_to_le16(DIV_ROUND_UP(rdata->bytes,
SMB2_MAX_BUFFER_SIZE));
- shdr->CreditRequest = shdr->CreditCharge;
+ shdr->CreditRequest =
+ cpu_to_le16(le16_to_cpu(shdr->CreditCharge) + 1);
spin_lock(&server->req_lock);
server->credits += rdata->credits -
le16_to_cpu(shdr->CreditCharge);
spin_unlock(&server->req_lock);
wake_up(&server->request_q);
+ rdata->credits = le16_to_cpu(shdr->CreditCharge);
flags |= CIFS_HAS_CREDITS;
}
@@ -3388,7 +3411,7 @@ smb2_writev_callback(struct mid_q_entry *mid)
struct cifs_tcon *tcon = tlink_tcon(wdata->cfile->tlink);
unsigned int written;
struct smb2_write_rsp *rsp = (struct smb2_write_rsp *)mid->resp_buf;
- unsigned int credits_received = 1;
+ unsigned int credits_received = 0;
switch (mid->mid_state) {
case MID_RESPONSE_RECEIVED:
@@ -3416,6 +3439,9 @@ smb2_writev_callback(struct mid_q_entry *mid)
case MID_RETRY_NEEDED:
wdata->result = -EAGAIN;
break;
+ case MID_RESPONSE_MALFORMED:
+ credits_received = le16_to_cpu(rsp->sync_hdr.CreditRequest);
+ /* fall through */
default:
wdata->result = -EIO;
break;
@@ -3555,12 +3581,14 @@ smb2_async_writev(struct cifs_writedata *wdata,
if (wdata->credits) {
shdr->CreditCharge = cpu_to_le16(DIV_ROUND_UP(wdata->bytes,
SMB2_MAX_BUFFER_SIZE));
- shdr->CreditRequest = shdr->CreditCharge;
+ shdr->CreditRequest =
+ cpu_to_le16(le16_to_cpu(shdr->CreditCharge) + 1);
spin_lock(&server->req_lock);
server->credits += wdata->credits -
le16_to_cpu(shdr->CreditCharge);
spin_unlock(&server->req_lock);
wake_up(&server->request_q);
+ wdata->credits = le16_to_cpu(shdr->CreditCharge);
flags |= CIFS_HAS_CREDITS;
}
diff --git a/fs/cifs/trace.c b/fs/cifs/trace.c
index bd4a546feec1..465483787193 100644
--- a/fs/cifs/trace.c
+++ b/fs/cifs/trace.c
@@ -3,16 +3,6 @@
* Copyright (C) 2018, Microsoft Corporation.
*
* Author(s): Steve French <stfrench@microsoft.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
- * the GNU General Public License for more details.
*/
#define CREATE_TRACE_POINTS
#include "trace.h"
diff --git a/fs/cifs/trace.h b/fs/cifs/trace.h
index fb049809555f..59be48206932 100644
--- a/fs/cifs/trace.h
+++ b/fs/cifs/trace.h
@@ -3,16 +3,6 @@
* Copyright (C) 2018, Microsoft Corporation.
*
* Author(s): Steve French <stfrench@microsoft.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
- * the GNU General Public License for more details.
*/
#undef TRACE_SYSTEM
#define TRACE_SYSTEM cifs
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 5be7302853b6..53532bd3f50d 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -387,7 +387,7 @@ smbd_done:
if (rc < 0 && rc != -EINTR)
cifs_dbg(VFS, "Error %d sending data on socket to server\n",
rc);
- else
+ else if (rc > 0)
rc = 0;
return rc;
@@ -783,8 +783,25 @@ cifs_setup_request(struct cifs_ses *ses, struct smb_rqst *rqst)
}
static void
-cifs_noop_callback(struct mid_q_entry *mid)
+cifs_compound_callback(struct mid_q_entry *mid)
+{
+ struct TCP_Server_Info *server = mid->server;
+
+ add_credits(server, server->ops->get_credits(mid), mid->optype);
+}
+
+static void
+cifs_compound_last_callback(struct mid_q_entry *mid)
{
+ cifs_compound_callback(mid);
+ cifs_wake_up_task(mid);
+}
+
+static void
+cifs_cancelled_callback(struct mid_q_entry *mid)
+{
+ cifs_compound_callback(mid);
+ DeleteMidQEntry(mid);
}
int
@@ -795,7 +812,8 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses,
int i, j, rc = 0;
int timeout, optype;
struct mid_q_entry *midQ[MAX_COMPOUND];
- unsigned int credits = 0;
+ bool cancelled_mid[MAX_COMPOUND] = {false};
+ unsigned int credits[MAX_COMPOUND] = {0};
char *buf;
timeout = flags & CIFS_TIMEOUT_MASK;
@@ -813,13 +831,31 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses,
return -ENOENT;
/*
- * Ensure that we do not send more than 50 overlapping requests
- * to the same server. We may make this configurable later or
- * use ses->maxReq.
+ * Ensure we obtain 1 credit per request in the compound chain.
+ * It can be optimized further by waiting for all the credits
+ * at once but this can wait long enough if we don't have enough
+ * credits due to some heavy operations in progress or the server
+ * not granting us much, so a fallback to the current approach is
+ * needed anyway.
*/
- rc = wait_for_free_request(ses->server, timeout, optype);
- if (rc)
- return rc;
+ for (i = 0; i < num_rqst; i++) {
+ rc = wait_for_free_request(ses->server, timeout, optype);
+ if (rc) {
+ /*
+ * We haven't sent an SMB packet to the server yet but
+ * we already obtained credits for i requests in the
+ * compound chain - need to return those credits back
+ * for future use. Note that we need to call add_credits
+ * multiple times to match the way we obtained credits
+ * in the first place and to account for in flight
+ * requests correctly.
+ */
+ for (j = 0; j < i; j++)
+ add_credits(ses->server, 1, optype);
+ return rc;
+ }
+ credits[i] = 1;
+ }
/*
* Make sure that we sign in the same order that we send on this socket
@@ -835,18 +871,24 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses,
for (j = 0; j < i; j++)
cifs_delete_mid(midQ[j]);
mutex_unlock(&ses->server->srv_mutex);
+
/* Update # of requests on wire to server */
- add_credits(ses->server, 1, optype);
+ for (j = 0; j < num_rqst; j++)
+ add_credits(ses->server, credits[j], optype);
return PTR_ERR(midQ[i]);
}
midQ[i]->mid_state = MID_REQUEST_SUBMITTED;
+ midQ[i]->optype = optype;
/*
- * We don't invoke the callback compounds unless it is the last
- * request.
+ * Invoke callback for every part of the compound chain
+ * to calculate credits properly. Wake up this thread only when
+ * the last element is received.
*/
if (i < num_rqst - 1)
- midQ[i]->callback = cifs_noop_callback;
+ midQ[i]->callback = cifs_compound_callback;
+ else
+ midQ[i]->callback = cifs_compound_last_callback;
}
cifs_in_send_inc(ses->server);
rc = smb_send_rqst(ses->server, num_rqst, rqst, flags);
@@ -860,8 +902,20 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses,
mutex_unlock(&ses->server->srv_mutex);
- if (rc < 0)
+ if (rc < 0) {
+ /* Sending failed for some reason - return credits back */
+ for (i = 0; i < num_rqst; i++)
+ add_credits(ses->server, credits[i], optype);
goto out;
+ }
+
+ /*
+ * At this point the request is passed to the network stack - we assume
+ * that any credits taken from the server structure on the client have
+ * been spent and we can't return them back. Once we receive responses
+ * we will collect credits granted by the server in the mid callbacks
+ * and add those credits to the server structure.
+ */
/*
* Compounding is never used during session establish.
@@ -875,36 +929,34 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses,
for (i = 0; i < num_rqst; i++) {
rc = wait_for_response(ses->server, midQ[i]);
- if (rc != 0) {
+ if (rc != 0)
+ break;
+ }
+ if (rc != 0) {
+ for (; i < num_rqst; i++) {
cifs_dbg(VFS, "Cancelling wait for mid %llu cmd: %d\n",
midQ[i]->mid, le16_to_cpu(midQ[i]->command));
send_cancel(ses->server, &rqst[i], midQ[i]);
spin_lock(&GlobalMid_Lock);
if (midQ[i]->mid_state == MID_REQUEST_SUBMITTED) {
midQ[i]->mid_flags |= MID_WAIT_CANCELLED;
- midQ[i]->callback = DeleteMidQEntry;
- spin_unlock(&GlobalMid_Lock);
- add_credits(ses->server, 1, optype);
- return rc;
+ midQ[i]->callback = cifs_cancelled_callback;
+ cancelled_mid[i] = true;
+ credits[i] = 0;
}
spin_unlock(&GlobalMid_Lock);
}
}
- for (i = 0; i < num_rqst; i++)
- if (midQ[i]->resp_buf)
- credits += ses->server->ops->get_credits(midQ[i]);
- if (!credits)
- credits = 1;
-
for (i = 0; i < num_rqst; i++) {
if (rc < 0)
goto out;
rc = cifs_sync_mid_result(midQ[i], ses->server);
if (rc != 0) {
- add_credits(ses->server, credits, optype);
- return rc;
+ /* mark this mid as cancelled to not free it below */
+ cancelled_mid[i] = true;
+ goto out;
}
if (!midQ[i]->resp_buf ||
@@ -951,9 +1003,10 @@ out:
* This is prevented above by using a noop callback that will not
* wake this thread except for the very last PDU.
*/
- for (i = 0; i < num_rqst; i++)
- cifs_delete_mid(midQ[i]);
- add_credits(ses->server, credits, optype);
+ for (i = 0; i < num_rqst; i++) {
+ if (!cancelled_mid[i])
+ cifs_delete_mid(midQ[i]);
+ }
return rc;
}
diff --git a/fs/direct-io.c b/fs/direct-io.c
index dbc1a1f080ce..ec2fb6fe6d37 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -679,6 +679,7 @@ static int get_more_blocks(struct dio *dio, struct dio_submit *sdio,
unsigned long fs_count; /* Number of filesystem-sized blocks */
int create;
unsigned int i_blkbits = sdio->blkbits + sdio->blkfactor;
+ loff_t i_size;
/*
* If there was a memory error and we've overwritten all the
@@ -708,8 +709,8 @@ static int get_more_blocks(struct dio *dio, struct dio_submit *sdio,
*/
create = dio->op == REQ_OP_WRITE;
if (dio->flags & DIO_SKIP_HOLES) {
- if (fs_startblk <= ((i_size_read(dio->inode) - 1) >>
- i_blkbits))
+ i_size = i_size_read(dio->inode);
+ if (i_size && fs_startblk <= (i_size - 1) >> i_blkbits)
create = 0;
}
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index b40168fcc94a..36855c1f8daf 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -331,11 +331,22 @@ struct inode_switch_wbs_context {
struct work_struct work;
};
+static void bdi_down_write_wb_switch_rwsem(struct backing_dev_info *bdi)
+{
+ down_write(&bdi->wb_switch_rwsem);
+}
+
+static void bdi_up_write_wb_switch_rwsem(struct backing_dev_info *bdi)
+{
+ up_write(&bdi->wb_switch_rwsem);
+}
+
static void inode_switch_wbs_work_fn(struct work_struct *work)
{
struct inode_switch_wbs_context *isw =
container_of(work, struct inode_switch_wbs_context, work);
struct inode *inode = isw->inode;
+ struct backing_dev_info *bdi = inode_to_bdi(inode);
struct address_space *mapping = inode->i_mapping;
struct bdi_writeback *old_wb = inode->i_wb;
struct bdi_writeback *new_wb = isw->new_wb;
@@ -344,6 +355,12 @@ static void inode_switch_wbs_work_fn(struct work_struct *work)
bool switched = false;
/*
+ * If @inode switches cgwb membership while sync_inodes_sb() is
+ * being issued, sync_inodes_sb() might miss it. Synchronize.
+ */
+ down_read(&bdi->wb_switch_rwsem);
+
+ /*
* By the time control reaches here, RCU grace period has passed
* since I_WB_SWITCH assertion and all wb stat update transactions
* between unlocked_inode_to_wb_begin/end() are guaranteed to be
@@ -428,6 +445,8 @@ skip_switch:
spin_unlock(&new_wb->list_lock);
spin_unlock(&old_wb->list_lock);
+ up_read(&bdi->wb_switch_rwsem);
+
if (switched) {
wb_wakeup(new_wb);
wb_put(old_wb);
@@ -468,9 +487,18 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id)
if (inode->i_state & I_WB_SWITCH)
return;
+ /*
+ * Avoid starting new switches while sync_inodes_sb() is in
+ * progress. Otherwise, if the down_write protected issue path
+ * blocks heavily, we might end up starting a large number of
+ * switches which will block on the rwsem.
+ */
+ if (!down_read_trylock(&bdi->wb_switch_rwsem))
+ return;
+
isw = kzalloc(sizeof(*isw), GFP_ATOMIC);
if (!isw)
- return;
+ goto out_unlock;
/* find and pin the new wb */
rcu_read_lock();
@@ -504,12 +532,14 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id)
* Let's continue after I_WB_SWITCH is guaranteed to be visible.
*/
call_rcu(&isw->rcu_head, inode_switch_wbs_rcu_fn);
- return;
+ goto out_unlock;
out_free:
if (isw->new_wb)
wb_put(isw->new_wb);
kfree(isw);
+out_unlock:
+ up_read(&bdi->wb_switch_rwsem);
}
/**
@@ -887,6 +917,9 @@ fs_initcall(cgroup_writeback_init);
#else /* CONFIG_CGROUP_WRITEBACK */
+static void bdi_down_write_wb_switch_rwsem(struct backing_dev_info *bdi) { }
+static void bdi_up_write_wb_switch_rwsem(struct backing_dev_info *bdi) { }
+
static struct bdi_writeback *
locked_inode_to_wb_and_lock_list(struct inode *inode)
__releases(&inode->i_lock)
@@ -2413,8 +2446,11 @@ void sync_inodes_sb(struct super_block *sb)
return;
WARN_ON(!rwsem_is_locked(&sb->s_umount));
+ /* protect against inode wb switch, see inode_switch_wbs_work_fn() */
+ bdi_down_write_wb_switch_rwsem(bdi);
bdi_split_work_to_wbs(bdi, &work, false);
wb_wait_for_completion(bdi, &done);
+ bdi_up_write_wb_switch_rwsem(bdi);
wait_sb_inodes(sb);
}
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index a2fcea5f8225..32920a10100e 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -383,16 +383,17 @@ hugetlb_vmdelete_list(struct rb_root_cached *root, pgoff_t start, pgoff_t end)
* truncation is indicated by end of range being LLONG_MAX
* In this case, we first scan the range and release found pages.
* After releasing pages, hugetlb_unreserve_pages cleans up region/reserv
- * maps and global counts.
+ * maps and global counts. Page faults can not race with truncation
+ * in this routine. hugetlb_no_page() prevents page faults in the
+ * truncated range. It checks i_size before allocation, and again after
+ * with the page table lock for the page held. The same lock must be
+ * acquired to unmap a page.
* hole punch is indicated if end is not LLONG_MAX
* In the hole punch case we scan the range and release found pages.
* Only when releasing a page is the associated region/reserv map
* deleted. The region/reserv map for ranges without associated
- * pages are not modified.
- *
- * Callers of this routine must hold the i_mmap_rwsem in write mode to prevent
- * races with page faults.
- *
+ * pages are not modified. Page faults can race with hole punch.
+ * This is indicated if we find a mapped page.
* Note: If the passed end of range value is beyond the end of file, but
* not LLONG_MAX this routine still performs a hole punch operation.
*/
@@ -422,14 +423,32 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
for (i = 0; i < pagevec_count(&pvec); ++i) {
struct page *page = pvec.pages[i];
+ u32 hash;
index = page->index;
+ hash = hugetlb_fault_mutex_hash(h, current->mm,
+ &pseudo_vma,
+ mapping, index, 0);
+ mutex_lock(&hugetlb_fault_mutex_table[hash]);
+
/*
- * A mapped page is impossible as callers should unmap
- * all references before calling. And, i_mmap_rwsem
- * prevents the creation of additional mappings.
+ * If page is mapped, it was faulted in after being
+ * unmapped in caller. Unmap (again) now after taking
+ * the fault mutex. The mutex will prevent faults
+ * until we finish removing the page.
+ *
+ * This race can only happen in the hole punch case.
+ * Getting here in a truncate operation is a bug.
*/
- VM_BUG_ON(page_mapped(page));
+ if (unlikely(page_mapped(page))) {
+ BUG_ON(truncate_op);
+
+ i_mmap_lock_write(mapping);
+ hugetlb_vmdelete_list(&mapping->i_mmap,
+ index * pages_per_huge_page(h),
+ (index + 1) * pages_per_huge_page(h));
+ i_mmap_unlock_write(mapping);
+ }
lock_page(page);
/*
@@ -451,6 +470,7 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
}
unlock_page(page);
+ mutex_unlock(&hugetlb_fault_mutex_table[hash]);
}
huge_pagevec_release(&pvec);
cond_resched();
@@ -462,20 +482,9 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
static void hugetlbfs_evict_inode(struct inode *inode)
{
- struct address_space *mapping = inode->i_mapping;
struct resv_map *resv_map;
- /*
- * The vfs layer guarantees that there are no other users of this
- * inode. Therefore, it would be safe to call remove_inode_hugepages
- * without holding i_mmap_rwsem. We acquire and hold here to be
- * consistent with other callers. Since there will be no contention
- * on the semaphore, overhead is negligible.
- */
- i_mmap_lock_write(mapping);
remove_inode_hugepages(inode, 0, LLONG_MAX);
- i_mmap_unlock_write(mapping);
-
resv_map = (struct resv_map *)inode->i_mapping->private_data;
/* root inode doesn't have the resv_map, so we should check it */
if (resv_map)
@@ -496,8 +505,8 @@ static int hugetlb_vmtruncate(struct inode *inode, loff_t offset)
i_mmap_lock_write(mapping);
if (!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root))
hugetlb_vmdelete_list(&mapping->i_mmap, pgoff, 0);
- remove_inode_hugepages(inode, offset, LLONG_MAX);
i_mmap_unlock_write(mapping);
+ remove_inode_hugepages(inode, offset, LLONG_MAX);
return 0;
}
@@ -531,8 +540,8 @@ static long hugetlbfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
hugetlb_vmdelete_list(&mapping->i_mmap,
hole_start >> PAGE_SHIFT,
hole_end >> PAGE_SHIFT);
- remove_inode_hugepages(inode, hole_start, hole_end);
i_mmap_unlock_write(mapping);
+ remove_inode_hugepages(inode, hole_start, hole_end);
inode_unlock(inode);
}
@@ -615,11 +624,7 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
/* addr is the offset within the file (zero based) */
addr = index * hpage_size;
- /*
- * fault mutex taken here, protects against fault path
- * and hole punch. inode_lock previously taken protects
- * against truncation.
- */
+ /* mutex taken here, fault path and hole punch */
hash = hugetlb_fault_mutex_hash(h, mm, &pseudo_vma, mapping,
index, addr);
mutex_lock(&hugetlb_fault_mutex_table[hash]);
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index 46d691ba04bc..45b2322e092d 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -133,15 +133,9 @@ static ssize_t nfs4_copy_file_range(struct file *file_in, loff_t pos_in,
struct file *file_out, loff_t pos_out,
size_t count, unsigned int flags)
{
- ssize_t ret;
-
if (file_inode(file_in) == file_inode(file_out))
return -EINVAL;
-retry:
- ret = nfs42_proc_copy(file_in, pos_in, file_out, pos_out, count);
- if (ret == -EAGAIN)
- goto retry;
- return ret;
+ return nfs42_proc_copy(file_in, pos_in, file_out, pos_out, count);
}
static loff_t nfs4_file_llseek(struct file *filep, loff_t offset, int whence)
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index 105576daca4a..798f1253141a 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -724,8 +724,10 @@ SYSCALL_DEFINE3(inotify_add_watch, int, fd, const char __user *, pathname,
return -EBADF;
/* IN_MASK_ADD and IN_MASK_CREATE don't make sense together */
- if (unlikely((mask & IN_MASK_ADD) && (mask & IN_MASK_CREATE)))
- return -EINVAL;
+ if (unlikely((mask & IN_MASK_ADD) && (mask & IN_MASK_CREATE))) {
+ ret = -EINVAL;
+ goto fput_and_out;
+ }
/* verify that this is indeed an inotify instance */
if (unlikely(f.file->f_op != &inotify_fops)) {
diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
index 96f7d32cd184..898c8321b343 100644
--- a/fs/pstore/ram.c
+++ b/fs/pstore/ram.c
@@ -128,7 +128,6 @@ ramoops_get_next_prz(struct persistent_ram_zone *przs[], int id,
struct pstore_record *record)
{
struct persistent_ram_zone *prz;
- bool update = (record->type == PSTORE_TYPE_DMESG);
/* Give up if we never existed or have hit the end. */
if (!przs)
@@ -139,7 +138,7 @@ ramoops_get_next_prz(struct persistent_ram_zone *przs[], int id,
return NULL;
/* Update old/shadowed buffer. */
- if (update)
+ if (prz->type == PSTORE_TYPE_DMESG)
persistent_ram_save_old(prz);
if (!persistent_ram_old_size(prz))
@@ -711,18 +710,15 @@ static int ramoops_probe(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
struct ramoops_platform_data *pdata = dev->platform_data;
+ struct ramoops_platform_data pdata_local;
struct ramoops_context *cxt = &oops_cxt;
size_t dump_mem_sz;
phys_addr_t paddr;
int err = -EINVAL;
if (dev_of_node(dev) && !pdata) {
- pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
- if (!pdata) {
- pr_err("cannot allocate platform data buffer\n");
- err = -ENOMEM;
- goto fail_out;
- }
+ pdata = &pdata_local;
+ memset(pdata, 0, sizeof(*pdata));
err = ramoops_parse_dt(pdev, pdata);
if (err < 0)
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index feeae8081c22..aa85f2874a9f 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -43,7 +43,8 @@ int sysfs_create_dir_ns(struct kobject *kobj, const void *ns)
kuid_t uid;
kgid_t gid;
- BUG_ON(!kobj);
+ if (WARN_ON(!kobj))
+ return -EINVAL;
if (kobj->parent)
parent = kobj->parent->sd;
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index bb71db63c99c..51398457fe00 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -325,7 +325,8 @@ int sysfs_create_file_ns(struct kobject *kobj, const struct attribute *attr,
kuid_t uid;
kgid_t gid;
- BUG_ON(!kobj || !kobj->sd || !attr);
+ if (WARN_ON(!kobj || !kobj->sd || !attr))
+ return -EINVAL;
kobject_get_ownership(kobj, &uid, &gid);
return sysfs_add_file_mode_ns(kobj->sd, attr, false, attr->mode,
@@ -537,7 +538,8 @@ int sysfs_create_bin_file(struct kobject *kobj,
kuid_t uid;
kgid_t gid;
- BUG_ON(!kobj || !kobj->sd || !attr);
+ if (WARN_ON(!kobj || !kobj->sd || !attr))
+ return -EINVAL;
kobject_get_ownership(kobj, &uid, &gid);
return sysfs_add_file_mode_ns(kobj->sd, &attr->attr, true,
diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c
index 1eb2d6307663..57038604d4a8 100644
--- a/fs/sysfs/group.c
+++ b/fs/sysfs/group.c
@@ -112,7 +112,8 @@ static int internal_create_group(struct kobject *kobj, int update,
kgid_t gid;
int error;
- BUG_ON(!kobj || (!update && !kobj->sd));
+ if (WARN_ON(!kobj || (!update && !kobj->sd)))
+ return -EINVAL;
/* Updates may happen before the object has been instantiated */
if (unlikely(update && !kobj->sd))
diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c
index 215c225b2ca1..c4deecc80f67 100644
--- a/fs/sysfs/symlink.c
+++ b/fs/sysfs/symlink.c
@@ -23,7 +23,8 @@ static int sysfs_do_create_link_sd(struct kernfs_node *parent,
{
struct kernfs_node *kn, *target = NULL;
- BUG_ON(!name || !parent);
+ if (WARN_ON(!name || !parent))
+ return -EINVAL;
/*
* We don't own @target_kobj and it may be removed at any time.