diff options
Diffstat (limited to 'fs')
-rw-r--r-- | fs/afs/fsclient.c | 6 | ||||
-rw-r--r-- | fs/afs/yfsclient.c | 2 | ||||
-rw-r--r-- | fs/btrfs/extent-tree.c | 2 | ||||
-rw-r--r-- | fs/btrfs/qgroup.c | 4 | ||||
-rw-r--r-- | fs/btrfs/raid56.c | 3 | ||||
-rw-r--r-- | fs/btrfs/transaction.c | 49 | ||||
-rw-r--r-- | fs/btrfs/tree-log.c | 33 | ||||
-rw-r--r-- | fs/btrfs/volumes.c | 2 | ||||
-rw-r--r-- | fs/ceph/inode.c | 2 | ||||
-rw-r--r-- | fs/cifs/cifsfs.c | 2 | ||||
-rw-r--r-- | fs/cifs/cifsfs.h | 2 | ||||
-rw-r--r-- | fs/cifs/file.c | 148 | ||||
-rw-r--r-- | fs/cifs/smb2maperror.c | 3 | ||||
-rw-r--r-- | fs/cifs/smb2pdu.c | 11 | ||||
-rw-r--r-- | fs/cifs/trace.h | 6 | ||||
-rw-r--r-- | fs/ext4/ext4_jbd2.h | 2 | ||||
-rw-r--r-- | fs/ext4/file.c | 2 | ||||
-rw-r--r-- | fs/ext4/indirect.c | 43 | ||||
-rw-r--r-- | fs/ext4/inode.c | 30 | ||||
-rw-r--r-- | fs/ext4/ioctl.c | 7 | ||||
-rw-r--r-- | fs/ext4/resize.c | 17 | ||||
-rw-r--r-- | fs/ext4/super.c | 16 | ||||
-rw-r--r-- | fs/lockd/host.c | 3 | ||||
-rw-r--r-- | fs/locks.c | 5 | ||||
-rw-r--r-- | fs/nfs/client.c | 2 | ||||
-rw-r--r-- | fs/nfs/flexfilelayout/flexfilelayout.c | 5 | ||||
-rw-r--r-- | fs/nfs/nfs4proc.c | 5 | ||||
-rw-r--r-- | fs/proc/kcore.c | 27 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_bmap.c | 15 | ||||
-rw-r--r-- | fs/xfs/scrub/btree.c | 11 | ||||
-rw-r--r-- | fs/xfs/scrub/dabtree.c | 5 | ||||
-rw-r--r-- | fs/xfs/xfs_discard.c | 8 | ||||
-rw-r--r-- | fs/xfs/xfs_file.c | 27 |
33 files changed, 326 insertions, 179 deletions
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index ca08c83168f5..0b37867b5c20 100644 --- a/fs/afs/fsclient.c +++ b/fs/afs/fsclient.c @@ -1515,8 +1515,8 @@ static int afs_fs_setattr_size64(struct afs_fs_cursor *fc, struct iattr *attr) xdr_encode_AFS_StoreStatus(&bp, attr); - *bp++ = 0; /* position of start of write */ - *bp++ = 0; + *bp++ = htonl(attr->ia_size >> 32); /* position of start of write */ + *bp++ = htonl((u32) attr->ia_size); *bp++ = 0; /* size of write */ *bp++ = 0; *bp++ = htonl(attr->ia_size >> 32); /* new file length */ @@ -1564,7 +1564,7 @@ static int afs_fs_setattr_size(struct afs_fs_cursor *fc, struct iattr *attr) xdr_encode_AFS_StoreStatus(&bp, attr); - *bp++ = 0; /* position of start of write */ + *bp++ = htonl(attr->ia_size); /* position of start of write */ *bp++ = 0; /* size of write */ *bp++ = htonl(attr->ia_size); /* new file length */ diff --git a/fs/afs/yfsclient.c b/fs/afs/yfsclient.c index 5aa57929e8c2..6e97a42d24d1 100644 --- a/fs/afs/yfsclient.c +++ b/fs/afs/yfsclient.c @@ -1514,7 +1514,7 @@ static int yfs_fs_setattr_size(struct afs_fs_cursor *fc, struct iattr *attr) bp = xdr_encode_u32(bp, 0); /* RPC flags */ bp = xdr_encode_YFSFid(bp, &vnode->fid); bp = xdr_encode_YFS_StoreStatus(bp, attr); - bp = xdr_encode_u64(bp, 0); /* position of start of write */ + bp = xdr_encode_u64(bp, attr->ia_size); /* position of start of write */ bp = xdr_encode_u64(bp, 0); /* size of write */ bp = xdr_encode_u64(bp, attr->ia_size); /* new file length */ yfs_check_req(call, bp); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 1d49694e6ae3..c5880329ae37 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -6174,7 +6174,7 @@ static void btrfs_calculate_inode_block_rsv_size(struct btrfs_fs_info *fs_info, * * This is overestimating in most cases. */ - qgroup_rsv_size = outstanding_extents * fs_info->nodesize; + qgroup_rsv_size = (u64)outstanding_extents * fs_info->nodesize; spin_lock(&block_rsv->lock); block_rsv->size = reserve_size; diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index eb680b715dd6..e659d9d61107 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -1922,8 +1922,8 @@ static int qgroup_trace_new_subtree_blocks(struct btrfs_trans_handle* trans, int i; /* Level sanity check */ - if (cur_level < 0 || cur_level >= BTRFS_MAX_LEVEL || - root_level < 0 || root_level >= BTRFS_MAX_LEVEL || + if (cur_level < 0 || cur_level >= BTRFS_MAX_LEVEL - 1 || + root_level < 0 || root_level >= BTRFS_MAX_LEVEL - 1 || root_level < cur_level) { btrfs_err_rl(fs_info, "%s: bad levels, cur_level=%d root_level=%d", diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index 1869ba8e5981..67a6f7d47402 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c @@ -2430,8 +2430,9 @@ static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio, bitmap_clear(rbio->dbitmap, pagenr, 1); kunmap(p); - for (stripe = 0; stripe < rbio->real_stripes; stripe++) + for (stripe = 0; stripe < nr_data; stripe++) kunmap(page_in_rbio(rbio, stripe, pagenr, 0)); + kunmap(p_page); } __free_page(p_page); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index acdad6d658f5..e4e665f422fc 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1886,8 +1886,10 @@ static void btrfs_cleanup_pending_block_groups(struct btrfs_trans_handle *trans) } } -static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info) +static inline int btrfs_start_delalloc_flush(struct btrfs_trans_handle *trans) { + struct btrfs_fs_info *fs_info = trans->fs_info; + /* * We use writeback_inodes_sb here because if we used * btrfs_start_delalloc_roots we would deadlock with fs freeze. @@ -1897,15 +1899,50 @@ static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info) * from already being in a transaction and our join_transaction doesn't * have to re-take the fs freeze lock. */ - if (btrfs_test_opt(fs_info, FLUSHONCOMMIT)) + if (btrfs_test_opt(fs_info, FLUSHONCOMMIT)) { writeback_inodes_sb(fs_info->sb, WB_REASON_SYNC); + } else { + struct btrfs_pending_snapshot *pending; + struct list_head *head = &trans->transaction->pending_snapshots; + + /* + * Flush dellaloc for any root that is going to be snapshotted. + * This is done to avoid a corrupted version of files, in the + * snapshots, that had both buffered and direct IO writes (even + * if they were done sequentially) due to an unordered update of + * the inode's size on disk. + */ + list_for_each_entry(pending, head, list) { + int ret; + + ret = btrfs_start_delalloc_snapshot(pending->root); + if (ret) + return ret; + } + } return 0; } -static inline void btrfs_wait_delalloc_flush(struct btrfs_fs_info *fs_info) +static inline void btrfs_wait_delalloc_flush(struct btrfs_trans_handle *trans) { - if (btrfs_test_opt(fs_info, FLUSHONCOMMIT)) + struct btrfs_fs_info *fs_info = trans->fs_info; + + if (btrfs_test_opt(fs_info, FLUSHONCOMMIT)) { btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, (u64)-1); + } else { + struct btrfs_pending_snapshot *pending; + struct list_head *head = &trans->transaction->pending_snapshots; + + /* + * Wait for any dellaloc that we started previously for the roots + * that are going to be snapshotted. This is to avoid a corrupted + * version of files in the snapshots that had both buffered and + * direct IO writes (even if they were done sequentially). + */ + list_for_each_entry(pending, head, list) + btrfs_wait_ordered_extents(pending->root, + U64_MAX, 0, U64_MAX); + } } int btrfs_commit_transaction(struct btrfs_trans_handle *trans) @@ -2023,7 +2060,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) extwriter_counter_dec(cur_trans, trans->type); - ret = btrfs_start_delalloc_flush(fs_info); + ret = btrfs_start_delalloc_flush(trans); if (ret) goto cleanup_transaction; @@ -2039,7 +2076,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) if (ret) goto cleanup_transaction; - btrfs_wait_delalloc_flush(fs_info); + btrfs_wait_delalloc_flush(trans); btrfs_scrub_pause(fs_info); /* diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index f06454a55e00..561884f60d35 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -3578,9 +3578,16 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans, } btrfs_release_path(path); - /* find the first key from this transaction again */ + /* + * Find the first key from this transaction again. See the note for + * log_new_dir_dentries, if we're logging a directory recursively we + * won't be holding its i_mutex, which means we can modify the directory + * while we're logging it. If we remove an entry between our first + * search and this search we'll not find the key again and can just + * bail. + */ ret = btrfs_search_slot(NULL, root, &min_key, path, 0, 0); - if (WARN_ON(ret != 0)) + if (ret != 0) goto done; /* @@ -4544,6 +4551,19 @@ static int logged_inode_size(struct btrfs_root *log, struct btrfs_inode *inode, item = btrfs_item_ptr(path->nodes[0], path->slots[0], struct btrfs_inode_item); *size_ret = btrfs_inode_size(path->nodes[0], item); + /* + * If the in-memory inode's i_size is smaller then the inode + * size stored in the btree, return the inode's i_size, so + * that we get a correct inode size after replaying the log + * when before a power failure we had a shrinking truncate + * followed by addition of a new name (rename / new hard link). + * Otherwise return the inode size from the btree, to avoid + * data loss when replaying a log due to previously doing a + * write that expands the inode's size and logging a new name + * immediately after. + */ + if (*size_ret > inode->vfs_inode.i_size) + *size_ret = inode->vfs_inode.i_size; } btrfs_release_path(path); @@ -4705,15 +4725,8 @@ static int btrfs_log_trailing_hole(struct btrfs_trans_handle *trans, struct btrfs_file_extent_item); if (btrfs_file_extent_type(leaf, extent) == - BTRFS_FILE_EXTENT_INLINE) { - len = btrfs_file_extent_ram_bytes(leaf, extent); - ASSERT(len == i_size || - (len == fs_info->sectorsize && - btrfs_file_extent_compression(leaf, extent) != - BTRFS_COMPRESS_NONE) || - (len < i_size && i_size < fs_info->sectorsize)); + BTRFS_FILE_EXTENT_INLINE) return 0; - } len = btrfs_file_extent_num_bytes(leaf, extent); /* Last extent goes beyond i_size, no need to log a hole. */ diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 9024eee889b9..db934ceae9c1 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -6407,7 +6407,7 @@ static void btrfs_end_bio(struct bio *bio) if (bio_op(bio) == REQ_OP_WRITE) btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS); - else + else if (!(bio->bi_opf & REQ_RAHEAD)) btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_READ_ERRS); if (bio->bi_opf & REQ_PREFLUSH) diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index e3346628efe2..2d61ddda9bf5 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -524,6 +524,7 @@ static void ceph_i_callback(struct rcu_head *head) struct inode *inode = container_of(head, struct inode, i_rcu); struct ceph_inode_info *ci = ceph_inode(inode); + kfree(ci->i_symlink); kmem_cache_free(ceph_inode_cachep, ci); } @@ -566,7 +567,6 @@ void ceph_destroy_inode(struct inode *inode) } } - kfree(ci->i_symlink); while ((n = rb_first(&ci->i_fragtree)) != NULL) { frag = rb_entry(n, struct ceph_inode_frag, node); rb_erase(n, &ci->i_fragtree); diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 217276b8b942..f9b71c12cc9f 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -1008,7 +1008,7 @@ static loff_t cifs_remap_file_range(struct file *src_file, loff_t off, unsigned int xid; int rc; - if (remap_flags & ~REMAP_FILE_ADVISORY) + if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY)) return -EINVAL; cifs_dbg(FYI, "clone range\n"); diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 142164ef1f05..5c0298b9998f 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -150,5 +150,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg); extern const struct export_operations cifs_export_ops; #endif /* CONFIG_CIFS_NFSD_EXPORT */ -#define CIFS_VERSION "2.18" +#define CIFS_VERSION "2.19" #endif /* _CIFSFS_H */ diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 2a6d20c0ce02..89006e044973 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -2632,43 +2632,56 @@ cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list, struct TCP_Server_Info *server = tlink_tcon(wdata->cfile->tlink)->ses->server; - /* - * Wait for credits to resend this wdata. - * Note: we are attempting to resend the whole wdata not in segments - */ do { - rc = server->ops->wait_mtu_credits(server, wdata->bytes, &wsize, - &credits); + if (wdata->cfile->invalidHandle) { + rc = cifs_reopen_file(wdata->cfile, false); + if (rc == -EAGAIN) + continue; + else if (rc) + break; + } - if (rc) - goto out; - if (wsize < wdata->bytes) { - add_credits_and_wake_if(server, &credits, 0); - msleep(1000); - } - } while (wsize < wdata->bytes); + /* + * Wait for credits to resend this wdata. + * Note: we are attempting to resend the whole wdata not in + * segments + */ + do { + rc = server->ops->wait_mtu_credits(server, wdata->bytes, + &wsize, &credits); + if (rc) + goto fail; + + if (wsize < wdata->bytes) { + add_credits_and_wake_if(server, &credits, 0); + msleep(1000); + } + } while (wsize < wdata->bytes); + wdata->credits = credits; - wdata->credits = credits; - rc = -EAGAIN; - while (rc == -EAGAIN) { - rc = 0; - if (wdata->cfile->invalidHandle) - rc = cifs_reopen_file(wdata->cfile, false); - if (!rc) - rc = server->ops->async_writev(wdata, + rc = adjust_credits(server, &wdata->credits, wdata->bytes); + + if (!rc) { + if (wdata->cfile->invalidHandle) + rc = -EAGAIN; + else + rc = server->ops->async_writev(wdata, cifs_uncached_writedata_release); - } + } - if (!rc) { - list_add_tail(&wdata->list, wdata_list); - return 0; - } + /* If the write was successfully sent, we are done */ + if (!rc) { + list_add_tail(&wdata->list, wdata_list); + return 0; + } - add_credits_and_wake_if(server, &wdata->credits, 0); -out: - kref_put(&wdata->refcount, cifs_uncached_writedata_release); + /* Roll back credits and retry if needed */ + add_credits_and_wake_if(server, &wdata->credits, 0); + } while (rc == -EAGAIN); +fail: + kref_put(&wdata->refcount, cifs_uncached_writedata_release); return rc; } @@ -2896,12 +2909,12 @@ restart_loop: wdata->bytes, &tmp_from, ctx->cfile, cifs_sb, &tmp_list, ctx); + + kref_put(&wdata->refcount, + cifs_uncached_writedata_release); } list_splice(&tmp_list, &ctx->list); - - kref_put(&wdata->refcount, - cifs_uncached_writedata_release); goto restart_loop; } } @@ -3348,44 +3361,55 @@ static int cifs_resend_rdata(struct cifs_readdata *rdata, struct TCP_Server_Info *server = tlink_tcon(rdata->cfile->tlink)->ses->server; - /* - * Wait for credits to resend this rdata. - * Note: we are attempting to resend the whole rdata not in segments - */ do { - rc = server->ops->wait_mtu_credits(server, rdata->bytes, + if (rdata->cfile->invalidHandle) { + rc = cifs_reopen_file(rdata->cfile, true); + if (rc == -EAGAIN) + continue; + else if (rc) + break; + } + + /* + * Wait for credits to resend this rdata. + * Note: we are attempting to resend the whole rdata not in + * segments + */ + do { + rc = server->ops->wait_mtu_credits(server, rdata->bytes, &rsize, &credits); - if (rc) - goto out; + if (rc) + goto fail; - if (rsize < rdata->bytes) { - add_credits_and_wake_if(server, &credits, 0); - msleep(1000); - } - } while (rsize < rdata->bytes); + if (rsize < rdata->bytes) { + add_credits_and_wake_if(server, &credits, 0); + msleep(1000); + } + } while (rsize < rdata->bytes); + rdata->credits = credits; - rdata->credits = credits; - rc = -EAGAIN; - while (rc == -EAGAIN) { - rc = 0; - if (rdata->cfile->invalidHandle) - rc = cifs_reopen_file(rdata->cfile, true); - if (!rc) - rc = server->ops->async_readv(rdata); - } + rc = adjust_credits(server, &rdata->credits, rdata->bytes); + if (!rc) { + if (rdata->cfile->invalidHandle) + rc = -EAGAIN; + else + rc = server->ops->async_readv(rdata); + } - if (!rc) { - /* Add to aio pending list */ - list_add_tail(&rdata->list, rdata_list); - return 0; - } + /* If the read was successfully sent, we are done */ + if (!rc) { + /* Add to aio pending list */ + list_add_tail(&rdata->list, rdata_list); + return 0; + } - add_credits_and_wake_if(server, &rdata->credits, 0); -out: - kref_put(&rdata->refcount, - cifs_uncached_readdata_release); + /* Roll back credits and retry if needed */ + add_credits_and_wake_if(server, &rdata->credits, 0); + } while (rc == -EAGAIN); +fail: + kref_put(&rdata->refcount, cifs_uncached_readdata_release); return rc; } diff --git a/fs/cifs/smb2maperror.c b/fs/cifs/smb2maperror.c index 924269cec135..e32c264e3adb 100644 --- a/fs/cifs/smb2maperror.c +++ b/fs/cifs/smb2maperror.c @@ -1036,7 +1036,8 @@ static const struct status_to_posix_error smb2_error_map_table[] = { {STATUS_UNFINISHED_CONTEXT_DELETED, -EIO, "STATUS_UNFINISHED_CONTEXT_DELETED"}, {STATUS_NO_TGT_REPLY, -EIO, "STATUS_NO_TGT_REPLY"}, - {STATUS_OBJECTID_NOT_FOUND, -EIO, "STATUS_OBJECTID_NOT_FOUND"}, + /* Note that ENOATTTR and ENODATA are the same errno */ + {STATUS_OBJECTID_NOT_FOUND, -ENODATA, "STATUS_OBJECTID_NOT_FOUND"}, {STATUS_NO_IP_ADDRESSES, -EIO, "STATUS_NO_IP_ADDRESSES"}, {STATUS_WRONG_CREDENTIAL_HANDLE, -EIO, "STATUS_WRONG_CREDENTIAL_HANDLE"}, diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index c399e09b76e6..21ac19ff19cb 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -1628,9 +1628,16 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree, iov[1].iov_base = unc_path; iov[1].iov_len = unc_path_len; - /* 3.11 tcon req must be signed if not encrypted. See MS-SMB2 3.2.4.1.1 */ + /* + * 3.11 tcon req must be signed if not encrypted. See MS-SMB2 3.2.4.1.1 + * unless it is guest or anonymous user. See MS-SMB2 3.2.5.3.1 + * (Samba servers don't always set the flag so also check if null user) + */ if ((ses->server->dialect == SMB311_PROT_ID) && - !smb3_encryption_required(tcon)) + !smb3_encryption_required(tcon) && + !(ses->session_flags & + (SMB2_SESSION_FLAG_IS_GUEST|SMB2_SESSION_FLAG_IS_NULL)) && + ((ses->user_name != NULL) || (ses->sectype == Kerberos))) req->sync_hdr.Flags |= SMB2_FLAGS_SIGNED; memset(&rqst, 0, sizeof(struct smb_rqst)); diff --git a/fs/cifs/trace.h b/fs/cifs/trace.h index fa226de48ef3..99c4d799c24b 100644 --- a/fs/cifs/trace.h +++ b/fs/cifs/trace.h @@ -549,19 +549,19 @@ DECLARE_EVENT_CLASS(smb3_tcon_class, __field(unsigned int, xid) __field(__u32, tid) __field(__u64, sesid) - __field(const char *, unc_name) + __string(name, unc_name) __field(int, rc) ), TP_fast_assign( __entry->xid = xid; __entry->tid = tid; __entry->sesid = sesid; - __entry->unc_name = unc_name; + __assign_str(name, unc_name); __entry->rc = rc; ), TP_printk("xid=%u sid=0x%llx tid=0x%x unc_name=%s rc=%d", __entry->xid, __entry->sesid, __entry->tid, - __entry->unc_name, __entry->rc) + __get_str(name), __entry->rc) ) #define DEFINE_SMB3_TCON_EVENT(name) \ diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h index a1ac7e9245ec..75a5309f2231 100644 --- a/fs/ext4/ext4_jbd2.h +++ b/fs/ext4/ext4_jbd2.h @@ -384,7 +384,7 @@ static inline void ext4_update_inode_fsync_trans(handle_t *handle, { struct ext4_inode_info *ei = EXT4_I(inode); - if (ext4_handle_valid(handle)) { + if (ext4_handle_valid(handle) && !is_handle_aborted(handle)) { ei->i_sync_tid = handle->h_transaction->t_tid; if (datasync) ei->i_datasync_tid = handle->h_transaction->t_tid; diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 69d65d49837b..98ec11f69cd4 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -125,7 +125,7 @@ ext4_unaligned_aio(struct inode *inode, struct iov_iter *from, loff_t pos) struct super_block *sb = inode->i_sb; int blockmask = sb->s_blocksize - 1; - if (pos >= i_size_read(inode)) + if (pos >= ALIGN(i_size_read(inode), sb->s_blocksize)) return 0; if ((pos | iov_iter_alignment(from)) & blockmask) diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c index c2225f0d31b5..2024d3fa5504 100644 --- a/fs/ext4/indirect.c +++ b/fs/ext4/indirect.c @@ -1222,6 +1222,7 @@ int ext4_ind_remove_space(handle_t *handle, struct inode *inode, ext4_lblk_t offsets[4], offsets2[4]; Indirect chain[4], chain2[4]; Indirect *partial, *partial2; + Indirect *p = NULL, *p2 = NULL; ext4_lblk_t max_block; __le32 nr = 0, nr2 = 0; int n = 0, n2 = 0; @@ -1263,7 +1264,7 @@ int ext4_ind_remove_space(handle_t *handle, struct inode *inode, } - partial = ext4_find_shared(inode, n, offsets, chain, &nr); + partial = p = ext4_find_shared(inode, n, offsets, chain, &nr); if (nr) { if (partial == chain) { /* Shared branch grows from the inode */ @@ -1288,13 +1289,11 @@ int ext4_ind_remove_space(handle_t *handle, struct inode *inode, partial->p + 1, (__le32 *)partial->bh->b_data+addr_per_block, (chain+n-1) - partial); - BUFFER_TRACE(partial->bh, "call brelse"); - brelse(partial->bh); partial--; } end_range: - partial2 = ext4_find_shared(inode, n2, offsets2, chain2, &nr2); + partial2 = p2 = ext4_find_shared(inode, n2, offsets2, chain2, &nr2); if (nr2) { if (partial2 == chain2) { /* @@ -1324,16 +1323,14 @@ end_range: (__le32 *)partial2->bh->b_data, partial2->p, (chain2+n2-1) - partial2); - BUFFER_TRACE(partial2->bh, "call brelse"); - brelse(partial2->bh); partial2--; } goto do_indirects; } /* Punch happened within the same level (n == n2) */ - partial = ext4_find_shared(inode, n, offsets, chain, &nr); - partial2 = ext4_find_shared(inode, n2, offsets2, chain2, &nr2); + partial = p = ext4_find_shared(inode, n, offsets, chain, &nr); + partial2 = p2 = ext4_find_shared(inode, n2, offsets2, chain2, &nr2); /* Free top, but only if partial2 isn't its subtree. */ if (nr) { @@ -1390,11 +1387,7 @@ end_range: partial->p + 1, partial2->p, (chain+n-1) - partial); - BUFFER_TRACE(partial->bh, "call brelse"); - brelse(partial->bh); - BUFFER_TRACE(partial2->bh, "call brelse"); - brelse(partial2->bh); - return 0; + goto cleanup; } /* @@ -1409,8 +1402,6 @@ end_range: partial->p + 1, (__le32 *)partial->bh->b_data+addr_per_block, (chain+n-1) - partial); - BUFFER_TRACE(partial->bh, "call brelse"); - brelse(partial->bh); partial--; } if (partial2 > chain2 && depth2 <= depth) { @@ -1418,11 +1409,21 @@ end_range: (__le32 *)partial2->bh->b_data, partial2->p, (chain2+n2-1) - partial2); - BUFFER_TRACE(partial2->bh, "call brelse"); - brelse(partial2->bh); partial2--; } } + +cleanup: + while (p && p > chain) { + BUFFER_TRACE(p->bh, "call brelse"); + brelse(p->bh); + p--; + } + while (p2 && p2 > chain2) { + BUFFER_TRACE(p2->bh, "call brelse"); + brelse(p2->bh); + p2--; + } return 0; do_indirects: @@ -1430,7 +1431,7 @@ do_indirects: switch (offsets[0]) { default: if (++n >= n2) - return 0; + break; nr = i_data[EXT4_IND_BLOCK]; if (nr) { ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 1); @@ -1439,7 +1440,7 @@ do_indirects: /* fall through */ case EXT4_IND_BLOCK: if (++n >= n2) - return 0; + break; nr = i_data[EXT4_DIND_BLOCK]; if (nr) { ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 2); @@ -1448,7 +1449,7 @@ do_indirects: /* fall through */ case EXT4_DIND_BLOCK: if (++n >= n2) - return 0; + break; nr = i_data[EXT4_TIND_BLOCK]; if (nr) { ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 3); @@ -1458,5 +1459,5 @@ do_indirects: case EXT4_TIND_BLOCK: ; } - return 0; + goto cleanup; } diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index b54b261ded36..b32a57bc5d5d 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -6080,36 +6080,6 @@ out: return; } -#if 0 -/* - * Bind an inode's backing buffer_head into this transaction, to prevent - * it from being flushed to disk early. Unlike - * ext4_reserve_inode_write, this leaves behind no bh reference and - * returns no iloc structure, so the caller needs to repeat the iloc - * lookup to mark the inode dirty later. - */ -static int ext4_pin_inode(handle_t *handle, struct inode *inode) -{ - struct ext4_iloc iloc; - - int err = 0; - if (handle) { - err = ext4_get_inode_loc(inode, &iloc); - if (!err) { - BUFFER_TRACE(iloc.bh, "get_write_access"); - err = jbd2_journal_get_write_access(handle, iloc.bh); - if (!err) - err = ext4_handle_dirty_metadata(handle, - NULL, - iloc.bh); - brelse(iloc.bh); - } - } - ext4_std_error(inode->i_sb, err); - return err; -} -#endif - int ext4_change_inode_journal_flag(struct inode *inode, int val) { journal_t *journal; diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 3c4f8bb59f8a..bab3da4f1e0d 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -1000,6 +1000,13 @@ resizefs_out: if (!blk_queue_discard(q)) return -EOPNOTSUPP; + /* + * We haven't replayed the journal, so we cannot use our + * block-bitmap-guided storage zapping commands. + */ + if (test_opt(sb, NOLOAD) && ext4_has_feature_journal(sb)) + return -EROFS; + if (copy_from_user(&range, (struct fstrim_range __user *)arg, sizeof(range))) return -EFAULT; diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 3d9b18505c0c..e7ae26e36c9c 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -932,11 +932,18 @@ static int add_new_gdb_meta_bg(struct super_block *sb, memcpy(n_group_desc, o_group_desc, EXT4_SB(sb)->s_gdb_count * sizeof(struct buffer_head *)); n_group_desc[gdb_num] = gdb_bh; + + BUFFER_TRACE(gdb_bh, "get_write_access"); + err = ext4_journal_get_write_access(handle, gdb_bh); + if (err) { + kvfree(n_group_desc); + brelse(gdb_bh); + return err; + } + EXT4_SB(sb)->s_group_desc = n_group_desc; EXT4_SB(sb)->s_gdb_count++; kvfree(o_group_desc); - BUFFER_TRACE(gdb_bh, "get_write_access"); - err = ext4_journal_get_write_access(handle, gdb_bh); return err; } @@ -2073,6 +2080,10 @@ out: free_flex_gd(flex_gd); if (resize_inode != NULL) iput(resize_inode); - ext4_msg(sb, KERN_INFO, "resized filesystem to %llu", n_blocks_count); + if (err) + ext4_warning(sb, "error (%d) occurred during " + "file system resize", err); + ext4_msg(sb, KERN_INFO, "resized filesystem to %llu", + ext4_blocks_count(es)); return err; } diff --git a/fs/ext4/super.c b/fs/ext4/super.c index f5b828bf1299..6ed4eb81e674 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -430,6 +430,12 @@ static void ext4_journal_commit_callback(journal_t *journal, transaction_t *txn) spin_unlock(&sbi->s_md_lock); } +static bool system_going_down(void) +{ + return system_state == SYSTEM_HALT || system_state == SYSTEM_POWER_OFF + || system_state == SYSTEM_RESTART; +} + /* Deal with the reporting of failure conditions on a filesystem such as * inconsistencies detected or read IO failures. * @@ -460,7 +466,12 @@ static void ext4_handle_error(struct super_block *sb) if (journal) jbd2_journal_abort(journal, -EIO); } - if (test_opt(sb, ERRORS_RO)) { + /* + * We force ERRORS_RO behavior when system is rebooting. Otherwise we + * could panic during 'reboot -f' as the underlying device got already + * disabled. + */ + if (test_opt(sb, ERRORS_RO) || system_going_down()) { ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only"); /* * Make sure updated value of ->s_mount_flags will be visible @@ -468,8 +479,7 @@ static void ext4_handle_error(struct super_block *sb) */ smp_wmb(); sb->s_flags |= SB_RDONLY; - } - if (test_opt(sb, ERRORS_PANIC)) { + } else if (test_opt(sb, ERRORS_PANIC)) { if (EXT4_SB(sb)->s_journal && !(EXT4_SB(sb)->s_journal->j_flags & JBD2_REC_ERR)) return; diff --git a/fs/lockd/host.c b/fs/lockd/host.c index 93fb7cf0b92b..f0b5c987d6ae 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c @@ -290,12 +290,11 @@ void nlmclnt_release_host(struct nlm_host *host) WARN_ON_ONCE(host->h_server); - if (refcount_dec_and_test(&host->h_count)) { + if (refcount_dec_and_mutex_lock(&host->h_count, &nlm_host_mutex)) { WARN_ON_ONCE(!list_empty(&host->h_lockowners)); WARN_ON_ONCE(!list_empty(&host->h_granted)); WARN_ON_ONCE(!list_empty(&host->h_reclaim)); - mutex_lock(&nlm_host_mutex); nlm_destroy_host_locked(host); mutex_unlock(&nlm_host_mutex); } diff --git a/fs/locks.c b/fs/locks.c index eaa1cfaf73b0..71d0c6c2aac5 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -1160,6 +1160,11 @@ static int posix_lock_inode(struct inode *inode, struct file_lock *request, */ error = -EDEADLK; spin_lock(&blocked_lock_lock); + /* + * Ensure that we don't find any locks blocked on this + * request during deadlock detection. + */ + __locks_wake_up_blocks(request); if (likely(!posix_locks_deadlock(request, fl))) { error = FILE_LOCK_DEFERRED; __locks_insert_block(fl, request, diff --git a/fs/nfs/client.c b/fs/nfs/client.c index fb1cf1a4bda2..90d71fda65ce 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -453,7 +453,7 @@ void nfs_init_timeout_values(struct rpc_timeout *to, int proto, case XPRT_TRANSPORT_RDMA: if (retrans == NFS_UNSPEC_RETRANS) to->to_retries = NFS_DEF_TCP_RETRANS; - if (timeo == NFS_UNSPEC_TIMEO || to->to_retries == 0) + if (timeo == NFS_UNSPEC_TIMEO || to->to_initval == 0) to->to_initval = NFS_DEF_TCP_TIMEO * HZ / 10; if (to->to_initval > NFS_MAX_TCP_TIMEOUT) to->to_initval = NFS_MAX_TCP_TIMEOUT; diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index f9264e1922a2..6673d4ff5a2a 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -1289,6 +1289,7 @@ static void ff_layout_io_track_ds_error(struct pnfs_layout_segment *lseg, static int ff_layout_read_done_cb(struct rpc_task *task, struct nfs_pgio_header *hdr) { + int new_idx = hdr->pgio_mirror_idx; int err; trace_nfs4_pnfs_read(hdr, task->tk_status); @@ -1307,7 +1308,7 @@ static int ff_layout_read_done_cb(struct rpc_task *task, case -NFS4ERR_RESET_TO_PNFS: if (ff_layout_choose_best_ds_for_read(hdr->lseg, hdr->pgio_mirror_idx + 1, - &hdr->pgio_mirror_idx)) + &new_idx)) goto out_layouterror; set_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags); return task->tk_status; @@ -1320,7 +1321,9 @@ static int ff_layout_read_done_cb(struct rpc_task *task, return 0; out_layouterror: + ff_layout_read_record_layoutstats_done(task, hdr); ff_layout_send_layouterror(hdr->lseg); + hdr->pgio_mirror_idx = new_idx; out_eagain: rpc_restart_call_prepare(task); return -EAGAIN; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 4dbb0ee23432..741ff8c9c6ed 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2933,7 +2933,8 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata, } out: - nfs4_sequence_free_slot(&opendata->o_res.seq_res); + if (!opendata->cancelled) + nfs4_sequence_free_slot(&opendata->o_res.seq_res); return ret; } @@ -6301,7 +6302,6 @@ static struct nfs4_unlockdata *nfs4_alloc_unlockdata(struct file_lock *fl, p->arg.seqid = seqid; p->res.seqid = seqid; p->lsp = lsp; - refcount_inc(&lsp->ls_count); /* Ensure we don't close file until we're done freeing locks! */ p->ctx = get_nfs_open_context(ctx); p->l_ctx = nfs_get_lock_context(ctx); @@ -6526,7 +6526,6 @@ static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl, p->res.lock_seqid = p->arg.lock_seqid; p->lsp = lsp; p->server = server; - refcount_inc(&lsp->ls_count); p->ctx = get_nfs_open_context(ctx); locks_init_lock(&p->fl); locks_copy_lock(&p->fl, fl); diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index bbcc185062bb..d29d869abec1 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c @@ -54,6 +54,28 @@ static LIST_HEAD(kclist_head); static DECLARE_RWSEM(kclist_lock); static int kcore_need_update = 1; +/* + * Returns > 0 for RAM pages, 0 for non-RAM pages, < 0 on error + * Same as oldmem_pfn_is_ram in vmcore + */ +static int (*mem_pfn_is_ram)(unsigned long pfn); + +int __init register_mem_pfn_is_ram(int (*fn)(unsigned long pfn)) +{ + if (mem_pfn_is_ram) + return -EBUSY; + mem_pfn_is_ram = fn; + return 0; +} + +static int pfn_is_ram(unsigned long pfn) +{ + if (mem_pfn_is_ram) + return mem_pfn_is_ram(pfn); + else + return 1; +} + /* This doesn't grab kclist_lock, so it should only be used at init time. */ void __init kclist_add(struct kcore_list *new, void *addr, size_t size, int type) @@ -465,6 +487,11 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos) goto out; } m = NULL; /* skip the list anchor */ + } else if (!pfn_is_ram(__pa(start) >> PAGE_SHIFT)) { + if (clear_user(buffer, tsz)) { + ret = -EFAULT; + goto out; + } } else if (m->type == KCORE_VMALLOC) { vread(buf, (char *)start, tsz); /* we have to zero-fill user buffer even if no read */ diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 48502cb9990f..4637ae1ae91c 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -1191,7 +1191,10 @@ xfs_iread_extents( * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out. */ level = be16_to_cpu(block->bb_level); - ASSERT(level > 0); + if (unlikely(level == 0)) { + XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp); + return -EFSCORRUPTED; + } pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes); bno = be64_to_cpu(*pp); @@ -4249,9 +4252,13 @@ xfs_bmapi_write( struct xfs_bmbt_irec *mval, /* output: map values */ int *nmap) /* i/o: mval size/count */ { + struct xfs_bmalloca bma = { + .tp = tp, + .ip = ip, + .total = total, + }; struct xfs_mount *mp = ip->i_mount; struct xfs_ifork *ifp; - struct xfs_bmalloca bma = { NULL }; /* args for xfs_bmap_alloc */ xfs_fileoff_t end; /* end of mapped file region */ bool eof = false; /* after the end of extents */ int error; /* error return */ @@ -4319,10 +4326,6 @@ xfs_bmapi_write( eof = true; if (!xfs_iext_peek_prev_extent(ifp, &bma.icur, &bma.prev)) bma.prev.br_startoff = NULLFILEOFF; - bma.tp = tp; - bma.ip = ip; - bma.total = total; - bma.datatype = 0; bma.minleft = xfs_bmapi_minleft(tp, ip, whichfork); n = 0; diff --git a/fs/xfs/scrub/btree.c b/fs/xfs/scrub/btree.c index 6f94d1f7322d..117910db51b8 100644 --- a/fs/xfs/scrub/btree.c +++ b/fs/xfs/scrub/btree.c @@ -415,8 +415,17 @@ xchk_btree_check_owner( struct xfs_btree_cur *cur = bs->cur; struct check_owner *co; - if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && bp == NULL) + /* + * In theory, xfs_btree_get_block should only give us a null buffer + * pointer for the root of a root-in-inode btree type, but we need + * to check defensively here in case the cursor state is also screwed + * up. + */ + if (bp == NULL) { + if (!(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)) + xchk_btree_set_corrupt(bs->sc, bs->cur, level); return 0; + } /* * We want to cross-reference each btree block with the bnobt diff --git a/fs/xfs/scrub/dabtree.c b/fs/xfs/scrub/dabtree.c index f1260b4bfdee..90527b094878 100644 --- a/fs/xfs/scrub/dabtree.c +++ b/fs/xfs/scrub/dabtree.c @@ -574,6 +574,11 @@ xchk_da_btree( /* Drill another level deeper. */ blkno = be32_to_cpu(key->before); level++; + if (level >= XFS_DA_NODE_MAXDEPTH) { + /* Too deep! */ + xchk_da_set_corrupt(&ds, level - 1); + break; + } ds.tree_level--; error = xchk_da_btree_block(&ds, level, blkno); if (error) diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c index 93f07edafd81..9ee2a7d02e70 100644 --- a/fs/xfs/xfs_discard.c +++ b/fs/xfs/xfs_discard.c @@ -161,6 +161,14 @@ xfs_ioc_trim( return -EPERM; if (!blk_queue_discard(q)) return -EOPNOTSUPP; + + /* + * We haven't recovered the log, so we cannot use our bnobt-guided + * storage zapping commands. + */ + if (mp->m_flags & XFS_MOUNT_NORECOVERY) + return -EROFS; + if (copy_from_user(&range, urange, sizeof(range))) return -EFAULT; diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 1f2e2845eb76..a7ceae90110e 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -529,18 +529,17 @@ xfs_file_dio_aio_write( count = iov_iter_count(from); /* - * If we are doing unaligned IO, wait for all other IO to drain, - * otherwise demote the lock if we had to take the exclusive lock - * for other reasons in xfs_file_aio_write_checks. + * If we are doing unaligned IO, we can't allow any other overlapping IO + * in-flight at the same time or we risk data corruption. Wait for all + * other IO to drain before we submit. If the IO is aligned, demote the + * iolock if we had to take the exclusive lock in + * xfs_file_aio_write_checks() for other reasons. */ if (unaligned_io) { - /* If we are going to wait for other DIO to finish, bail */ - if (iocb->ki_flags & IOCB_NOWAIT) { - if (atomic_read(&inode->i_dio_count)) - return -EAGAIN; - } else { - inode_dio_wait(inode); - } + /* unaligned dio always waits, bail */ + if (iocb->ki_flags & IOCB_NOWAIT) + return -EAGAIN; + inode_dio_wait(inode); } else if (iolock == XFS_IOLOCK_EXCL) { xfs_ilock_demote(ip, XFS_IOLOCK_EXCL); iolock = XFS_IOLOCK_SHARED; @@ -548,6 +547,14 @@ xfs_file_dio_aio_write( trace_xfs_file_direct_write(ip, count, iocb->ki_pos); ret = iomap_dio_rw(iocb, from, &xfs_iomap_ops, xfs_dio_write_end_io); + + /* + * If unaligned, this is the only IO in-flight. If it has not yet + * completed, wait on it before we release the iolock to prevent + * subsequent overlapping IO. + */ + if (ret == -EIOCBQUEUED && unaligned_io) + inode_dio_wait(inode); out: xfs_iunlock(ip, iolock); |