diff options
Diffstat (limited to 'fs')
37 files changed, 329 insertions, 179 deletions
diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c index adaf6f6dd858..e1cbdfdb7c68 100644 --- a/fs/9p/vfs_addr.c +++ b/fs/9p/vfs_addr.c @@ -310,9 +310,13 @@ static int v9fs_write_end(struct file *filp, struct address_space *mapping, p9_debug(P9_DEBUG_VFS, "filp %p, mapping %p\n", filp, mapping); - if (unlikely(copied < len && !PageUptodate(page))) { - copied = 0; - goto out; + if (!PageUptodate(page)) { + if (unlikely(copied < len)) { + copied = 0; + goto out; + } else if (len == PAGE_SIZE) { + SetPageUptodate(page); + } } /* * No need to use i_size_read() here, the i_size diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index 2a46762def31..a7c5a9861bef 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -596,7 +596,7 @@ static void bm_evict_inode(struct inode *inode) { Node *e = inode->i_private; - if (e->flags & MISC_FMT_OPEN_FILE) + if (e && e->flags & MISC_FMT_OPEN_FILE) filp_close(e->interp_file, NULL); clear_inode(inode); diff --git a/fs/block_dev.c b/fs/block_dev.c index 93d088ffc05c..789f55e851ae 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -716,10 +716,12 @@ int bdev_write_page(struct block_device *bdev, sector_t sector, set_page_writeback(page); result = ops->rw_page(bdev, sector + get_start_sect(bdev), page, true); - if (result) + if (result) { end_page_writeback(page); - else + } else { + clean_page_buffers(page); unlock_page(page); + } blk_queue_exit(bdev->bd_queue); return result; } diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 35a128acfbd1..161694b66038 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1135,7 +1135,7 @@ static int btrfs_fill_super(struct super_block *sb, #ifdef CONFIG_BTRFS_FS_POSIX_ACL sb->s_flags |= MS_POSIXACL; #endif - sb->s_flags |= MS_I_VERSION; + sb->s_flags |= SB_I_VERSION; sb->s_iflags |= SB_I_CGROUPWB; err = super_setup_bdi(sb); diff --git a/fs/crypto/keyinfo.c b/fs/crypto/keyinfo.c index 018c588c7ac3..8e704d12a1cf 100644 --- a/fs/crypto/keyinfo.c +++ b/fs/crypto/keyinfo.c @@ -109,6 +109,11 @@ static int validate_user_key(struct fscrypt_info *crypt_info, goto out; } ukp = user_key_payload_locked(keyring_key); + if (!ukp) { + /* key was revoked before we acquired its semaphore */ + res = -EKEYREVOKED; + goto out; + } if (ukp->datalen != sizeof(struct fscrypt_key)) { res = -EINVAL; goto out; diff --git a/fs/direct-io.c b/fs/direct-io.c index 62cf812ed0e5..b53e66d9abd7 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -45,6 +45,12 @@ #define DIO_PAGES 64 /* + * Flags for dio_complete() + */ +#define DIO_COMPLETE_ASYNC 0x01 /* This is async IO */ +#define DIO_COMPLETE_INVALIDATE 0x02 /* Can invalidate pages */ + +/* * This code generally works in units of "dio_blocks". A dio_block is * somewhere between the hard sector size and the filesystem block size. it * is determined on a per-invocation basis. When talking to the filesystem @@ -225,7 +231,7 @@ static inline struct page *dio_get_page(struct dio *dio, * filesystems can use it to hold additional state between get_block calls and * dio_complete. */ -static ssize_t dio_complete(struct dio *dio, ssize_t ret, bool is_async) +static ssize_t dio_complete(struct dio *dio, ssize_t ret, unsigned int flags) { loff_t offset = dio->iocb->ki_pos; ssize_t transferred = 0; @@ -259,14 +265,27 @@ static ssize_t dio_complete(struct dio *dio, ssize_t ret, bool is_async) if (ret == 0) ret = transferred; + if (dio->end_io) { + // XXX: ki_pos?? + err = dio->end_io(dio->iocb, offset, ret, dio->private); + if (err) + ret = err; + } + /* * Try again to invalidate clean pages which might have been cached by * non-direct readahead, or faulted in by get_user_pages() if the source * of the write was an mmap'ed region of the file we're writing. Either * one is a pretty crazy thing to do, so we don't support it 100%. If * this invalidation fails, tough, the write still worked... + * + * And this page cache invalidation has to be after dio->end_io(), as + * some filesystems convert unwritten extents to real allocations in + * end_io() when necessary, otherwise a racing buffer read would cache + * zeros from unwritten extents. */ - if (ret > 0 && dio->op == REQ_OP_WRITE && + if (flags & DIO_COMPLETE_INVALIDATE && + ret > 0 && dio->op == REQ_OP_WRITE && dio->inode->i_mapping->nrpages) { err = invalidate_inode_pages2_range(dio->inode->i_mapping, offset >> PAGE_SHIFT, @@ -274,18 +293,10 @@ static ssize_t dio_complete(struct dio *dio, ssize_t ret, bool is_async) WARN_ON_ONCE(err); } - if (dio->end_io) { - - // XXX: ki_pos?? - err = dio->end_io(dio->iocb, offset, ret, dio->private); - if (err) - ret = err; - } - if (!(dio->flags & DIO_SKIP_DIO_COUNT)) inode_dio_end(dio->inode); - if (is_async) { + if (flags & DIO_COMPLETE_ASYNC) { /* * generic_write_sync expects ki_pos to have been updated * already, but the submission path only does this for @@ -306,7 +317,7 @@ static void dio_aio_complete_work(struct work_struct *work) { struct dio *dio = container_of(work, struct dio, complete_work); - dio_complete(dio, 0, true); + dio_complete(dio, 0, DIO_COMPLETE_ASYNC | DIO_COMPLETE_INVALIDATE); } static blk_status_t dio_bio_complete(struct dio *dio, struct bio *bio); @@ -348,7 +359,7 @@ static void dio_bio_end_aio(struct bio *bio) queue_work(dio->inode->i_sb->s_dio_done_wq, &dio->complete_work); } else { - dio_complete(dio, 0, true); + dio_complete(dio, 0, DIO_COMPLETE_ASYNC); } } } @@ -866,7 +877,8 @@ out: */ if (sdio->boundary) { ret = dio_send_cur_page(dio, sdio, map_bh); - dio_bio_submit(dio, sdio); + if (sdio->bio) + dio_bio_submit(dio, sdio); put_page(sdio->cur_page); sdio->cur_page = NULL; } @@ -1359,7 +1371,7 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode, dio_await_completion(dio); if (drop_refcount(dio) == 0) { - retval = dio_complete(dio, retval, false); + retval = dio_complete(dio, retval, DIO_COMPLETE_INVALIDATE); } else BUG_ON(retval != -EIOCBQUEUED); diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h index 9c351bf757b2..3fbc0ff79699 100644 --- a/fs/ecryptfs/ecryptfs_kernel.h +++ b/fs/ecryptfs/ecryptfs_kernel.h @@ -84,11 +84,16 @@ struct ecryptfs_page_crypt_context { static inline struct ecryptfs_auth_tok * ecryptfs_get_encrypted_key_payload_data(struct key *key) { - if (key->type == &key_type_encrypted) - return (struct ecryptfs_auth_tok *) - (&((struct encrypted_key_payload *)key->payload.data[0])->payload_data); - else + struct encrypted_key_payload *payload; + + if (key->type != &key_type_encrypted) return NULL; + + payload = key->payload.data[0]; + if (!payload) + return ERR_PTR(-EKEYREVOKED); + + return (struct ecryptfs_auth_tok *)payload->payload_data; } static inline struct key *ecryptfs_get_encrypted_key(char *sig) @@ -114,12 +119,17 @@ static inline struct ecryptfs_auth_tok * ecryptfs_get_key_payload_data(struct key *key) { struct ecryptfs_auth_tok *auth_tok; + struct user_key_payload *ukp; auth_tok = ecryptfs_get_encrypted_key_payload_data(key); - if (!auth_tok) - return (struct ecryptfs_auth_tok *)user_key_payload_locked(key)->data; - else + if (auth_tok) return auth_tok; + + ukp = user_key_payload_locked(key); + if (!ukp) + return ERR_PTR(-EKEYREVOKED); + + return (struct ecryptfs_auth_tok *)ukp->data; } #define ECRYPTFS_MAX_KEYSET_SIZE 1024 diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c index 3cf1546dca82..fa218cd64f74 100644 --- a/fs/ecryptfs/keystore.c +++ b/fs/ecryptfs/keystore.c @@ -459,7 +459,8 @@ out: * @auth_tok_key: key containing the authentication token * @auth_tok: authentication token * - * Returns zero on valid auth tok; -EINVAL otherwise + * Returns zero on valid auth tok; -EINVAL if the payload is invalid; or + * -EKEYREVOKED if the key was revoked before we acquired its semaphore. */ static int ecryptfs_verify_auth_tok_from_key(struct key *auth_tok_key, @@ -468,6 +469,12 @@ ecryptfs_verify_auth_tok_from_key(struct key *auth_tok_key, int rc = 0; (*auth_tok) = ecryptfs_get_key_payload_data(auth_tok_key); + if (IS_ERR(*auth_tok)) { + rc = PTR_ERR(*auth_tok); + *auth_tok = NULL; + goto out; + } + if (ecryptfs_verify_version((*auth_tok)->version)) { printk(KERN_ERR "Data structure version mismatch. Userspace " "tools must match eCryptfs kernel module with major " diff --git a/fs/exec.c b/fs/exec.c index 5470d3c1892a..3e14ba25f678 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1802,6 +1802,7 @@ static int do_execveat_common(int fd, struct filename *filename, /* execve succeeded */ current->fs->in_exec = 0; current->in_execve = 0; + membarrier_execve(current); acct_update_integrals(current); task_numa_free(current); free_bprm(bprm); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index b104096fce9e..b0915b734a38 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1677,7 +1677,7 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token, sbi->s_mount_flags |= EXT4_MF_FS_ABORTED; return 1; case Opt_i_version: - sb->s_flags |= MS_I_VERSION; + sb->s_flags |= SB_I_VERSION; return 1; case Opt_lazytime: sb->s_flags |= MS_LAZYTIME; @@ -2060,7 +2060,7 @@ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb, SEQ_OPTS_PRINT("min_batch_time=%u", sbi->s_min_batch_time); if (nodefs || sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME) SEQ_OPTS_PRINT("max_batch_time=%u", sbi->s_max_batch_time); - if (sb->s_flags & MS_I_VERSION) + if (sb->s_flags & SB_I_VERSION) SEQ_OPTS_PUTS("i_version"); if (nodefs || sbi->s_stripe) SEQ_OPTS_PRINT("stripe=%lu", sbi->s_stripe); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 9a7c90386947..4b4a72f392be 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2525,7 +2525,7 @@ void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr); bool is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr); void refresh_sit_entry(struct f2fs_sb_info *sbi, block_t old, block_t new); void stop_discard_thread(struct f2fs_sb_info *sbi); -void f2fs_wait_discard_bios(struct f2fs_sb_info *sbi); +void f2fs_wait_discard_bios(struct f2fs_sb_info *sbi, bool umount); void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc); void release_discard_addrs(struct f2fs_sb_info *sbi); int npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 621b9b3d320b..c695ff462ee6 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1210,11 +1210,11 @@ void stop_discard_thread(struct f2fs_sb_info *sbi) } /* This comes from f2fs_put_super and f2fs_trim_fs */ -void f2fs_wait_discard_bios(struct f2fs_sb_info *sbi) +void f2fs_wait_discard_bios(struct f2fs_sb_info *sbi, bool umount) { __issue_discard_cmd(sbi, false); __drop_discard_cmd(sbi); - __wait_discard_cmd(sbi, false); + __wait_discard_cmd(sbi, !umount); } static void mark_discard_range_all(struct f2fs_sb_info *sbi) @@ -2244,7 +2244,7 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) } /* It's time to issue all the filed discards */ mark_discard_range_all(sbi); - f2fs_wait_discard_bios(sbi); + f2fs_wait_discard_bios(sbi, false); out: range->len = F2FS_BLK_TO_BYTES(cpc.trimmed); return err; diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 89f61eb3d167..933c3d529e65 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -801,7 +801,7 @@ static void f2fs_put_super(struct super_block *sb) } /* be sure to wait for any on-going discard commands */ - f2fs_wait_discard_bios(sbi); + f2fs_wait_discard_bios(sbi, true); if (f2fs_discard_en(sbi) && !sbi->discard_blks) { struct cp_control cpc = { diff --git a/fs/fscache/object-list.c b/fs/fscache/object-list.c index b5ab06fabc60..0438d4cd91ef 100644 --- a/fs/fscache/object-list.c +++ b/fs/fscache/object-list.c @@ -331,6 +331,13 @@ static void fscache_objlist_config(struct fscache_objlist_data *data) rcu_read_lock(); confkey = user_key_payload_rcu(key); + if (!confkey) { + /* key was revoked */ + rcu_read_unlock(); + key_put(key); + goto no_config; + } + buf = confkey->data; for (len = confkey->datalen - 1; len >= 0; len--) { diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 65c88379a3a1..94a745acaef8 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -1059,7 +1059,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) if (sb->s_flags & MS_MANDLOCK) goto err; - sb->s_flags &= ~(MS_NOSEC | MS_I_VERSION); + sb->s_flags &= ~(MS_NOSEC | SB_I_VERSION); if (!parse_fuse_opt(data, &d, is_bdev)) goto err; diff --git a/fs/iomap.c b/fs/iomap.c index be61cf742b5e..d4801f8dd4fd 100644 --- a/fs/iomap.c +++ b/fs/iomap.c @@ -714,23 +714,9 @@ static ssize_t iomap_dio_complete(struct iomap_dio *dio) { struct kiocb *iocb = dio->iocb; struct inode *inode = file_inode(iocb->ki_filp); + loff_t offset = iocb->ki_pos; ssize_t ret; - /* - * Try again to invalidate clean pages which might have been cached by - * non-direct readahead, or faulted in by get_user_pages() if the source - * of the write was an mmap'ed region of the file we're writing. Either - * one is a pretty crazy thing to do, so we don't support it 100%. If - * this invalidation fails, tough, the write still worked... - */ - if (!dio->error && - (dio->flags & IOMAP_DIO_WRITE) && inode->i_mapping->nrpages) { - ret = invalidate_inode_pages2_range(inode->i_mapping, - iocb->ki_pos >> PAGE_SHIFT, - (iocb->ki_pos + dio->size - 1) >> PAGE_SHIFT); - WARN_ON_ONCE(ret); - } - if (dio->end_io) { ret = dio->end_io(iocb, dio->error ? dio->error : dio->size, @@ -742,12 +728,33 @@ static ssize_t iomap_dio_complete(struct iomap_dio *dio) if (likely(!ret)) { ret = dio->size; /* check for short read */ - if (iocb->ki_pos + ret > dio->i_size && + if (offset + ret > dio->i_size && !(dio->flags & IOMAP_DIO_WRITE)) - ret = dio->i_size - iocb->ki_pos; + ret = dio->i_size - offset; iocb->ki_pos += ret; } + /* + * Try again to invalidate clean pages which might have been cached by + * non-direct readahead, or faulted in by get_user_pages() if the source + * of the write was an mmap'ed region of the file we're writing. Either + * one is a pretty crazy thing to do, so we don't support it 100%. If + * this invalidation fails, tough, the write still worked... + * + * And this page cache invalidation has to be after dio->end_io(), as + * some filesystems convert unwritten extents to real allocations in + * end_io() when necessary, otherwise a racing buffer read would cache + * zeros from unwritten extents. + */ + if (!dio->error && + (dio->flags & IOMAP_DIO_WRITE) && inode->i_mapping->nrpages) { + int err; + err = invalidate_inode_pages2_range(inode->i_mapping, + offset >> PAGE_SHIFT, + (offset + dio->size - 1) >> PAGE_SHIFT); + WARN_ON_ONCE(err); + } + inode_dio_end(file_inode(iocb->ki_filp)); kfree(dio); diff --git a/fs/mpage.c b/fs/mpage.c index 37bb77c1302c..c991faec70b9 100644 --- a/fs/mpage.c +++ b/fs/mpage.c @@ -468,6 +468,16 @@ static void clean_buffers(struct page *page, unsigned first_unmapped) try_to_free_buffers(page); } +/* + * For situations where we want to clean all buffers attached to a page. + * We don't need to calculate how many buffers are attached to the page, + * we just need to specify a number larger than the maximum number of buffers. + */ +void clean_page_buffers(struct page *page) +{ + clean_buffers(page, ~0U); +} + static int __mpage_writepage(struct page *page, struct writeback_control *wbc, void *data) { @@ -605,10 +615,8 @@ alloc_new: if (bio == NULL) { if (first_unmapped == blocks_per_page) { if (!bdev_write_page(bdev, blocks[0] << (blkbits - 9), - page, wbc)) { - clean_buffers(page, first_unmapped); + page, wbc)) goto out; - } } bio = mpage_alloc(bdev, blocks[0] << (blkbits - 9), BIO_MAX_PAGES, GFP_NOFS|__GFP_HIGH); diff --git a/fs/namespace.c b/fs/namespace.c index 3b601f115b6c..d18deb4c410b 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -2825,7 +2825,8 @@ long do_mount(const char *dev_name, const char __user *dir_name, SB_MANDLOCK | SB_DIRSYNC | SB_SILENT | - SB_POSIXACL); + SB_POSIXACL | + SB_I_VERSION); if (flags & MS_REMOUNT) retval = do_remount(&path, flags, sb_flags, mnt_flags, diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 3c69db7d4905..8487486ec496 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -927,6 +927,13 @@ nfsd4_secinfo_release(union nfsd4_op_u *u) exp_put(u->secinfo.si_exp); } +static void +nfsd4_secinfo_no_name_release(union nfsd4_op_u *u) +{ + if (u->secinfo_no_name.sin_exp) + exp_put(u->secinfo_no_name.sin_exp); +} + static __be32 nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) @@ -2375,7 +2382,7 @@ static const struct nfsd4_operation nfsd4_ops[] = { }, [OP_SECINFO_NO_NAME] = { .op_func = nfsd4_secinfo_no_name, - .op_release = nfsd4_secinfo_release, + .op_release = nfsd4_secinfo_no_name_release, .op_flags = OP_HANDLES_WRONGSEC, .op_name = "OP_SECINFO_NO_NAME", .op_rsize_bop = nfsd4_secinfo_rsize, diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 50b0556a124f..52ad15192e72 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -1297,21 +1297,18 @@ static int dquot_add_space(struct dquot *dquot, qsize_t space, spin_lock(&dquot->dq_dqb_lock); if (!sb_has_quota_limits_enabled(sb, dquot->dq_id.type) || test_bit(DQ_FAKE_B, &dquot->dq_flags)) - goto add; + goto finish; tspace = dquot->dq_dqb.dqb_curspace + dquot->dq_dqb.dqb_rsvspace + space + rsv_space; - if (flags & DQUOT_SPACE_NOFAIL) - goto add; - if (dquot->dq_dqb.dqb_bhardlimit && tspace > dquot->dq_dqb.dqb_bhardlimit && !ignore_hardlimit(dquot)) { if (flags & DQUOT_SPACE_WARN) prepare_warning(warn, dquot, QUOTA_NL_BHARDWARN); ret = -EDQUOT; - goto out; + goto finish; } if (dquot->dq_dqb.dqb_bsoftlimit && @@ -1322,7 +1319,7 @@ static int dquot_add_space(struct dquot *dquot, qsize_t space, if (flags & DQUOT_SPACE_WARN) prepare_warning(warn, dquot, QUOTA_NL_BSOFTLONGWARN); ret = -EDQUOT; - goto out; + goto finish; } if (dquot->dq_dqb.dqb_bsoftlimit && @@ -1338,13 +1335,21 @@ static int dquot_add_space(struct dquot *dquot, qsize_t space, * be always printed */ ret = -EDQUOT; - goto out; + goto finish; } } -add: - dquot->dq_dqb.dqb_rsvspace += rsv_space; - dquot->dq_dqb.dqb_curspace += space; -out: +finish: + /* + * We have to be careful and go through warning generation & grace time + * setting even if DQUOT_SPACE_NOFAIL is set. That's why we check it + * only here... + */ + if (flags & DQUOT_SPACE_NOFAIL) + ret = 0; + if (!ret) { + dquot->dq_dqb.dqb_rsvspace += rsv_space; + dquot->dq_dqb.dqb_curspace += space; + } spin_unlock(&dquot->dq_dqb_lock); return ret; } diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index 744dcaec34cc..f965ce832bc0 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -1584,6 +1584,10 @@ xfs_alloc_ag_vextent_small( bp = xfs_btree_get_bufs(args->mp, args->tp, args->agno, fbno, 0); + if (!bp) { + error = -EFSCORRUPTED; + goto error0; + } xfs_trans_binval(args->tp, bp); } args->len = 1; @@ -2141,6 +2145,10 @@ xfs_alloc_fix_freelist( if (error) goto out_agbp_relse; bp = xfs_btree_get_bufs(mp, tp, args->agno, bno, 0); + if (!bp) { + error = -EFSCORRUPTED; + goto out_agbp_relse; + } xfs_trans_binval(tp, bp); } diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 044a363119be..89263797cf32 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -1477,14 +1477,14 @@ xfs_bmap_isaeof( int is_empty; int error; - bma->aeof = 0; + bma->aeof = false; error = xfs_bmap_last_extent(NULL, bma->ip, whichfork, &rec, &is_empty); if (error) return error; if (is_empty) { - bma->aeof = 1; + bma->aeof = true; return 0; } @@ -3852,6 +3852,17 @@ xfs_trim_extent( } } +/* trim extent to within eof */ +void +xfs_trim_extent_eof( + struct xfs_bmbt_irec *irec, + struct xfs_inode *ip) + +{ + xfs_trim_extent(irec, 0, XFS_B_TO_FSB(ip->i_mount, + i_size_read(VFS_I(ip)))); +} + /* * Trim the returned map to the required bounds */ diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h index 851982a5dfbc..502e0d8fb4ff 100644 --- a/fs/xfs/libxfs/xfs_bmap.h +++ b/fs/xfs/libxfs/xfs_bmap.h @@ -208,6 +208,7 @@ void xfs_bmap_trace_exlist(struct xfs_inode *ip, xfs_extnum_t cnt, void xfs_trim_extent(struct xfs_bmbt_irec *irec, xfs_fileoff_t bno, xfs_filblks_t len); +void xfs_trim_extent_eof(struct xfs_bmbt_irec *, struct xfs_inode *); int xfs_bmap_add_attrfork(struct xfs_inode *ip, int size, int rsvd); void xfs_bmap_local_to_extents_empty(struct xfs_inode *ip, int whichfork); void xfs_bmap_add_free(struct xfs_mount *mp, struct xfs_defer_ops *dfops, diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c index 988bb3f31446..dfd643909f85 100644 --- a/fs/xfs/libxfs/xfs_ialloc.c +++ b/fs/xfs/libxfs/xfs_ialloc.c @@ -1962,7 +1962,7 @@ xfs_difree_inobt( if (!(mp->m_flags & XFS_MOUNT_IKEEP) && rec.ir_free == XFS_INOBT_ALL_FREE && mp->m_sb.sb_inopblock <= XFS_INODES_PER_CHUNK) { - xic->deleted = 1; + xic->deleted = true; xic->first_ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino); xic->alloc = xfs_inobt_irec_to_allocmask(&rec); @@ -1989,7 +1989,7 @@ xfs_difree_inobt( xfs_difree_inode_chunk(mp, agno, &rec, dfops); } else { - xic->deleted = 0; + xic->deleted = false; error = xfs_inobt_update(cur, &rec); if (error) { diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h index 8372e9bcd7b6..71de185735e0 100644 --- a/fs/xfs/libxfs/xfs_log_format.h +++ b/fs/xfs/libxfs/xfs_log_format.h @@ -270,6 +270,7 @@ typedef struct xfs_inode_log_format { uint32_t ilf_fields; /* flags for fields logged */ uint16_t ilf_asize; /* size of attr d/ext/root */ uint16_t ilf_dsize; /* size of data/ext/root */ + uint32_t ilf_pad; /* pad for 64 bit boundary */ uint64_t ilf_ino; /* inode number */ union { uint32_t ilfu_rdev; /* rdev value for dev inode*/ @@ -280,29 +281,17 @@ typedef struct xfs_inode_log_format { int32_t ilf_boffset; /* off of inode in buffer */ } xfs_inode_log_format_t; -typedef struct xfs_inode_log_format_32 { - uint16_t ilf_type; /* inode log item type */ - uint16_t ilf_size; /* size of this item */ - uint32_t ilf_fields; /* flags for fields logged */ - uint16_t ilf_asize; /* size of attr d/ext/root */ - uint16_t ilf_dsize; /* size of data/ext/root */ - uint64_t ilf_ino; /* inode number */ - union { - uint32_t ilfu_rdev; /* rdev value for dev inode*/ - uuid_t ilfu_uuid; /* mount point value */ - } ilf_u; - int64_t ilf_blkno; /* blkno of inode buffer */ - int32_t ilf_len; /* len of inode buffer */ - int32_t ilf_boffset; /* off of inode in buffer */ -} __attribute__((packed)) xfs_inode_log_format_32_t; - -typedef struct xfs_inode_log_format_64 { +/* + * Old 32 bit systems will log in this format without the 64 bit + * alignment padding. Recovery will detect this and convert it to the + * correct format. + */ +struct xfs_inode_log_format_32 { uint16_t ilf_type; /* inode log item type */ uint16_t ilf_size; /* size of this item */ uint32_t ilf_fields; /* flags for fields logged */ uint16_t ilf_asize; /* size of attr d/ext/root */ uint16_t ilf_dsize; /* size of data/ext/root */ - uint32_t ilf_pad; /* pad for 64 bit boundary */ uint64_t ilf_ino; /* inode number */ union { uint32_t ilfu_rdev; /* rdev value for dev inode*/ @@ -311,7 +300,7 @@ typedef struct xfs_inode_log_format_64 { int64_t ilf_blkno; /* blkno of inode buffer */ int32_t ilf_len; /* len of inode buffer */ int32_t ilf_boffset; /* off of inode in buffer */ -} xfs_inode_log_format_64_t; +} __attribute__((packed)); /* diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c index 7034e17535de..3354140de07e 100644 --- a/fs/xfs/xfs_acl.c +++ b/fs/xfs/xfs_acl.c @@ -247,6 +247,8 @@ xfs_set_mode(struct inode *inode, umode_t mode) int xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type) { + umode_t mode; + bool set_mode = false; int error = 0; if (!acl) @@ -257,16 +259,24 @@ xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type) return error; if (type == ACL_TYPE_ACCESS) { - umode_t mode; - error = posix_acl_update_mode(inode, &mode, &acl); if (error) return error; - error = xfs_set_mode(inode, mode); - if (error) - return error; + set_mode = true; } set_acl: - return __xfs_set_acl(inode, acl, type); + error = __xfs_set_acl(inode, acl, type); + if (error) + return error; + + /* + * We set the mode after successfully updating the ACL xattr because the + * xattr update can fail at ENOSPC and we don't want to change the mode + * if the ACL update hasn't been applied. + */ + if (set_mode) + error = xfs_set_mode(inode, mode); + + return error; } diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index f18e5932aec4..a3eeaba156c5 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -446,6 +446,19 @@ xfs_imap_valid( { offset >>= inode->i_blkbits; + /* + * We have to make sure the cached mapping is within EOF to protect + * against eofblocks trimming on file release leaving us with a stale + * mapping. Otherwise, a page for a subsequent file extending buffered + * write could get picked up by this writeback cycle and written to the + * wrong blocks. + * + * Note that what we really want here is a generic mapping invalidation + * mechanism to protect us from arbitrary extent modifying contexts, not + * just eofblocks. + */ + xfs_trim_extent_eof(imap, XFS_I(inode)); + return offset >= imap->br_startoff && offset < imap->br_startoff + imap->br_blockcount; } @@ -735,6 +748,14 @@ xfs_vm_invalidatepage( { trace_xfs_invalidatepage(page->mapping->host, page, offset, length); + + /* + * If we are invalidating the entire page, clear the dirty state from it + * so that we can check for attempts to release dirty cached pages in + * xfs_vm_releasepage(). + */ + if (offset == 0 && length >= PAGE_SIZE) + cancel_dirty_page(page); block_invalidatepage(page, offset, length); } @@ -1190,25 +1211,27 @@ xfs_vm_releasepage( * mm accommodates an old ext3 case where clean pages might not have had * the dirty bit cleared. Thus, it can send actual dirty pages to * ->releasepage() via shrink_active_list(). Conversely, - * block_invalidatepage() can send pages that are still marked dirty - * but otherwise have invalidated buffers. + * block_invalidatepage() can send pages that are still marked dirty but + * otherwise have invalidated buffers. * * We want to release the latter to avoid unnecessary buildup of the - * LRU, skip the former and warn if we've left any lingering - * delalloc/unwritten buffers on clean pages. Skip pages with delalloc - * or unwritten buffers and warn if the page is not dirty. Otherwise - * try to release the buffers. + * LRU, so xfs_vm_invalidatepage() clears the page dirty flag on pages + * that are entirely invalidated and need to be released. Hence the + * only time we should get dirty pages here is through + * shrink_active_list() and so we can simply skip those now. + * + * warn if we've left any lingering delalloc/unwritten buffers on clean + * or invalidated pages we are about to release. */ + if (PageDirty(page)) + return 0; + xfs_count_page_state(page, &delalloc, &unwritten); - if (delalloc) { - WARN_ON_ONCE(!PageDirty(page)); + if (WARN_ON_ONCE(delalloc)) return 0; - } - if (unwritten) { - WARN_ON_ONCE(!PageDirty(page)); + if (WARN_ON_ONCE(unwritten)) return 0; - } return try_to_free_buffers(page); } diff --git a/fs/xfs/xfs_attr_inactive.c b/fs/xfs/xfs_attr_inactive.c index ebd66b19fbfc..e3a950ed35a8 100644 --- a/fs/xfs/xfs_attr_inactive.c +++ b/fs/xfs/xfs_attr_inactive.c @@ -302,6 +302,8 @@ xfs_attr3_node_inactive( &bp, XFS_ATTR_FORK); if (error) return error; + node = bp->b_addr; + btree = dp->d_ops->node_tree_p(node); child_fsb = be32_to_cpu(btree[i + 1].before); xfs_trans_brelse(*trans, bp); } diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index e9db7fc95b70..6503cfa44262 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -84,6 +84,7 @@ xfs_zero_extent( GFP_NOFS, 0); } +#ifdef CONFIG_XFS_RT int xfs_bmap_rtalloc( struct xfs_bmalloca *ap) /* bmap alloc argument struct */ @@ -190,6 +191,7 @@ xfs_bmap_rtalloc( } return 0; } +#endif /* CONFIG_XFS_RT */ /* * Check if the endoff is outside the last extent. If so the caller will grow diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h index 0eaa81dc49be..7d330b3c77c3 100644 --- a/fs/xfs/xfs_bmap_util.h +++ b/fs/xfs/xfs_bmap_util.h @@ -28,7 +28,20 @@ struct xfs_mount; struct xfs_trans; struct xfs_bmalloca; +#ifdef CONFIG_XFS_RT int xfs_bmap_rtalloc(struct xfs_bmalloca *ap); +#else /* !CONFIG_XFS_RT */ +/* + * Attempts to allocate RT extents when RT is disable indicates corruption and + * should trigger a shutdown. + */ +static inline int +xfs_bmap_rtalloc(struct xfs_bmalloca *ap) +{ + return -EFSCORRUPTED; +} +#endif /* CONFIG_XFS_RT */ + int xfs_bmap_eof(struct xfs_inode *ip, xfs_fileoff_t endoff, int whichfork, int *eof); int xfs_bmap_punch_delalloc_range(struct xfs_inode *ip, diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 309e26c9dddb..56d0e526870c 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -764,7 +764,7 @@ xfs_file_fallocate( enum xfs_prealloc_flags flags = 0; uint iolock = XFS_IOLOCK_EXCL; loff_t new_size = 0; - bool do_file_insert = 0; + bool do_file_insert = false; if (!S_ISREG(inode->i_mode)) return -EINVAL; @@ -825,7 +825,7 @@ xfs_file_fallocate( error = -EINVAL; goto out_unlock; } - do_file_insert = 1; + do_file_insert = true; } else { flags |= XFS_PREALLOC_SET; diff --git a/fs/xfs/xfs_fsmap.c b/fs/xfs/xfs_fsmap.c index 814ed729881d..43cfc07996a4 100644 --- a/fs/xfs/xfs_fsmap.c +++ b/fs/xfs/xfs_fsmap.c @@ -367,29 +367,6 @@ xfs_getfsmap_datadev_helper( return xfs_getfsmap_helper(cur->bc_tp, info, rec, rec_daddr); } -/* Transform a rtbitmap "record" into a fsmap */ -STATIC int -xfs_getfsmap_rtdev_rtbitmap_helper( - struct xfs_trans *tp, - struct xfs_rtalloc_rec *rec, - void *priv) -{ - struct xfs_mount *mp = tp->t_mountp; - struct xfs_getfsmap_info *info = priv; - struct xfs_rmap_irec irec; - xfs_daddr_t rec_daddr; - - rec_daddr = XFS_FSB_TO_BB(mp, rec->ar_startblock); - - irec.rm_startblock = rec->ar_startblock; - irec.rm_blockcount = rec->ar_blockcount; - irec.rm_owner = XFS_RMAP_OWN_NULL; /* "free" */ - irec.rm_offset = 0; - irec.rm_flags = 0; - - return xfs_getfsmap_helper(tp, info, &irec, rec_daddr); -} - /* Transform a bnobt irec into a fsmap */ STATIC int xfs_getfsmap_datadev_bnobt_helper( @@ -475,6 +452,30 @@ xfs_getfsmap_logdev( return xfs_getfsmap_helper(tp, info, &rmap, 0); } +#ifdef CONFIG_XFS_RT +/* Transform a rtbitmap "record" into a fsmap */ +STATIC int +xfs_getfsmap_rtdev_rtbitmap_helper( + struct xfs_trans *tp, + struct xfs_rtalloc_rec *rec, + void *priv) +{ + struct xfs_mount *mp = tp->t_mountp; + struct xfs_getfsmap_info *info = priv; + struct xfs_rmap_irec irec; + xfs_daddr_t rec_daddr; + + rec_daddr = XFS_FSB_TO_BB(mp, rec->ar_startblock); + + irec.rm_startblock = rec->ar_startblock; + irec.rm_blockcount = rec->ar_blockcount; + irec.rm_owner = XFS_RMAP_OWN_NULL; /* "free" */ + irec.rm_offset = 0; + irec.rm_flags = 0; + + return xfs_getfsmap_helper(tp, info, &irec, rec_daddr); +} + /* Execute a getfsmap query against the realtime device. */ STATIC int __xfs_getfsmap_rtdev( @@ -561,6 +562,7 @@ xfs_getfsmap_rtdev_rtbitmap( return __xfs_getfsmap_rtdev(tp, keys, xfs_getfsmap_rtdev_rtbitmap_query, info); } +#endif /* CONFIG_XFS_RT */ /* Execute a getfsmap query against the regular data device. */ STATIC int @@ -795,7 +797,15 @@ xfs_getfsmap_check_keys( return false; } +/* + * There are only two devices if we didn't configure RT devices at build time. + */ +#ifdef CONFIG_XFS_RT #define XFS_GETFSMAP_DEVS 3 +#else +#define XFS_GETFSMAP_DEVS 2 +#endif /* CONFIG_XFS_RT */ + /* * Get filesystem's extents as described in head, and format for * output. Calls formatter to fill the user's buffer until all @@ -853,10 +863,12 @@ xfs_getfsmap( handlers[1].dev = new_encode_dev(mp->m_logdev_targp->bt_dev); handlers[1].fn = xfs_getfsmap_logdev; } +#ifdef CONFIG_XFS_RT if (mp->m_rtdev_targp) { handlers[2].dev = new_encode_dev(mp->m_rtdev_targp->bt_dev); handlers[2].fn = xfs_getfsmap_rtdev_rtbitmap; } +#endif /* CONFIG_XFS_RT */ xfs_sort(handlers, XFS_GETFSMAP_DEVS, sizeof(struct xfs_getfsmap_dev), xfs_getfsmap_dev_compare); diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index a705f34b58fa..9bbc2d7cc8cb 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -364,6 +364,9 @@ xfs_inode_to_log_dinode( to->di_dmstate = from->di_dmstate; to->di_flags = from->di_flags; + /* log a dummy value to ensure log structure is fully initialised */ + to->di_next_unlinked = NULLAGINO; + if (from->di_version == 3) { to->di_changecount = inode->i_version; to->di_crtime.t_sec = from->di_crtime.t_sec; @@ -404,6 +407,11 @@ xfs_inode_item_format_core( * the second with the on-disk inode structure, and a possible third and/or * fourth with the inode data/extents/b-tree root and inode attributes * data/extents/b-tree root. + * + * Note: Always use the 64 bit inode log format structure so we don't + * leave an uninitialised hole in the format item on 64 bit systems. Log + * recovery on 32 bit systems handles this just fine, so there's no reason + * for not using an initialising the properly padded structure all the time. */ STATIC void xfs_inode_item_format( @@ -412,8 +420,8 @@ xfs_inode_item_format( { struct xfs_inode_log_item *iip = INODE_ITEM(lip); struct xfs_inode *ip = iip->ili_inode; - struct xfs_inode_log_format *ilf; struct xfs_log_iovec *vecp = NULL; + struct xfs_inode_log_format *ilf; ASSERT(ip->i_d.di_version > 1); @@ -425,7 +433,17 @@ xfs_inode_item_format( ilf->ilf_boffset = ip->i_imap.im_boffset; ilf->ilf_fields = XFS_ILOG_CORE; ilf->ilf_size = 2; /* format + core */ - xlog_finish_iovec(lv, vecp, sizeof(struct xfs_inode_log_format)); + + /* + * make sure we don't leak uninitialised data into the log in the case + * when we don't log every field in the inode. + */ + ilf->ilf_dsize = 0; + ilf->ilf_asize = 0; + ilf->ilf_pad = 0; + uuid_copy(&ilf->ilf_u.ilfu_uuid, &uuid_null); + + xlog_finish_iovec(lv, vecp, sizeof(*ilf)); xfs_inode_item_format_core(ip, lv, &vecp); xfs_inode_item_format_data_fork(iip, ilf, lv, &vecp); @@ -855,44 +873,29 @@ xfs_istale_done( } /* - * convert an xfs_inode_log_format struct from either 32 or 64 bit versions - * (which can have different field alignments) to the native version + * convert an xfs_inode_log_format struct from the old 32 bit version + * (which can have different field alignments) to the native 64 bit version */ int xfs_inode_item_format_convert( - xfs_log_iovec_t *buf, - xfs_inode_log_format_t *in_f) + struct xfs_log_iovec *buf, + struct xfs_inode_log_format *in_f) { - if (buf->i_len == sizeof(xfs_inode_log_format_32_t)) { - xfs_inode_log_format_32_t *in_f32 = buf->i_addr; - - in_f->ilf_type = in_f32->ilf_type; - in_f->ilf_size = in_f32->ilf_size; - in_f->ilf_fields = in_f32->ilf_fields; - in_f->ilf_asize = in_f32->ilf_asize; - in_f->ilf_dsize = in_f32->ilf_dsize; - in_f->ilf_ino = in_f32->ilf_ino; - /* copy biggest field of ilf_u */ - uuid_copy(&in_f->ilf_u.ilfu_uuid, &in_f32->ilf_u.ilfu_uuid); - in_f->ilf_blkno = in_f32->ilf_blkno; - in_f->ilf_len = in_f32->ilf_len; - in_f->ilf_boffset = in_f32->ilf_boffset; - return 0; - } else if (buf->i_len == sizeof(xfs_inode_log_format_64_t)){ - xfs_inode_log_format_64_t *in_f64 = buf->i_addr; - - in_f->ilf_type = in_f64->ilf_type; - in_f->ilf_size = in_f64->ilf_size; - in_f->ilf_fields = in_f64->ilf_fields; - in_f->ilf_asize = in_f64->ilf_asize; - in_f->ilf_dsize = in_f64->ilf_dsize; - in_f->ilf_ino = in_f64->ilf_ino; - /* copy biggest field of ilf_u */ - uuid_copy(&in_f->ilf_u.ilfu_uuid, &in_f64->ilf_u.ilfu_uuid); - in_f->ilf_blkno = in_f64->ilf_blkno; - in_f->ilf_len = in_f64->ilf_len; - in_f->ilf_boffset = in_f64->ilf_boffset; - return 0; - } - return -EFSCORRUPTED; + struct xfs_inode_log_format_32 *in_f32 = buf->i_addr; + + if (buf->i_len != sizeof(*in_f32)) + return -EFSCORRUPTED; + + in_f->ilf_type = in_f32->ilf_type; + in_f->ilf_size = in_f32->ilf_size; + in_f->ilf_fields = in_f32->ilf_fields; + in_f->ilf_asize = in_f32->ilf_asize; + in_f->ilf_dsize = in_f32->ilf_dsize; + in_f->ilf_ino = in_f32->ilf_ino; + /* copy biggest field of ilf_u */ + uuid_copy(&in_f->ilf_u.ilfu_uuid, &in_f32->ilf_u.ilfu_uuid); + in_f->ilf_blkno = in_f32->ilf_blkno; + in_f->ilf_len = in_f32->ilf_len; + in_f->ilf_boffset = in_f32->ilf_boffset; + return 0; } diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index c5107c7bc4bf..dc95a49d62e7 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -2515,7 +2515,7 @@ next_lv: if (lv) vecp = lv->lv_iovecp; } - if (record_cnt == 0 && ordered == false) { + if (record_cnt == 0 && !ordered) { if (!lv) return 0; break; diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index ea7d4b4e50d0..e9727d0a541a 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -704,7 +704,7 @@ xfs_mountfs( xfs_set_maxicount(mp); /* enable fail_at_unmount as default */ - mp->m_fail_unmount = 1; + mp->m_fail_unmount = true; error = xfs_sysfs_init(&mp->m_kobj, &xfs_mp_ktype, NULL, mp->m_fsname); if (error) diff --git a/fs/xfs/xfs_ondisk.h b/fs/xfs/xfs_ondisk.h index 0c381d71b242..0492436a053f 100644 --- a/fs/xfs/xfs_ondisk.h +++ b/fs/xfs/xfs_ondisk.h @@ -134,7 +134,7 @@ xfs_check_ondisk_structs(void) XFS_CHECK_STRUCT_SIZE(struct xfs_icreate_log, 28); XFS_CHECK_STRUCT_SIZE(struct xfs_ictimestamp, 8); XFS_CHECK_STRUCT_SIZE(struct xfs_inode_log_format_32, 52); - XFS_CHECK_STRUCT_SIZE(struct xfs_inode_log_format_64, 56); + XFS_CHECK_STRUCT_SIZE(struct xfs_inode_log_format, 56); XFS_CHECK_STRUCT_SIZE(struct xfs_qoff_logformat, 20); XFS_CHECK_STRUCT_SIZE(struct xfs_trans_header, 16); } diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 584cf2d573ba..f663022353c0 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -1637,7 +1637,7 @@ xfs_fs_fill_super( /* version 5 superblocks support inode version counters. */ if (XFS_SB_VERSION_NUM(&mp->m_sb) == XFS_SB_VERSION_5) - sb->s_flags |= MS_I_VERSION; + sb->s_flags |= SB_I_VERSION; if (mp->m_flags & XFS_MOUNT_DAX) { xfs_warn(mp, |