From 84e40080bd6f363ddbcab75b04cb7bc742efbf12 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 9 Nov 2016 14:13:09 -0800 Subject: ocfs2: convert inode refcount test to a helper Replace the open-coded inode refcount flag test with a helper function to reduce the potential for bugs. Signed-off-by: Darrick J. Wong --- fs/ocfs2/alloc.c | 3 +-- fs/ocfs2/file.c | 7 +++---- fs/ocfs2/inode.h | 6 ++++++ fs/ocfs2/move_extents.c | 10 ++-------- fs/ocfs2/refcounttree.c | 22 +++++++++------------- fs/ocfs2/xattr.c | 4 ++-- 6 files changed, 23 insertions(+), 29 deletions(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index f72712f6c28d..a0ca49f09880 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -5713,8 +5713,7 @@ int ocfs2_remove_btree_range(struct inode *inode, struct ocfs2_refcount_tree *ref_tree = NULL; if ((flags & OCFS2_EXT_REFCOUNTED) && len) { - BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & - OCFS2_HAS_REFCOUNT_FL)); + BUG_ON(!ocfs2_is_refcount_inode(inode)); if (!refcount_tree_locked) { ret = ocfs2_lock_refcount_tree(osb, refcount_loc, 1, diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 000c234d7bbd..d261f3a91870 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -1030,7 +1030,7 @@ int ocfs2_extend_no_holes(struct inode *inode, struct buffer_head *di_bh, * Only quota files call this without a bh, and they can't be * refcounted. */ - BUG_ON(!di_bh && (oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL)); + BUG_ON(!di_bh && ocfs2_is_refcount_inode(inode)); BUG_ON(!di_bh && !(oi->ip_flags & OCFS2_INODE_SYSTEM_FILE)); clusters_to_add = ocfs2_clusters_for_bytes(inode->i_sb, new_i_size); @@ -1719,8 +1719,7 @@ static int ocfs2_remove_inode_range(struct inode *inode, * within one cluster(means is not exactly aligned to clustersize). 
*/ - if (OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL) { - + if (ocfs2_is_refcount_inode(inode)) { ret = ocfs2_cow_file_pos(inode, di_bh, byte_start); if (ret) { mlog_errno(ret); @@ -2036,7 +2035,7 @@ int ocfs2_check_range_for_refcount(struct inode *inode, loff_t pos, struct super_block *sb = inode->i_sb; if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb)) || - !(OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL) || + !ocfs2_is_refcount_inode(inode) || OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) return 0; diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h index 5af68fcdf9d3..9b955f732bca 100644 --- a/fs/ocfs2/inode.h +++ b/fs/ocfs2/inode.h @@ -181,4 +181,10 @@ static inline struct ocfs2_inode_info *cache_info_to_inode(struct ocfs2_caching_ return container_of(ci, struct ocfs2_inode_info, ip_metadata_cache); } +/* Does this inode have the reflink flag set? */ +static inline bool ocfs2_is_refcount_inode(struct inode *inode) +{ + return (OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL); +} + #endif /* OCFS2_INODE_H */ diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c index 4e8f32eb0bdb..e52a2852d50d 100644 --- a/fs/ocfs2/move_extents.c +++ b/fs/ocfs2/move_extents.c @@ -235,10 +235,7 @@ static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context, u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos); if ((ext_flags & OCFS2_EXT_REFCOUNTED) && *len) { - - BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & - OCFS2_HAS_REFCOUNT_FL)); - + BUG_ON(!ocfs2_is_refcount_inode(inode)); BUG_ON(!context->refcount_loc); ret = ocfs2_lock_refcount_tree(osb, context->refcount_loc, 1, @@ -581,10 +578,7 @@ static int ocfs2_move_extent(struct ocfs2_move_extents_context *context, phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos); if ((ext_flags & OCFS2_EXT_REFCOUNTED) && len) { - - BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & - OCFS2_HAS_REFCOUNT_FL)); - + BUG_ON(!ocfs2_is_refcount_inode(inode)); 
BUG_ON(!context->refcount_loc); ret = ocfs2_lock_refcount_tree(osb, context->refcount_loc, 1, diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index 19238512a324..3410eb105b0d 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c @@ -410,7 +410,7 @@ static int ocfs2_get_refcount_block(struct inode *inode, u64 *ref_blkno) goto out; } - BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL)); + BUG_ON(!ocfs2_is_refcount_inode(inode)); di = (struct ocfs2_dinode *)di_bh->b_data; *ref_blkno = le64_to_cpu(di->i_refcount_loc); @@ -570,7 +570,7 @@ static int ocfs2_create_refcount_tree(struct inode *inode, u32 num_got; u64 suballoc_loc, first_blkno; - BUG_ON(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL); + BUG_ON(ocfs2_is_refcount_inode(inode)); trace_ocfs2_create_refcount_tree( (unsigned long long)OCFS2_I(inode)->ip_blkno); @@ -708,7 +708,7 @@ static int ocfs2_set_refcount_tree(struct inode *inode, struct ocfs2_refcount_block *rb; struct ocfs2_refcount_tree *ref_tree; - BUG_ON(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL); + BUG_ON(ocfs2_is_refcount_inode(inode)); ret = ocfs2_lock_refcount_tree(osb, refcount_loc, 1, &ref_tree, &ref_root_bh); @@ -775,7 +775,7 @@ int ocfs2_remove_refcount_tree(struct inode *inode, struct buffer_head *di_bh) u64 blk = 0, bg_blkno = 0, ref_blkno = le64_to_cpu(di->i_refcount_loc); u16 bit = 0; - if (!(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL)) + if (!ocfs2_is_refcount_inode(inode)) return 0; BUG_ON(!ref_blkno); @@ -2299,11 +2299,10 @@ int ocfs2_decrease_refcount(struct inode *inode, { int ret; u64 ref_blkno; - struct ocfs2_inode_info *oi = OCFS2_I(inode); struct buffer_head *ref_root_bh = NULL; struct ocfs2_refcount_tree *tree; - BUG_ON(!(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL)); + BUG_ON(!ocfs2_is_refcount_inode(inode)); ret = ocfs2_get_refcount_block(inode, &ref_blkno); if (ret) { @@ -2533,7 +2532,6 @@ int ocfs2_prepare_refcount_change_for_del(struct inode *inode, int *ref_blocks) { int ret; - 
struct ocfs2_inode_info *oi = OCFS2_I(inode); struct buffer_head *ref_root_bh = NULL; struct ocfs2_refcount_tree *tree; u64 start_cpos = ocfs2_blocks_to_clusters(inode->i_sb, phys_blkno); @@ -2544,7 +2542,7 @@ int ocfs2_prepare_refcount_change_for_del(struct inode *inode, goto out; } - BUG_ON(!(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL)); + BUG_ON(!ocfs2_is_refcount_inode(inode)); ret = ocfs2_get_refcount_tree(OCFS2_SB(inode->i_sb), refcount_loc, &tree); @@ -3412,14 +3410,13 @@ static int ocfs2_refcount_cow_hunk(struct inode *inode, { int ret; u32 cow_start = 0, cow_len = 0; - struct ocfs2_inode_info *oi = OCFS2_I(inode); struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; struct buffer_head *ref_root_bh = NULL; struct ocfs2_refcount_tree *ref_tree; struct ocfs2_cow_context *context = NULL; - BUG_ON(!(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL)); + BUG_ON(!ocfs2_is_refcount_inode(inode)); ret = ocfs2_refcount_cal_cow_clusters(inode, &di->id2.i_list, cpos, write_len, max_cpos, @@ -3629,11 +3626,10 @@ int ocfs2_refcount_cow_xattr(struct inode *inode, { int ret; struct ocfs2_xattr_value_root *xv = vb->vb_xv; - struct ocfs2_inode_info *oi = OCFS2_I(inode); struct ocfs2_cow_context *context = NULL; u32 cow_start, cow_len; - BUG_ON(!(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL)); + BUG_ON(!ocfs2_is_refcount_inode(inode)); ret = ocfs2_refcount_cal_cow_clusters(inode, &xv->xr_list, cpos, write_len, UINT_MAX, @@ -3807,7 +3803,7 @@ static int ocfs2_attach_refcount_tree(struct inode *inode, ocfs2_init_dealloc_ctxt(&dealloc); - if (!(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL)) { + if (!ocfs2_is_refcount_inode(inode)) { ret = ocfs2_create_refcount_tree(inode, di_bh); if (ret) { mlog_errno(ret); diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index cb157a34a656..3c5384d9b3a5 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -2577,7 +2577,7 @@ int ocfs2_xattr_remove(struct inode *inode, struct buffer_head 
*di_bh) if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) return 0; - if (OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL) { + if (ocfs2_is_refcount_inode(inode)) { ret = ocfs2_lock_refcount_tree(OCFS2_SB(inode->i_sb), le64_to_cpu(di->i_refcount_loc), 1, &ref_tree, &ref_root_bh); @@ -3608,7 +3608,7 @@ int ocfs2_xattr_set(struct inode *inode, } /* Check whether the value is refcounted and do some preparation. */ - if (OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL && + if (ocfs2_is_refcount_inode(inode) && (!xis.not_found || !xbs.not_found)) { ret = ocfs2_prepare_refcount_xattr(inode, di, &xi, &xis, &xbs, &ref_tree, -- cgit v1.2.3 From 86544fbd853c49a9eccb3d0f4e7eb9317f3fccf9 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 9 Nov 2016 14:13:09 -0800 Subject: ocfs2: add newlines to some error messages These two error messages are missing the trailing newline. Signed-off-by: Darrick J. Wong --- fs/ocfs2/alloc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index a0ca49f09880..d4ec0d8961a6 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -5194,7 +5194,7 @@ int ocfs2_change_extent_flag(handle_t *handle, rec = &el->l_recs[index]; if (new_flags && (rec->e_flags & new_flags)) { mlog(ML_ERROR, "Owner %llu tried to set %d flags on an " - "extent that already had them", + "extent that already had them\n", (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci), new_flags); goto out; @@ -5202,7 +5202,7 @@ int ocfs2_change_extent_flag(handle_t *handle, if (clear_flags && !(rec->e_flags & clear_flags)) { mlog(ML_ERROR, "Owner %llu tried to clear %d flags on an " - "extent that didn't have them", + "extent that didn't have them\n", (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci), clear_flags); goto out; -- cgit v1.2.3 From 06a70305812c3973c66824f26223656283c59b27 Mon Sep 17 00:00:00 2001 From: "Darrick J. 
Wong" Date: Wed, 9 Nov 2016 14:13:10 -0800 Subject: ocfs2: prohibit refcounted swapfiles The swapfile mechanism calls bmap once to find all the swap file mappings, which means that we cannot properly support CoW remapping. Therefore, error out if the swap code tries to call bmap on a refcounted file. Signed-off-by: Darrick J. Wong --- fs/ocfs2/aops.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index c5c5b9748ea3..4d037db84be5 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -464,6 +464,15 @@ static sector_t ocfs2_bmap(struct address_space *mapping, sector_t block) trace_ocfs2_bmap((unsigned long long)OCFS2_I(inode)->ip_blkno, (unsigned long long)block); + /* + * The swap code (ab-)uses ->bmap to get a block mapping and then + * bypasses the file system for actual I/O. We really can't allow + * that on refcounted inodes, so we have to skip out here. And yes, + * 0 is the magic code for a bmap error.. + */ + if (ocfs2_is_refcount_inode(inode)) + return 0; + /* We don't need to lock journal system files, since they aren't * accessed concurrently from multiple nodes. */ -- cgit v1.2.3 From 3e10b793fc40dfdbe51762e0d084bd6f2c8acaaa Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 9 Nov 2016 14:13:11 -0800 Subject: ocfs2: budget for extent tree splits when adding refcount flag When we're adding the refcount flag to an extent, we have to budget enough space to handle a full extent btree split in addition to whatever modifications have to be made to the refcount btree. We don't currently do this, with the result that generic/186 crashes when we need an extent split but not a refcount split because meta_ac never gets allocated. Signed-off-by: Darrick J. 
Wong --- fs/ocfs2/refcounttree.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index 3410eb105b0d..6c98d567ba01 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c @@ -3692,6 +3692,9 @@ int ocfs2_add_refcount_flag(struct inode *inode, struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); struct ocfs2_alloc_context *meta_ac = NULL; + /* We need to be able to handle at least an extent tree split. */ + ref_blocks = ocfs2_extend_meta_needed(data_et->et_root_el); + ret = ocfs2_calc_refcount_meta_credits(inode->i_sb, ref_ci, ref_root_bh, p_cluster, num_clusters, -- cgit v1.2.3 From 085549553dca86c866f26d233d9cfe19f169c288 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 9 Nov 2016 14:42:49 -0800 Subject: ocfs2: don't eat io errors during _dio_end_io_write ocfs2_dio_end_io_write eats whatever errors may happen, which means that write errors do not propagate to userspace. Fix that. Signed-off-by: Darrick J. 
Wong --- fs/ocfs2/aops.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 4d037db84be5..136a49cabc12 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -2263,10 +2263,10 @@ out: return ret; } -static void ocfs2_dio_end_io_write(struct inode *inode, - struct ocfs2_dio_write_ctxt *dwc, - loff_t offset, - ssize_t bytes) +static int ocfs2_dio_end_io_write(struct inode *inode, + struct ocfs2_dio_write_ctxt *dwc, + loff_t offset, + ssize_t bytes) { struct ocfs2_cached_dealloc_ctxt dealloc; struct ocfs2_extent_tree et; @@ -2374,6 +2374,8 @@ out: if (locked) inode_unlock(inode); ocfs2_dio_free_write_ctx(inode, dwc); + + return ret; } /* @@ -2388,6 +2390,7 @@ static int ocfs2_dio_end_io(struct kiocb *iocb, { struct inode *inode = file_inode(iocb->ki_filp); int level; + int ret = 0; if (bytes <= 0) return 0; @@ -2396,13 +2399,13 @@ static int ocfs2_dio_end_io(struct kiocb *iocb, BUG_ON(!ocfs2_iocb_is_rw_locked(iocb)); if (private) - ocfs2_dio_end_io_write(inode, private, offset, bytes); + ret = ocfs2_dio_end_io_write(inode, private, offset, bytes); ocfs2_iocb_clear_rw_locked(iocb); level = ocfs2_iocb_rw_locked_level(iocb); ocfs2_rw_unlock(inode, level); - return 0; + return ret; } static ssize_t ocfs2_direct_IO(struct kiocb *iocb, struct iov_iter *iter) -- cgit v1.2.3 From dbf896fc286a62bf215b904c6ff5d197df93e685 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 1 Dec 2016 16:31:14 -0800 Subject: ocfs2: always unlock when completing dio writes Always unlock the inode when completing dio writes, even if an error has occurred. The caller already checks the inode and unlocks it if needed, so we might as well reduce contention. Signed-off-by: Darrick J. 
Wong --- fs/ocfs2/aops.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 136a49cabc12..3c531f108a21 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -2392,13 +2392,10 @@ static int ocfs2_dio_end_io(struct kiocb *iocb, int level; int ret = 0; - if (bytes <= 0) - return 0; - /* this io's submitter should not have unlocked this before we could */ BUG_ON(!ocfs2_iocb_is_rw_locked(iocb)); - if (private) + if (bytes > 0 && private) ret = ocfs2_dio_end_io_write(inode, private, offset, bytes); ocfs2_iocb_clear_rw_locked(iocb); -- cgit v1.2.3 From aef73a61c01a4dca3f26c22df05039f78fe9d468 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Fri, 9 Dec 2016 16:10:15 -0800 Subject: ocfs2: fix bad pointer cast generic/188 triggered a dmesg stack trace because the dio completion was casting a buffer head to an on-disk inode, which is whacky. Signed-off-by: Darrick J. Wong --- fs/ocfs2/aops.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 3c531f108a21..3372d82d12b6 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -2317,7 +2317,7 @@ static int ocfs2_dio_end_io_write(struct inode *inode, mlog_errno(ret); } - di = (struct ocfs2_dinode *)di_bh; + di = (struct ocfs2_dinode *)di_bh->b_data; ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), di_bh); -- cgit v1.2.3 From 86e59436d406d833a5da4a94aefb3c3be6b26053 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 22 Nov 2016 13:40:27 -0800 Subject: ocfs2: charge quota for reflinked blocks When ocfs2 shares blocks from one file to another, it's necessary to charge that many blocks to the quota because ocfs2 tallies block charges according to the number of blocks mapped, not the number of physical blocks used. Without this patch, reflinking X blocks and then CoWing all of them causes quota usage to *decrease* by X as seen in generic/305. 
Signed-off-by: Darrick J. Wong --- fs/ocfs2/refcounttree.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index 6c98d567ba01..dc8089af9ddf 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c @@ -3933,6 +3933,13 @@ static int ocfs2_add_refcounted_extent(struct inode *inode, ret = ocfs2_increase_refcount(handle, ref_ci, ref_root_bh, p_cluster, num_clusters, meta_ac, dealloc); + if (ret) { + mlog_errno(ret); + goto out_commit; + } + + ret = dquot_alloc_space_nodirty(inode, + ocfs2_clusters_to_bytes(osb->sb, num_clusters)); if (ret) mlog_errno(ret); -- cgit v1.2.3 From 29ac8e856cb3694e004037de595dec4ec53d42f2 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 9 Nov 2016 14:13:11 -0800 Subject: ocfs2: implement the VFS clone_range, copy_range, and dedupe_range features Connect the new VFS clone_range, copy_range, and dedupe_range features to the existing reflink capability of ocfs2. Compared to the existing ocfs2 reflink ioctl, we have to do things a little differently to support the VFS semantics (we can clone subranges of a file but we don't clone xattrs), but the VFS ioctls are more broadly supported. Signed-off-by: Darrick J. Wong --- v2: Convert inline data files to extents files before reflinking, and fix i_blocks so that stat(2) output is correct. v3: Make zero-length dedupe consistent with btrfs behavior. v4: Use VFS double-inode lock routines and remove MAX_DEDUPE_LEN. 
--- fs/ocfs2/file.c | 35 +++- fs/ocfs2/file.h | 3 + fs/ocfs2/refcounttree.c | 432 ++++++++++++++++++++++++++++++++++++++++++++++++ fs/ocfs2/refcounttree.h | 7 + 4 files changed, 474 insertions(+), 3 deletions(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index d261f3a91870..c4889655d32b 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -1667,9 +1667,9 @@ static void ocfs2_calc_trunc_pos(struct inode *inode, *done = ret; } -static int ocfs2_remove_inode_range(struct inode *inode, - struct buffer_head *di_bh, u64 byte_start, - u64 byte_len) +int ocfs2_remove_inode_range(struct inode *inode, + struct buffer_head *di_bh, u64 byte_start, + u64 byte_len) { int ret = 0, flags = 0, done = 0, i; u32 trunc_start, trunc_len, trunc_end, trunc_cpos, phys_cpos; @@ -2439,6 +2439,31 @@ out: return offset; } +static int ocfs2_file_clone_range(struct file *file_in, + loff_t pos_in, + struct file *file_out, + loff_t pos_out, + u64 len) +{ + return ocfs2_reflink_remap_range(file_in, pos_in, file_out, pos_out, + len, false); +} + +static ssize_t ocfs2_file_dedupe_range(struct file *src_file, + u64 loff, + u64 len, + struct file *dst_file, + u64 dst_loff) +{ + int error; + + error = ocfs2_reflink_remap_range(src_file, loff, dst_file, dst_loff, + len, true); + if (error) + return error; + return len; +} + const struct inode_operations ocfs2_file_iops = { .setattr = ocfs2_setattr, .getattr = ocfs2_getattr, @@ -2478,6 +2503,8 @@ const struct file_operations ocfs2_fops = { .splice_read = generic_file_splice_read, .splice_write = iter_file_splice_write, .fallocate = ocfs2_fallocate, + .clone_file_range = ocfs2_file_clone_range, + .dedupe_file_range = ocfs2_file_dedupe_range, }; const struct file_operations ocfs2_dops = { @@ -2523,6 +2550,8 @@ const struct file_operations ocfs2_fops_no_plocks = { .splice_read = generic_file_splice_read, .splice_write = iter_file_splice_write, .fallocate = ocfs2_fallocate, + .clone_file_range = ocfs2_file_clone_range, + 
.dedupe_file_range = ocfs2_file_dedupe_range, }; const struct file_operations ocfs2_dops_no_plocks = { diff --git a/fs/ocfs2/file.h b/fs/ocfs2/file.h index e8c62f22215c..897fd9a2e51d 100644 --- a/fs/ocfs2/file.h +++ b/fs/ocfs2/file.h @@ -82,4 +82,7 @@ int ocfs2_change_file_space(struct file *file, unsigned int cmd, int ocfs2_check_range_for_refcount(struct inode *inode, loff_t pos, size_t count); +int ocfs2_remove_inode_range(struct inode *inode, + struct buffer_head *di_bh, u64 byte_start, + u64 byte_len); #endif /* OCFS2_FILE_H */ diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index dc8089af9ddf..b18465e330b1 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c @@ -34,6 +34,7 @@ #include "xattr.h" #include "namei.h" #include "ocfs2_trace.h" +#include "file.h" #include #include @@ -4448,3 +4449,434 @@ out: return error; } + +/* Update destination inode size, if necessary. */ +static int ocfs2_reflink_update_dest(struct inode *dest, + struct buffer_head *d_bh, + loff_t newlen) +{ + handle_t *handle; + int ret; + + dest->i_blocks = ocfs2_inode_sector_count(dest); + + if (newlen <= i_size_read(dest)) + return 0; + + handle = ocfs2_start_trans(OCFS2_SB(dest->i_sb), + OCFS2_INODE_UPDATE_CREDITS); + if (IS_ERR(handle)) { + ret = PTR_ERR(handle); + mlog_errno(ret); + return ret; + } + + /* Extend i_size if needed. */ + spin_lock(&OCFS2_I(dest)->ip_lock); + if (newlen > i_size_read(dest)) + i_size_write(dest, newlen); + spin_unlock(&OCFS2_I(dest)->ip_lock); + dest->i_ctime = dest->i_mtime = current_time(dest); + + ret = ocfs2_mark_inode_dirty(handle, dest, d_bh); + if (ret) { + mlog_errno(ret); + goto out_commit; + } + +out_commit: + ocfs2_commit_trans(OCFS2_SB(dest->i_sb), handle); + return ret; +} + +/* Remap the range pos_in:len in s_inode to pos_out:len in t_inode. 
*/ +static int ocfs2_reflink_remap_extent(struct inode *s_inode, + struct buffer_head *s_bh, + loff_t pos_in, + struct inode *t_inode, + struct buffer_head *t_bh, + loff_t pos_out, + loff_t len, + struct ocfs2_cached_dealloc_ctxt *dealloc) +{ + struct ocfs2_extent_tree s_et; + struct ocfs2_extent_tree t_et; + struct ocfs2_dinode *dis; + struct buffer_head *ref_root_bh = NULL; + struct ocfs2_refcount_tree *ref_tree; + struct ocfs2_super *osb; + loff_t pstart, plen; + u32 p_cluster, num_clusters, slast, spos, tpos; + unsigned int ext_flags; + int ret = 0; + + osb = OCFS2_SB(s_inode->i_sb); + dis = (struct ocfs2_dinode *)s_bh->b_data; + ocfs2_init_dinode_extent_tree(&s_et, INODE_CACHE(s_inode), s_bh); + ocfs2_init_dinode_extent_tree(&t_et, INODE_CACHE(t_inode), t_bh); + + spos = ocfs2_bytes_to_clusters(s_inode->i_sb, pos_in); + tpos = ocfs2_bytes_to_clusters(t_inode->i_sb, pos_out); + slast = ocfs2_clusters_for_bytes(s_inode->i_sb, pos_in + len); + + while (spos < slast) { + if (fatal_signal_pending(current)) { + ret = -EINTR; + goto out; + } + + /* Look up the extent. */ + ret = ocfs2_get_clusters(s_inode, spos, &p_cluster, + &num_clusters, &ext_flags); + if (ret) { + mlog_errno(ret); + goto out; + } + + num_clusters = min_t(u32, num_clusters, slast - spos); + + /* Punch out the dest range. */ + pstart = ocfs2_clusters_to_bytes(t_inode->i_sb, tpos); + plen = ocfs2_clusters_to_bytes(t_inode->i_sb, num_clusters); + ret = ocfs2_remove_inode_range(t_inode, t_bh, pstart, plen); + if (ret) { + mlog_errno(ret); + goto out; + } + + if (p_cluster == 0) + goto next_loop; + + /* Lock the refcount btree... */ + ret = ocfs2_lock_refcount_tree(osb, + le64_to_cpu(dis->i_refcount_loc), + 1, &ref_tree, &ref_root_bh); + if (ret) { + mlog_errno(ret); + goto out; + } + + /* Mark s_inode's extent as refcounted. 
*/ + if (!(ext_flags & OCFS2_EXT_REFCOUNTED)) { + ret = ocfs2_add_refcount_flag(s_inode, &s_et, + &ref_tree->rf_ci, + ref_root_bh, spos, + p_cluster, num_clusters, + dealloc, NULL); + if (ret) { + mlog_errno(ret); + goto out_unlock_refcount; + } + } + + /* Map in the new extent. */ + ext_flags |= OCFS2_EXT_REFCOUNTED; + ret = ocfs2_add_refcounted_extent(t_inode, &t_et, + &ref_tree->rf_ci, + ref_root_bh, + tpos, p_cluster, + num_clusters, + ext_flags, + dealloc); + if (ret) { + mlog_errno(ret); + goto out_unlock_refcount; + } + + ocfs2_unlock_refcount_tree(osb, ref_tree, 1); + brelse(ref_root_bh); +next_loop: + spos += num_clusters; + tpos += num_clusters; + } + +out: + return ret; +out_unlock_refcount: + ocfs2_unlock_refcount_tree(osb, ref_tree, 1); + brelse(ref_root_bh); + return ret; +} + +/* Set up refcount tree and remap s_inode to t_inode. */ +static int ocfs2_reflink_remap_blocks(struct inode *s_inode, + struct buffer_head *s_bh, + loff_t pos_in, + struct inode *t_inode, + struct buffer_head *t_bh, + loff_t pos_out, + loff_t len) +{ + struct ocfs2_cached_dealloc_ctxt dealloc; + struct ocfs2_super *osb; + struct ocfs2_dinode *dis; + struct ocfs2_dinode *dit; + int ret; + + osb = OCFS2_SB(s_inode->i_sb); + dis = (struct ocfs2_dinode *)s_bh->b_data; + dit = (struct ocfs2_dinode *)t_bh->b_data; + ocfs2_init_dealloc_ctxt(&dealloc); + + /* + * If we're reflinking the entire file and the source is inline + * data, just copy the contents. + */ + if (pos_in == pos_out && pos_in == 0 && len == i_size_read(s_inode) && + i_size_read(t_inode) <= len && + (OCFS2_I(s_inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL)) { + ret = ocfs2_duplicate_inline_data(s_inode, s_bh, t_inode, t_bh); + if (ret) + mlog_errno(ret); + goto out; + } + + /* + * If both inodes belong to two different refcount groups then + * forget it because we don't know how (or want) to go merging + * refcount trees. 
+ */ + ret = -EOPNOTSUPP; + if (ocfs2_is_refcount_inode(s_inode) && + ocfs2_is_refcount_inode(t_inode) && + le64_to_cpu(dis->i_refcount_loc) != + le64_to_cpu(dit->i_refcount_loc)) + goto out; + + /* Neither inode has a refcount tree. Add one to s_inode. */ + if (!ocfs2_is_refcount_inode(s_inode) && + !ocfs2_is_refcount_inode(t_inode)) { + ret = ocfs2_create_refcount_tree(s_inode, s_bh); + if (ret) { + mlog_errno(ret); + goto out; + } + } + + /* Ensure that both inodes end up with the same refcount tree. */ + if (!ocfs2_is_refcount_inode(s_inode)) { + ret = ocfs2_set_refcount_tree(s_inode, s_bh, + le64_to_cpu(dit->i_refcount_loc)); + if (ret) { + mlog_errno(ret); + goto out; + } + } + if (!ocfs2_is_refcount_inode(t_inode)) { + ret = ocfs2_set_refcount_tree(t_inode, t_bh, + le64_to_cpu(dis->i_refcount_loc)); + if (ret) { + mlog_errno(ret); + goto out; + } + } + + /* Turn off inline data in the dest file. */ + if (OCFS2_I(t_inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { + ret = ocfs2_convert_inline_data_to_extents(t_inode, t_bh); + if (ret) { + mlog_errno(ret); + goto out; + } + } + + /* Actually remap extents now. */ + ret = ocfs2_reflink_remap_extent(s_inode, s_bh, pos_in, t_inode, t_bh, + pos_out, len, &dealloc); + if (ret) { + mlog_errno(ret); + goto out; + } + +out: + if (ocfs2_dealloc_has_cluster(&dealloc)) { + ocfs2_schedule_truncate_log_flush(osb, 1); + ocfs2_run_deallocs(osb, &dealloc); + } + + return ret; +} + +/* Lock an inode and grab a bh pointing to the inode. */ +static int ocfs2_reflink_inodes_lock(struct inode *s_inode, + struct buffer_head **bh1, + struct inode *t_inode, + struct buffer_head **bh2) +{ + struct inode *inode1; + struct inode *inode2; + struct ocfs2_inode_info *oi1; + struct ocfs2_inode_info *oi2; + bool same_inode = (s_inode == t_inode); + int status; + + /* First grab the VFS and rw locks. 
*/ + lock_two_nondirectories(s_inode, t_inode); + inode1 = s_inode; + inode2 = t_inode; + if (inode1->i_ino > inode2->i_ino) + swap(inode1, inode2); + + status = ocfs2_rw_lock(inode1, 1); + if (status) { + mlog_errno(status); + goto out_i1; + } + if (!same_inode) { + status = ocfs2_rw_lock(inode2, 1); + if (status) { + mlog_errno(status); + goto out_i2; + } + } + + /* Now go for the cluster locks */ + oi1 = OCFS2_I(inode1); + oi2 = OCFS2_I(inode2); + + trace_ocfs2_double_lock((unsigned long long)oi1->ip_blkno, + (unsigned long long)oi2->ip_blkno); + + if (*bh1) + *bh1 = NULL; + if (*bh2) + *bh2 = NULL; + + /* We always want to lock the one with the lower lockid first. */ + if (oi1->ip_blkno > oi2->ip_blkno) + mlog_errno(-ENOLCK); + + /* lock id1 */ + status = ocfs2_inode_lock_nested(inode1, bh1, 1, OI_LS_REFLINK_TARGET); + if (status < 0) { + if (status != -ENOENT) + mlog_errno(status); + goto out_rw2; + } + + /* lock id2 */ + if (!same_inode) { + status = ocfs2_inode_lock_nested(inode2, bh2, 1, + OI_LS_REFLINK_TARGET); + if (status < 0) { + if (status != -ENOENT) + mlog_errno(status); + goto out_cl1; + } + } else + *bh2 = *bh1; + + trace_ocfs2_double_lock_end( + (unsigned long long)OCFS2_I(inode1)->ip_blkno, + (unsigned long long)OCFS2_I(inode2)->ip_blkno); + + return 0; + +out_cl1: + ocfs2_inode_unlock(inode1, 1); + brelse(*bh1); + *bh1 = NULL; +out_rw2: + ocfs2_rw_unlock(inode2, 1); +out_i2: + ocfs2_rw_unlock(inode1, 1); +out_i1: + unlock_two_nondirectories(s_inode, t_inode); + return status; +} + +/* Unlock both inodes and release buffers. 
*/ +static void ocfs2_reflink_inodes_unlock(struct inode *s_inode, + struct buffer_head *s_bh, + struct inode *t_inode, + struct buffer_head *t_bh) +{ + ocfs2_inode_unlock(s_inode, 1); + ocfs2_rw_unlock(s_inode, 1); + brelse(s_bh); + if (s_inode != t_inode) { + ocfs2_inode_unlock(t_inode, 1); + ocfs2_rw_unlock(t_inode, 1); + brelse(t_bh); + } + unlock_two_nondirectories(s_inode, t_inode); +} + +/* Link a range of blocks from one file to another. */ +int ocfs2_reflink_remap_range(struct file *file_in, + loff_t pos_in, + struct file *file_out, + loff_t pos_out, + u64 len, + bool is_dedupe) +{ + struct inode *inode_in = file_inode(file_in); + struct inode *inode_out = file_inode(file_out); + struct ocfs2_super *osb = OCFS2_SB(inode_in->i_sb); + struct buffer_head *in_bh = NULL, *out_bh = NULL; + bool same_inode = (inode_in == inode_out); + ssize_t ret; + + if (!ocfs2_refcount_tree(osb)) + return -EOPNOTSUPP; + if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb)) + return -EROFS; + + /* Lock both files against IO */ + ret = ocfs2_reflink_inodes_lock(inode_in, &in_bh, inode_out, &out_bh); + if (ret) + return ret; + + /* Check file eligibility and prepare for block sharing. */ + ret = -EINVAL; + if ((OCFS2_I(inode_in)->ip_flags & OCFS2_INODE_SYSTEM_FILE) || + (OCFS2_I(inode_out)->ip_flags & OCFS2_INODE_SYSTEM_FILE)) + goto out_unlock; + + ret = vfs_clone_file_prep_inodes(inode_in, pos_in, inode_out, pos_out, + &len, is_dedupe); + if (ret || len == 0) + goto out_unlock; + + /* Lock out changes to the allocation maps and remap. */ + down_write(&OCFS2_I(inode_in)->ip_alloc_sem); + if (!same_inode) + down_write_nested(&OCFS2_I(inode_out)->ip_alloc_sem, + SINGLE_DEPTH_NESTING); + + ret = ocfs2_reflink_remap_blocks(inode_in, in_bh, pos_in, inode_out, + out_bh, pos_out, len); + + /* Zap any page cache for the destination file's range. 
*/ + if (!ret) + truncate_inode_pages_range(&inode_out->i_data, pos_out, + PAGE_ALIGN(pos_out + len) - 1); + + up_write(&OCFS2_I(inode_in)->ip_alloc_sem); + if (!same_inode) + up_write(&OCFS2_I(inode_out)->ip_alloc_sem); + if (ret) { + mlog_errno(ret); + goto out_unlock; + } + + /* + * Empty the extent map so that we may get the right extent + * record from the disk. + */ + ocfs2_extent_map_trunc(inode_in, 0); + ocfs2_extent_map_trunc(inode_out, 0); + + ret = ocfs2_reflink_update_dest(inode_out, out_bh, pos_out + len); + if (ret) { + mlog_errno(ret); + goto out_unlock; + } + + ocfs2_reflink_inodes_unlock(inode_in, in_bh, inode_out, out_bh); + return 0; + +out_unlock: + ocfs2_reflink_inodes_unlock(inode_in, in_bh, inode_out, out_bh); + return ret; +} diff --git a/fs/ocfs2/refcounttree.h b/fs/ocfs2/refcounttree.h index 6422bbcdb525..4af55bf4b35b 100644 --- a/fs/ocfs2/refcounttree.h +++ b/fs/ocfs2/refcounttree.h @@ -115,4 +115,11 @@ int ocfs2_reflink_ioctl(struct inode *inode, const char __user *oldname, const char __user *newname, bool preserve); +int ocfs2_reflink_remap_range(struct file *file_in, + loff_t pos_in, + struct file *file_out, + loff_t pos_out, + u64 len, + bool is_dedupe); + #endif /* OCFS2_REFCOUNTTREE_H */ -- cgit v1.2.3