diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2016-12-17 18:44:00 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-12-17 18:44:00 -0800 |
commit | 0110c350c86d511be2130cb2a30dcbb76c4af750 (patch) | |
tree | d343a9e0fcb586a7110b13d411b314d33d404c08 /fs/ocfs2/refcounttree.c | |
parent | d9cb5bfcc3339f1a63df8fe0af8cece33c83c3af (diff) | |
parent | 9763f7a4a5f7b1a7c480fa06d01b2bad25163c0a (diff) | |
download | linux-0110c350c86d511be2130cb2a30dcbb76c4af750.tar.bz2 |
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
Pull more vfs updates from Al Viro:
"In this pile:
- autofs-namespace series
- dedupe stuff
- more struct path constification"
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs: (40 commits)
ocfs2: implement the VFS clone_range, copy_range, and dedupe_range features
ocfs2: charge quota for reflinked blocks
ocfs2: fix bad pointer cast
ocfs2: always unlock when completing dio writes
ocfs2: don't eat io errors during _dio_end_io_write
ocfs2: budget for extent tree splits when adding refcount flag
ocfs2: prohibit refcounted swapfiles
ocfs2: add newlines to some error messages
ocfs2: convert inode refcount test to a helper
simple_write_end(): don't zero in short copy into uptodate
exofs: don't mess with simple_write_{begin,end}
9p: saner ->write_end() on failing copy into non-uptodate page
fix gfs2_stuffed_write_end() on short copies
fix ceph_write_end()
nfs_write_end(): fix handling of short copies
vfs: refactor clone/dedupe_file_range common functions
fs: try to clone files first in vfs_copy_file_range
vfs: misc struct path constification
namespace.c: constify struct path passed to a bunch of primitives
quota: constify struct path in quota_on
...
Diffstat (limited to 'fs/ocfs2/refcounttree.c')
-rw-r--r-- | fs/ocfs2/refcounttree.c | 464 |
1 files changed, 451 insertions, 13 deletions
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index 738b4ea8e990..d171d2c53f7f 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c @@ -34,6 +34,7 @@ #include "xattr.h" #include "namei.h" #include "ocfs2_trace.h" +#include "file.h" #include <linux/bio.h> #include <linux/blkdev.h> @@ -410,7 +411,7 @@ static int ocfs2_get_refcount_block(struct inode *inode, u64 *ref_blkno) goto out; } - BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL)); + BUG_ON(!ocfs2_is_refcount_inode(inode)); di = (struct ocfs2_dinode *)di_bh->b_data; *ref_blkno = le64_to_cpu(di->i_refcount_loc); @@ -569,7 +570,7 @@ static int ocfs2_create_refcount_tree(struct inode *inode, u32 num_got; u64 suballoc_loc, first_blkno; - BUG_ON(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL); + BUG_ON(ocfs2_is_refcount_inode(inode)); trace_ocfs2_create_refcount_tree( (unsigned long long)OCFS2_I(inode)->ip_blkno); @@ -707,7 +708,7 @@ static int ocfs2_set_refcount_tree(struct inode *inode, struct ocfs2_refcount_block *rb; struct ocfs2_refcount_tree *ref_tree; - BUG_ON(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL); + BUG_ON(ocfs2_is_refcount_inode(inode)); ret = ocfs2_lock_refcount_tree(osb, refcount_loc, 1, &ref_tree, &ref_root_bh); @@ -774,7 +775,7 @@ int ocfs2_remove_refcount_tree(struct inode *inode, struct buffer_head *di_bh) u64 blk = 0, bg_blkno = 0, ref_blkno = le64_to_cpu(di->i_refcount_loc); u16 bit = 0; - if (!(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL)) + if (!ocfs2_is_refcount_inode(inode)) return 0; BUG_ON(!ref_blkno); @@ -2298,11 +2299,10 @@ int ocfs2_decrease_refcount(struct inode *inode, { int ret; u64 ref_blkno; - struct ocfs2_inode_info *oi = OCFS2_I(inode); struct buffer_head *ref_root_bh = NULL; struct ocfs2_refcount_tree *tree; - BUG_ON(!(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL)); + BUG_ON(!ocfs2_is_refcount_inode(inode)); ret = ocfs2_get_refcount_block(inode, &ref_blkno); if (ret) { @@ -2532,7 +2532,6 @@ int ocfs2_prepare_refcount_change_for_del(struct inode *inode, int *ref_blocks) { int ret; - struct ocfs2_inode_info *oi = OCFS2_I(inode); struct buffer_head *ref_root_bh = NULL; struct ocfs2_refcount_tree *tree; u64 start_cpos = ocfs2_blocks_to_clusters(inode->i_sb, phys_blkno); @@ -2543,7 +2542,7 @@ int ocfs2_prepare_refcount_change_for_del(struct inode *inode, goto out; } - BUG_ON(!(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL)); + BUG_ON(!ocfs2_is_refcount_inode(inode)); ret = ocfs2_get_refcount_tree(OCFS2_SB(inode->i_sb), refcount_loc, &tree); @@ -3411,14 +3410,13 @@ static int ocfs2_refcount_cow_hunk(struct inode *inode, { int ret; u32 cow_start = 0, cow_len = 0; - struct ocfs2_inode_info *oi = OCFS2_I(inode); struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; struct buffer_head *ref_root_bh = NULL; struct ocfs2_refcount_tree *ref_tree; struct ocfs2_cow_context *context = NULL; - BUG_ON(!(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL)); + BUG_ON(!ocfs2_is_refcount_inode(inode)); ret = ocfs2_refcount_cal_cow_clusters(inode, &di->id2.i_list, cpos, write_len, max_cpos, @@ -3628,11 +3626,10 @@ int ocfs2_refcount_cow_xattr(struct inode *inode, { int ret; struct ocfs2_xattr_value_root *xv = vb->vb_xv; - struct ocfs2_inode_info *oi = OCFS2_I(inode); struct ocfs2_cow_context *context = NULL; u32 cow_start, cow_len; - BUG_ON(!(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL)); + BUG_ON(!ocfs2_is_refcount_inode(inode)); ret = ocfs2_refcount_cal_cow_clusters(inode, &xv->xr_list, cpos, write_len, UINT_MAX, @@ -3695,6 +3692,9 @@ int ocfs2_add_refcount_flag(struct inode *inode, struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); struct ocfs2_alloc_context *meta_ac = NULL; + /* We need to be able to handle at least an extent tree split. */ + ref_blocks = ocfs2_extend_meta_needed(data_et->et_root_el); + ret = ocfs2_calc_refcount_meta_credits(inode->i_sb, ref_ci, ref_root_bh, p_cluster, num_clusters, @@ -3806,7 +3806,7 @@ static int ocfs2_attach_refcount_tree(struct inode *inode, ocfs2_init_dealloc_ctxt(&dealloc); - if (!(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL)) { + if (!ocfs2_is_refcount_inode(inode)) { ret = ocfs2_create_refcount_tree(inode, di_bh); if (ret) { mlog_errno(ret); @@ -3933,6 +3933,13 @@ static int ocfs2_add_refcounted_extent(struct inode *inode, ret = ocfs2_increase_refcount(handle, ref_ci, ref_root_bh, p_cluster, num_clusters, meta_ac, dealloc); + if (ret) { + mlog_errno(ret); + goto out_commit; + } + + ret = dquot_alloc_space_nodirty(inode, + ocfs2_clusters_to_bytes(osb->sb, num_clusters)); if (ret) mlog_errno(ret); @@ -4441,3 +4448,434 @@ out: return error; } + +/* Update destination inode size, if necessary. */ +static int ocfs2_reflink_update_dest(struct inode *dest, + struct buffer_head *d_bh, + loff_t newlen) +{ + handle_t *handle; + int ret; + + dest->i_blocks = ocfs2_inode_sector_count(dest); + + if (newlen <= i_size_read(dest)) + return 0; + + handle = ocfs2_start_trans(OCFS2_SB(dest->i_sb), + OCFS2_INODE_UPDATE_CREDITS); + if (IS_ERR(handle)) { + ret = PTR_ERR(handle); + mlog_errno(ret); + return ret; + } + + /* Extend i_size if needed. */ + spin_lock(&OCFS2_I(dest)->ip_lock); + if (newlen > i_size_read(dest)) + i_size_write(dest, newlen); + spin_unlock(&OCFS2_I(dest)->ip_lock); + dest->i_ctime = dest->i_mtime = current_time(dest); + + ret = ocfs2_mark_inode_dirty(handle, dest, d_bh); + if (ret) { + mlog_errno(ret); + goto out_commit; + } + +out_commit: + ocfs2_commit_trans(OCFS2_SB(dest->i_sb), handle); + return ret; +} + +/* Remap the range pos_in:len in s_inode to pos_out:len in t_inode. */ +static int ocfs2_reflink_remap_extent(struct inode *s_inode, + struct buffer_head *s_bh, + loff_t pos_in, + struct inode *t_inode, + struct buffer_head *t_bh, + loff_t pos_out, + loff_t len, + struct ocfs2_cached_dealloc_ctxt *dealloc) +{ + struct ocfs2_extent_tree s_et; + struct ocfs2_extent_tree t_et; + struct ocfs2_dinode *dis; + struct buffer_head *ref_root_bh = NULL; + struct ocfs2_refcount_tree *ref_tree; + struct ocfs2_super *osb; + loff_t pstart, plen; + u32 p_cluster, num_clusters, slast, spos, tpos; + unsigned int ext_flags; + int ret = 0; + + osb = OCFS2_SB(s_inode->i_sb); + dis = (struct ocfs2_dinode *)s_bh->b_data; + ocfs2_init_dinode_extent_tree(&s_et, INODE_CACHE(s_inode), s_bh); + ocfs2_init_dinode_extent_tree(&t_et, INODE_CACHE(t_inode), t_bh); + + spos = ocfs2_bytes_to_clusters(s_inode->i_sb, pos_in); + tpos = ocfs2_bytes_to_clusters(t_inode->i_sb, pos_out); + slast = ocfs2_clusters_for_bytes(s_inode->i_sb, pos_in + len); + + while (spos < slast) { + if (fatal_signal_pending(current)) { + ret = -EINTR; + goto out; + } + + /* Look up the extent. */ + ret = ocfs2_get_clusters(s_inode, spos, &p_cluster, + &num_clusters, &ext_flags); + if (ret) { + mlog_errno(ret); + goto out; + } + + num_clusters = min_t(u32, num_clusters, slast - spos); + + /* Punch out the dest range. */ + pstart = ocfs2_clusters_to_bytes(t_inode->i_sb, tpos); + plen = ocfs2_clusters_to_bytes(t_inode->i_sb, num_clusters); + ret = ocfs2_remove_inode_range(t_inode, t_bh, pstart, plen); + if (ret) { + mlog_errno(ret); + goto out; + } + + if (p_cluster == 0) + goto next_loop; + + /* Lock the refcount btree... */ + ret = ocfs2_lock_refcount_tree(osb, + le64_to_cpu(dis->i_refcount_loc), + 1, &ref_tree, &ref_root_bh); + if (ret) { + mlog_errno(ret); + goto out; + } + + /* Mark s_inode's extent as refcounted. */ + if (!(ext_flags & OCFS2_EXT_REFCOUNTED)) { + ret = ocfs2_add_refcount_flag(s_inode, &s_et, + &ref_tree->rf_ci, + ref_root_bh, spos, + p_cluster, num_clusters, + dealloc, NULL); + if (ret) { + mlog_errno(ret); + goto out_unlock_refcount; + } + } + + /* Map in the new extent. */ + ext_flags |= OCFS2_EXT_REFCOUNTED; + ret = ocfs2_add_refcounted_extent(t_inode, &t_et, + &ref_tree->rf_ci, + ref_root_bh, + tpos, p_cluster, + num_clusters, + ext_flags, + dealloc); + if (ret) { + mlog_errno(ret); + goto out_unlock_refcount; + } + + ocfs2_unlock_refcount_tree(osb, ref_tree, 1); + brelse(ref_root_bh); +next_loop: + spos += num_clusters; + tpos += num_clusters; + } + +out: + return ret; +out_unlock_refcount: + ocfs2_unlock_refcount_tree(osb, ref_tree, 1); + brelse(ref_root_bh); + return ret; +} + +/* Set up refcount tree and remap s_inode to t_inode. */ +static int ocfs2_reflink_remap_blocks(struct inode *s_inode, + struct buffer_head *s_bh, + loff_t pos_in, + struct inode *t_inode, + struct buffer_head *t_bh, + loff_t pos_out, + loff_t len) +{ + struct ocfs2_cached_dealloc_ctxt dealloc; + struct ocfs2_super *osb; + struct ocfs2_dinode *dis; + struct ocfs2_dinode *dit; + int ret; + + osb = OCFS2_SB(s_inode->i_sb); + dis = (struct ocfs2_dinode *)s_bh->b_data; + dit = (struct ocfs2_dinode *)t_bh->b_data; + ocfs2_init_dealloc_ctxt(&dealloc); + + /* + * If we're reflinking the entire file and the source is inline + * data, just copy the contents. + */ + if (pos_in == pos_out && pos_in == 0 && len == i_size_read(s_inode) && + i_size_read(t_inode) <= len && + (OCFS2_I(s_inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL)) { + ret = ocfs2_duplicate_inline_data(s_inode, s_bh, t_inode, t_bh); + if (ret) + mlog_errno(ret); + goto out; + } + + /* + * If both inodes belong to two different refcount groups then + * forget it because we don't know how (or want) to go merging + * refcount trees. + */ + ret = -EOPNOTSUPP; + if (ocfs2_is_refcount_inode(s_inode) && + ocfs2_is_refcount_inode(t_inode) && + le64_to_cpu(dis->i_refcount_loc) != + le64_to_cpu(dit->i_refcount_loc)) + goto out; + + /* Neither inode has a refcount tree. Add one to s_inode. */ + if (!ocfs2_is_refcount_inode(s_inode) && + !ocfs2_is_refcount_inode(t_inode)) { + ret = ocfs2_create_refcount_tree(s_inode, s_bh); + if (ret) { + mlog_errno(ret); + goto out; + } + } + + /* Ensure that both inodes end up with the same refcount tree. */ + if (!ocfs2_is_refcount_inode(s_inode)) { + ret = ocfs2_set_refcount_tree(s_inode, s_bh, + le64_to_cpu(dit->i_refcount_loc)); + if (ret) { + mlog_errno(ret); + goto out; + } + } + if (!ocfs2_is_refcount_inode(t_inode)) { + ret = ocfs2_set_refcount_tree(t_inode, t_bh, + le64_to_cpu(dis->i_refcount_loc)); + if (ret) { + mlog_errno(ret); + goto out; + } + } + + /* Turn off inline data in the dest file. */ + if (OCFS2_I(t_inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { + ret = ocfs2_convert_inline_data_to_extents(t_inode, t_bh); + if (ret) { + mlog_errno(ret); + goto out; + } + } + + /* Actually remap extents now. */ + ret = ocfs2_reflink_remap_extent(s_inode, s_bh, pos_in, t_inode, t_bh, + pos_out, len, &dealloc); + if (ret) { + mlog_errno(ret); + goto out; + } + +out: + if (ocfs2_dealloc_has_cluster(&dealloc)) { + ocfs2_schedule_truncate_log_flush(osb, 1); + ocfs2_run_deallocs(osb, &dealloc); + } + + return ret; +} + +/* Lock an inode and grab a bh pointing to the inode. */ +static int ocfs2_reflink_inodes_lock(struct inode *s_inode, + struct buffer_head **bh1, + struct inode *t_inode, + struct buffer_head **bh2) +{ + struct inode *inode1; + struct inode *inode2; + struct ocfs2_inode_info *oi1; + struct ocfs2_inode_info *oi2; + bool same_inode = (s_inode == t_inode); + int status; + + /* First grab the VFS and rw locks. */ + lock_two_nondirectories(s_inode, t_inode); + inode1 = s_inode; + inode2 = t_inode; + if (inode1->i_ino > inode2->i_ino) + swap(inode1, inode2); + + status = ocfs2_rw_lock(inode1, 1); + if (status) { + mlog_errno(status); + goto out_i1; + } + if (!same_inode) { + status = ocfs2_rw_lock(inode2, 1); + if (status) { + mlog_errno(status); + goto out_i2; + } + } + + /* Now go for the cluster locks */ + oi1 = OCFS2_I(inode1); + oi2 = OCFS2_I(inode2); + + trace_ocfs2_double_lock((unsigned long long)oi1->ip_blkno, + (unsigned long long)oi2->ip_blkno); + + if (*bh1) + *bh1 = NULL; + if (*bh2) + *bh2 = NULL; + + /* We always want to lock the one with the lower lockid first. */ + if (oi1->ip_blkno > oi2->ip_blkno) + mlog_errno(-ENOLCK); + + /* lock id1 */ + status = ocfs2_inode_lock_nested(inode1, bh1, 1, OI_LS_REFLINK_TARGET); + if (status < 0) { + if (status != -ENOENT) + mlog_errno(status); + goto out_rw2; + } + + /* lock id2 */ + if (!same_inode) { + status = ocfs2_inode_lock_nested(inode2, bh2, 1, + OI_LS_REFLINK_TARGET); + if (status < 0) { + if (status != -ENOENT) + mlog_errno(status); + goto out_cl1; + } + } else + *bh2 = *bh1; + + trace_ocfs2_double_lock_end( + (unsigned long long)OCFS2_I(inode1)->ip_blkno, + (unsigned long long)OCFS2_I(inode2)->ip_blkno); + + return 0; + +out_cl1: + ocfs2_inode_unlock(inode1, 1); + brelse(*bh1); + *bh1 = NULL; +out_rw2: + ocfs2_rw_unlock(inode2, 1); +out_i2: + ocfs2_rw_unlock(inode1, 1); +out_i1: + unlock_two_nondirectories(s_inode, t_inode); + return status; +} + +/* Unlock both inodes and release buffers. */ +static void ocfs2_reflink_inodes_unlock(struct inode *s_inode, + struct buffer_head *s_bh, + struct inode *t_inode, + struct buffer_head *t_bh) +{ + ocfs2_inode_unlock(s_inode, 1); + ocfs2_rw_unlock(s_inode, 1); + brelse(s_bh); + if (s_inode != t_inode) { + ocfs2_inode_unlock(t_inode, 1); + ocfs2_rw_unlock(t_inode, 1); + brelse(t_bh); + } + unlock_two_nondirectories(s_inode, t_inode); +} + +/* Link a range of blocks from one file to another. */ +int ocfs2_reflink_remap_range(struct file *file_in, + loff_t pos_in, + struct file *file_out, + loff_t pos_out, + u64 len, + bool is_dedupe) +{ + struct inode *inode_in = file_inode(file_in); + struct inode *inode_out = file_inode(file_out); + struct ocfs2_super *osb = OCFS2_SB(inode_in->i_sb); + struct buffer_head *in_bh = NULL, *out_bh = NULL; + bool same_inode = (inode_in == inode_out); + ssize_t ret; + + if (!ocfs2_refcount_tree(osb)) + return -EOPNOTSUPP; + if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb)) + return -EROFS; + + /* Lock both files against IO */ + ret = ocfs2_reflink_inodes_lock(inode_in, &in_bh, inode_out, &out_bh); + if (ret) + return ret; + + /* Check file eligibility and prepare for block sharing. */ + ret = -EINVAL; + if ((OCFS2_I(inode_in)->ip_flags & OCFS2_INODE_SYSTEM_FILE) || + (OCFS2_I(inode_out)->ip_flags & OCFS2_INODE_SYSTEM_FILE)) + goto out_unlock; + + ret = vfs_clone_file_prep_inodes(inode_in, pos_in, inode_out, pos_out, + &len, is_dedupe); + if (ret || len == 0) + goto out_unlock; + + /* Lock out changes to the allocation maps and remap. */ + down_write(&OCFS2_I(inode_in)->ip_alloc_sem); + if (!same_inode) + down_write_nested(&OCFS2_I(inode_out)->ip_alloc_sem, + SINGLE_DEPTH_NESTING); + + ret = ocfs2_reflink_remap_blocks(inode_in, in_bh, pos_in, inode_out, + out_bh, pos_out, len); + + /* Zap any page cache for the destination file's range. */ + if (!ret) + truncate_inode_pages_range(&inode_out->i_data, pos_out, + PAGE_ALIGN(pos_out + len) - 1); + + up_write(&OCFS2_I(inode_in)->ip_alloc_sem); + if (!same_inode) + up_write(&OCFS2_I(inode_out)->ip_alloc_sem); + if (ret) { + mlog_errno(ret); + goto out_unlock; + } + + /* + * Empty the extent map so that we may get the right extent + * record from the disk. + */ + ocfs2_extent_map_trunc(inode_in, 0); + ocfs2_extent_map_trunc(inode_out, 0); + + ret = ocfs2_reflink_update_dest(inode_out, out_bh, pos_out + len); + if (ret) { + mlog_errno(ret); + goto out_unlock; + } + + ocfs2_reflink_inodes_unlock(inode_in, in_bh, inode_out, out_bh); + return 0; + +out_unlock: + ocfs2_reflink_inodes_unlock(inode_in, in_bh, inode_out, out_bh); + return ret; +} |