summaryrefslogtreecommitdiffstats
path: root/fs/btrfs/send.c
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2012-10-10 10:49:20 +0900
committer Linus Torvalds <torvalds@linux-foundation.org> 2012-10-10 10:49:20 +0900
commit 72055425e53540d9d0e59a57ac8c9b8ce77b62d5 (patch)
tree 8033d7d7bfdf8725eed785d02f7121d201052d2e /fs/btrfs/send.c
parent fc81c038c2d61d4fcd8150f383fec1ce23087597 (diff)
parent f46dbe3dee853f8a860f889cb2b7ff4c624f2a7a (diff)
download linux-72055425e53540d9d0e59a57ac8c9b8ce77b62d5.tar.bz2
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs
Pull btrfs update from Chris Mason:
 "This is a large pull, with the bulk of the updates coming from:

   - Hole punching
   - send/receive fixes
   - fsync performance
   - Disk format extension allowing more hardlinks inside a single
     directory (btrfs-progs patch required to enable the compat bit for
     this one)

  I'm cooking more unrelated RAID code, but I wanted to make sure this
  original batch makes it in. The largest updates here are relatively
  old and have been in testing for some time."

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: (121 commits)
  btrfs: init ref_index to zero in add_inode_ref
  Btrfs: remove repeated eb->pages check in, disk-io.c/csum_dirty_buffer
  Btrfs: fix page leakage
  Btrfs: do not warn_on when we cannot alloc a page for an extent buffer
  Btrfs: don't bug on enomem in readpage
  Btrfs: cleanup pages properly when ENOMEM in compression
  Btrfs: make filesystem read-only when submitting barrier fails
  Btrfs: detect corrupted filesystem after write I/O errors
  Btrfs: make compress and nodatacow mount options mutually exclusive
  btrfs: fix message printing
  Btrfs: don't bother committing delayed inode updates when fsyncing
  btrfs: move inline function code to header file
  Btrfs: remove unnecessary IS_ERR in bio_readpage_error()
  btrfs: remove unused function btrfs_insert_some_items()
  Btrfs: don't commit instead of overcommitting
  Btrfs: confirmation of value is added before trace_btrfs_get_extent() is called
  Btrfs: be smarter about dropping things from the tree log
  Btrfs: don't lookup csums for prealloc extents
  Btrfs: cache extent state when writing out dirty metadata pages
  Btrfs: do not hold the file extent leaf locked when adding extent item
  ...
Diffstat (limited to 'fs/btrfs/send.c')
-rw-r--r-- fs/btrfs/send.c 915
1 files changed, 501 insertions, 414 deletions
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index fb5ffe95f869..c7beb543a4a8 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -107,7 +107,6 @@ struct send_ctx {
int cur_inode_new;
int cur_inode_new_gen;
int cur_inode_deleted;
- int cur_inode_first_ref_orphan;
u64 cur_inode_size;
u64 cur_inode_mode;
@@ -126,7 +125,15 @@ struct send_ctx {
struct name_cache_entry {
struct list_head list;
- struct list_head use_list;
+ /*
+ * radix_tree has only 32bit entries but we need to handle 64bit inums.
+ * We use the lower 32bit of the 64bit inum to store it in the tree. If
+ * more than one inum would fall into the same entry, we use radix_list
+ * to store the additional entries. radix_list is also used to store
+ * entries where two entries have the same inum but different
+ * generations.
+ */
+ struct list_head radix_list;
u64 ino;
u64 gen;
u64 parent_ino;
@@ -328,6 +335,7 @@ out:
return ret;
}
+#if 0
static void fs_path_remove(struct fs_path *p)
{
BUG_ON(p->reversed);
@@ -335,6 +343,7 @@ static void fs_path_remove(struct fs_path *p)
p->end--;
*p->end = 0;
}
+#endif
static int fs_path_copy(struct fs_path *p, struct fs_path *from)
{
@@ -377,7 +386,7 @@ static struct btrfs_path *alloc_path_for_send(void)
return path;
}
-static int write_buf(struct send_ctx *sctx, const void *buf, u32 len)
+int write_buf(struct file *filp, const void *buf, u32 len, loff_t *off)
{
int ret;
mm_segment_t old_fs;
@@ -387,8 +396,7 @@ static int write_buf(struct send_ctx *sctx, const void *buf, u32 len)
set_fs(KERNEL_DS);
while (pos < len) {
- ret = vfs_write(sctx->send_filp, (char *)buf + pos, len - pos,
- &sctx->send_off);
+ ret = vfs_write(filp, (char *)buf + pos, len - pos, off);
/* TODO handle that correctly */
/*if (ret == -ERESTARTSYS) {
continue;
@@ -544,7 +552,8 @@ static int send_header(struct send_ctx *sctx)
strcpy(hdr.magic, BTRFS_SEND_STREAM_MAGIC);
hdr.version = cpu_to_le32(BTRFS_SEND_STREAM_VERSION);
- return write_buf(sctx, &hdr, sizeof(hdr));
+ return write_buf(sctx->send_filp, &hdr, sizeof(hdr),
+ &sctx->send_off);
}
/*
@@ -581,7 +590,8 @@ static int send_cmd(struct send_ctx *sctx)
crc = crc32c(0, (unsigned char *)sctx->send_buf, sctx->send_size);
hdr->crc = cpu_to_le32(crc);
- ret = write_buf(sctx, sctx->send_buf, sctx->send_size);
+ ret = write_buf(sctx->send_filp, sctx->send_buf, sctx->send_size,
+ &sctx->send_off);
sctx->total_send_size += sctx->send_size;
sctx->cmd_send_size[le16_to_cpu(hdr->cmd)] += sctx->send_size;
@@ -687,7 +697,8 @@ out:
*/
static int get_inode_info(struct btrfs_root *root,
u64 ino, u64 *size, u64 *gen,
- u64 *mode, u64 *uid, u64 *gid)
+ u64 *mode, u64 *uid, u64 *gid,
+ u64 *rdev)
{
int ret;
struct btrfs_inode_item *ii;
@@ -721,6 +732,8 @@ static int get_inode_info(struct btrfs_root *root,
*uid = btrfs_inode_uid(path->nodes[0], ii);
if (gid)
*gid = btrfs_inode_gid(path->nodes[0], ii);
+ if (rdev)
+ *rdev = btrfs_inode_rdev(path->nodes[0], ii);
out:
btrfs_free_path(path);
@@ -852,7 +865,6 @@ static int iterate_dir_item(struct send_ctx *sctx,
struct extent_buffer *eb;
struct btrfs_item *item;
struct btrfs_dir_item *di;
- struct btrfs_path *tmp_path = NULL;
struct btrfs_key di_key;
char *buf = NULL;
char *buf2 = NULL;
@@ -874,12 +886,6 @@ static int iterate_dir_item(struct send_ctx *sctx,
goto out;
}
- tmp_path = alloc_path_for_send();
- if (!tmp_path) {
- ret = -ENOMEM;
- goto out;
- }
-
eb = path->nodes[0];
slot = path->slots[0];
item = btrfs_item_nr(eb, slot);
@@ -941,7 +947,6 @@ static int iterate_dir_item(struct send_ctx *sctx,
}
out:
- btrfs_free_path(tmp_path);
if (buf_virtual)
vfree(buf);
else
@@ -1026,12 +1031,12 @@ struct backref_ctx {
u64 extent_len;
/* Just to check for bugs in backref resolving */
- int found_in_send_root;
+ int found_itself;
};
static int __clone_root_cmp_bsearch(const void *key, const void *elt)
{
- u64 root = (u64)key;
+ u64 root = (u64)(uintptr_t)key;
struct clone_root *cr = (struct clone_root *)elt;
if (root < cr->root->objectid)
@@ -1055,6 +1060,7 @@ static int __clone_root_cmp_sort(const void *e1, const void *e2)
/*
* Called for every backref that is found for the current extent.
+ * Results are collected in sctx->clone_roots->ino/offset/found_refs
*/
static int __iterate_backrefs(u64 ino, u64 offset, u64 root, void *ctx_)
{
@@ -1064,7 +1070,7 @@ static int __iterate_backrefs(u64 ino, u64 offset, u64 root, void *ctx_)
u64 i_size;
/* First check if the root is in the list of accepted clone sources */
- found = bsearch((void *)root, bctx->sctx->clone_roots,
+ found = bsearch((void *)(uintptr_t)root, bctx->sctx->clone_roots,
bctx->sctx->clone_roots_cnt,
sizeof(struct clone_root),
__clone_root_cmp_bsearch);
@@ -1074,14 +1080,15 @@ static int __iterate_backrefs(u64 ino, u64 offset, u64 root, void *ctx_)
if (found->root == bctx->sctx->send_root &&
ino == bctx->cur_objectid &&
offset == bctx->cur_offset) {
- bctx->found_in_send_root = 1;
+ bctx->found_itself = 1;
}
/*
- * There are inodes that have extents that lie behind it's i_size. Don't
+ * There are inodes that have extents that lie behind its i_size. Don't
* accept clones from these extents.
*/
- ret = get_inode_info(found->root, ino, &i_size, NULL, NULL, NULL, NULL);
+ ret = get_inode_info(found->root, ino, &i_size, NULL, NULL, NULL, NULL,
+ NULL);
if (ret < 0)
return ret;
@@ -1101,16 +1108,12 @@ static int __iterate_backrefs(u64 ino, u64 offset, u64 root, void *ctx_)
*/
if (ino >= bctx->cur_objectid)
return 0;
- /*if (ino > ctx->cur_objectid)
+#if 0
+ if (ino > bctx->cur_objectid)
return 0;
- if (offset + ctx->extent_len > ctx->cur_offset)
- return 0;*/
-
- bctx->found++;
- found->found_refs++;
- found->ino = ino;
- found->offset = offset;
- return 0;
+ if (offset + bctx->extent_len > bctx->cur_offset)
+ return 0;
+#endif
}
bctx->found++;
@@ -1130,6 +1133,12 @@ static int __iterate_backrefs(u64 ino, u64 offset, u64 root, void *ctx_)
}
/*
+ * Given an inode, offset and extent item, it finds a good clone for a clone
+ * instruction. Returns -ENOENT when none could be found. The function makes
+ * sure that the returned clone is usable at the point where sending is at the
+ * moment. This means, that no clones are accepted which lie behind the current
+ * inode+offset.
+ *
* path must point to the extent item when called.
*/
static int find_extent_clone(struct send_ctx *sctx,
@@ -1141,20 +1150,29 @@ static int find_extent_clone(struct send_ctx *sctx,
int ret;
int extent_type;
u64 logical;
+ u64 disk_byte;
u64 num_bytes;
u64 extent_item_pos;
+ u64 flags = 0;
struct btrfs_file_extent_item *fi;
struct extent_buffer *eb = path->nodes[0];
- struct backref_ctx backref_ctx;
+ struct backref_ctx *backref_ctx = NULL;
struct clone_root *cur_clone_root;
struct btrfs_key found_key;
struct btrfs_path *tmp_path;
+ int compressed;
u32 i;
tmp_path = alloc_path_for_send();
if (!tmp_path)
return -ENOMEM;
+ backref_ctx = kmalloc(sizeof(*backref_ctx), GFP_NOFS);
+ if (!backref_ctx) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
if (data_offset >= ino_size) {
/*
* There may be extents that lie behind the file's size.
@@ -1172,22 +1190,23 @@ static int find_extent_clone(struct send_ctx *sctx,
ret = -ENOENT;
goto out;
}
+ compressed = btrfs_file_extent_compression(eb, fi);
num_bytes = btrfs_file_extent_num_bytes(eb, fi);
- logical = btrfs_file_extent_disk_bytenr(eb, fi);
- if (logical == 0) {
+ disk_byte = btrfs_file_extent_disk_bytenr(eb, fi);
+ if (disk_byte == 0) {
ret = -ENOENT;
goto out;
}
- logical += btrfs_file_extent_offset(eb, fi);
+ logical = disk_byte + btrfs_file_extent_offset(eb, fi);
- ret = extent_from_logical(sctx->send_root->fs_info,
- logical, tmp_path, &found_key);
+ ret = extent_from_logical(sctx->send_root->fs_info, disk_byte, tmp_path,
+ &found_key, &flags);
btrfs_release_path(tmp_path);
if (ret < 0)
goto out;
- if (ret & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
+ if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
ret = -EIO;
goto out;
}
@@ -1202,12 +1221,12 @@ static int find_extent_clone(struct send_ctx *sctx,
cur_clone_root->found_refs = 0;
}
- backref_ctx.sctx = sctx;
- backref_ctx.found = 0;
- backref_ctx.cur_objectid = ino;
- backref_ctx.cur_offset = data_offset;
- backref_ctx.found_in_send_root = 0;
- backref_ctx.extent_len = num_bytes;
+ backref_ctx->sctx = sctx;
+ backref_ctx->found = 0;
+ backref_ctx->cur_objectid = ino;
+ backref_ctx->cur_offset = data_offset;
+ backref_ctx->found_itself = 0;
+ backref_ctx->extent_len = num_bytes;
/*
* The last extent of a file may be too large due to page alignment.
@@ -1215,25 +1234,31 @@ static int find_extent_clone(struct send_ctx *sctx,
* __iterate_backrefs work.
*/
if (data_offset + num_bytes >= ino_size)
- backref_ctx.extent_len = ino_size - data_offset;
+ backref_ctx->extent_len = ino_size - data_offset;
/*
* Now collect all backrefs.
*/
+ if (compressed == BTRFS_COMPRESS_NONE)
+ extent_item_pos = logical - found_key.objectid;
+ else
+ extent_item_pos = 0;
+
extent_item_pos = logical - found_key.objectid;
ret = iterate_extent_inodes(sctx->send_root->fs_info,
found_key.objectid, extent_item_pos, 1,
- __iterate_backrefs, &backref_ctx);
+ __iterate_backrefs, backref_ctx);
+
if (ret < 0)
goto out;
- if (!backref_ctx.found_in_send_root) {
+ if (!backref_ctx->found_itself) {
/* found a bug in backref code? */
ret = -EIO;
printk(KERN_ERR "btrfs: ERROR did not find backref in "
"send_root. inode=%llu, offset=%llu, "
- "logical=%llu\n",
- ino, data_offset, logical);
+ "disk_byte=%llu found extent=%llu\n",
+ ino, data_offset, disk_byte, found_key.objectid);
goto out;
}
@@ -1242,7 +1267,7 @@ verbose_printk(KERN_DEBUG "btrfs: find_extent_clone: data_offset=%llu, "
"num_bytes=%llu, logical=%llu\n",
data_offset, ino, num_bytes, logical);
- if (!backref_ctx.found)
+ if (!backref_ctx->found)
verbose_printk("btrfs: no clones found\n");
cur_clone_root = NULL;
@@ -1253,7 +1278,6 @@ verbose_printk(KERN_DEBUG "btrfs: find_extent_clone: data_offset=%llu, "
else if (sctx->clone_roots[i].root == sctx->send_root)
/* prefer clones from send_root over others */
cur_clone_root = sctx->clone_roots + i;
- break;
}
}
@@ -1267,6 +1291,7 @@ verbose_printk(KERN_DEBUG "btrfs: find_extent_clone: data_offset=%llu, "
out:
btrfs_free_path(tmp_path);
+ kfree(backref_ctx);
return ret;
}
@@ -1307,8 +1332,6 @@ static int read_symlink(struct send_ctx *sctx,
len = btrfs_file_extent_inline_len(path->nodes[0], ei);
ret = fs_path_add_from_extent_buffer(dest, path->nodes[0], off, len);
- if (ret < 0)
- goto out;
out:
btrfs_free_path(path);
@@ -1404,7 +1427,7 @@ static int get_cur_inode_state(struct send_ctx *sctx, u64 ino, u64 gen)
u64 right_gen;
ret = get_inode_info(sctx->send_root, ino, NULL, &left_gen, NULL, NULL,
- NULL);
+ NULL, NULL);
if (ret < 0 && ret != -ENOENT)
goto out;
left_ret = ret;
@@ -1413,16 +1436,16 @@ static int get_cur_inode_state(struct send_ctx *sctx, u64 ino, u64 gen)
right_ret = -ENOENT;
} else {
ret = get_inode_info(sctx->parent_root, ino, NULL, &right_gen,
- NULL, NULL, NULL);
+ NULL, NULL, NULL, NULL);
if (ret < 0 && ret != -ENOENT)
goto out;
right_ret = ret;
}
if (!left_ret && !right_ret) {
- if (left_gen == gen && right_gen == gen)
+ if (left_gen == gen && right_gen == gen) {
ret = inode_state_no_change;
- else if (left_gen == gen) {
+ } else if (left_gen == gen) {
if (ino < sctx->send_progress)
ret = inode_state_did_create;
else
@@ -1516,6 +1539,10 @@ out:
return ret;
}
+/*
+ * Looks up the first btrfs_inode_ref of a given ino. It returns the parent dir,
+ * generation of the parent dir and the name of the dir entry.
+ */
static int get_first_ref(struct send_ctx *sctx,
struct btrfs_root *root, u64 ino,
u64 *dir, u64 *dir_gen, struct fs_path *name)
@@ -1557,7 +1584,7 @@ static int get_first_ref(struct send_ctx *sctx,
btrfs_release_path(path);
ret = get_inode_info(root, found_key.offset, NULL, dir_gen, NULL, NULL,
- NULL);
+ NULL, NULL);
if (ret < 0)
goto out;
@@ -1586,22 +1613,28 @@ static int is_first_ref(struct send_ctx *sctx,
if (ret < 0)
goto out;
- if (name_len != fs_path_len(tmp_name)) {
+ if (dir != tmp_dir || name_len != fs_path_len(tmp_name)) {
ret = 0;
goto out;
}
- ret = memcmp(tmp_name->start, name, name_len);
- if (ret)
- ret = 0;
- else
- ret = 1;
+ ret = !memcmp(tmp_name->start, name, name_len);
out:
fs_path_free(sctx, tmp_name);
return ret;
}
+/*
+ * Used by process_recorded_refs to determine if a new ref would overwrite an
+ * already existing ref. In case it detects an overwrite, it returns the
+ * inode/gen in who_ino/who_gen.
+ * When an overwrite is detected, process_recorded_refs does proper orphanizing
+ * to make sure later references to the overwritten inode are possible.
+ * Orphanizing is however only required for the first ref of an inode.
+ * process_recorded_refs does an additional is_first_ref check to see if
+ * orphanizing is really required.
+ */
static int will_overwrite_ref(struct send_ctx *sctx, u64 dir, u64 dir_gen,
const char *name, int name_len,
u64 *who_ino, u64 *who_gen)
@@ -1626,9 +1659,14 @@ static int will_overwrite_ref(struct send_ctx *sctx, u64 dir, u64 dir_gen,
goto out;
}
+ /*
+ * Check if the overwritten ref was already processed. If yes, the ref
+ * was already unlinked/moved, so we can safely assume that we will not
+ * overwrite anything at this point in time.
+ */
if (other_inode > sctx->send_progress) {
ret = get_inode_info(sctx->parent_root, other_inode, NULL,
- who_gen, NULL, NULL, NULL);
+ who_gen, NULL, NULL, NULL, NULL);
if (ret < 0)
goto out;
@@ -1642,6 +1680,13 @@ out:
return ret;
}
+/*
+ * Checks if the ref was overwritten by an already processed inode. This is
+ * used by __get_cur_name_and_parent to find out if the ref was orphanized and
+ * thus the orphan name needs to be used.
+ * process_recorded_refs also uses it to avoid unlinking of refs that were
+ * overwritten.
+ */
static int did_overwrite_ref(struct send_ctx *sctx,
u64 dir, u64 dir_gen,
u64 ino, u64 ino_gen,
@@ -1671,7 +1716,7 @@ static int did_overwrite_ref(struct send_ctx *sctx,
}
ret = get_inode_info(sctx->send_root, ow_inode, NULL, &gen, NULL, NULL,
- NULL);
+ NULL, NULL);
if (ret < 0)
goto out;
@@ -1690,6 +1735,11 @@ out:
return ret;
}
+/*
+ * Same as did_overwrite_ref, but also checks if it is the first ref of an inode
+ * that got overwritten. This is used by process_recorded_refs to determine
+ * if it has to use the path as returned by get_cur_path or the orphan name.
+ */
static int did_overwrite_first_ref(struct send_ctx *sctx, u64 ino, u64 gen)
{
int ret = 0;
@@ -1710,39 +1760,40 @@ static int did_overwrite_first_ref(struct send_ctx *sctx, u64 ino, u64 gen)
ret = did_overwrite_ref(sctx, dir, dir_gen, ino, gen,
name->start, fs_path_len(name));
- if (ret < 0)
- goto out;
out:
fs_path_free(sctx, name);
return ret;
}
+/*
+ * Insert a name cache entry. On 32bit kernels the radix tree index is 32bit,
+ * so we need to do some special handling in case we have clashes. This function
+ * takes care of this with the help of name_cache_entry::radix_list.
+ * In case of error, nce is kfreed.
+ */
static int name_cache_insert(struct send_ctx *sctx,
struct name_cache_entry *nce)
{
int ret = 0;
- struct name_cache_entry **ncea;
-
- ncea = radix_tree_lookup(&sctx->name_cache, nce->ino);
- if (ncea) {
- if (!ncea[0])
- ncea[0] = nce;
- else if (!ncea[1])
- ncea[1] = nce;
- else
- BUG();
- } else {
- ncea = kmalloc(sizeof(void *) * 2, GFP_NOFS);
- if (!ncea)
+ struct list_head *nce_head;
+
+ nce_head = radix_tree_lookup(&sctx->name_cache,
+ (unsigned long)nce->ino);
+ if (!nce_head) {
+ nce_head = kmalloc(sizeof(*nce_head), GFP_NOFS);
+ if (!nce_head)
return -ENOMEM;
+ INIT_LIST_HEAD(nce_head);
- ncea[0] = nce;
- ncea[1] = NULL;
- ret = radix_tree_insert(&sctx->name_cache, nce->ino, ncea);
- if (ret < 0)
+ ret = radix_tree_insert(&sctx->name_cache, nce->ino, nce_head);
+ if (ret < 0) {
+ kfree(nce_head);
+ kfree(nce);
return ret;
+ }
}
+ list_add_tail(&nce->radix_list, nce_head);
list_add_tail(&nce->list, &sctx->name_cache_list);
sctx->name_cache_size++;
@@ -1752,50 +1803,52 @@ static int name_cache_insert(struct send_ctx *sctx,
static void name_cache_delete(struct send_ctx *sctx,
struct name_cache_entry *nce)
{
- struct name_cache_entry **ncea;
-
- ncea = radix_tree_lookup(&sctx->name_cache, nce->ino);
- BUG_ON(!ncea);
-
- if (ncea[0] == nce)
- ncea[0] = NULL;
- else if (ncea[1] == nce)
- ncea[1] = NULL;
- else
- BUG();
+ struct list_head *nce_head;
- if (!ncea[0] && !ncea[1]) {
- radix_tree_delete(&sctx->name_cache, nce->ino);
- kfree(ncea);
- }
+ nce_head = radix_tree_lookup(&sctx->name_cache,
+ (unsigned long)nce->ino);
+ BUG_ON(!nce_head);
+ list_del(&nce->radix_list);
list_del(&nce->list);
-
sctx->name_cache_size--;
+
+ if (list_empty(nce_head)) {
+ radix_tree_delete(&sctx->name_cache, (unsigned long)nce->ino);
+ kfree(nce_head);
+ }
}
static struct name_cache_entry *name_cache_search(struct send_ctx *sctx,
u64 ino, u64 gen)
{
- struct name_cache_entry **ncea;
+ struct list_head *nce_head;
+ struct name_cache_entry *cur;
- ncea = radix_tree_lookup(&sctx->name_cache, ino);
- if (!ncea)
+ nce_head = radix_tree_lookup(&sctx->name_cache, (unsigned long)ino);
+ if (!nce_head)
return NULL;
- if (ncea[0] && ncea[0]->gen == gen)
- return ncea[0];
- else if (ncea[1] && ncea[1]->gen == gen)
- return ncea[1];
+ list_for_each_entry(cur, nce_head, radix_list) {
+ if (cur->ino == ino && cur->gen == gen)
+ return cur;
+ }
return NULL;
}
+/*
+ * Removes the entry from the list and adds it back to the end. This marks the
+ * entry as recently used so that name_cache_clean_unused does not remove it.
+ */
static void name_cache_used(struct send_ctx *sctx, struct name_cache_entry *nce)
{
list_del(&nce->list);
list_add_tail(&nce->list, &sctx->name_cache_list);
}
+/*
+ * Remove some entries from the beginning of name_cache_list.
+ */
static void name_cache_clean_unused(struct send_ctx *sctx)
{
struct name_cache_entry *nce;
@@ -1814,13 +1867,23 @@ static void name_cache_clean_unused(struct send_ctx *sctx)
static void name_cache_free(struct send_ctx *sctx)
{
struct name_cache_entry *nce;
- struct name_cache_entry *tmp;
- list_for_each_entry_safe(nce, tmp, &sctx->name_cache_list, list) {
+ while (!list_empty(&sctx->name_cache_list)) {
+ nce = list_entry(sctx->name_cache_list.next,
+ struct name_cache_entry, list);
name_cache_delete(sctx, nce);
+ kfree(nce);
}
}
+/*
+ * Used by get_cur_path for each ref up to the root.
+ * Returns 0 if it succeeded.
+ * Returns 1 if the inode is not existent or got overwritten. In that case, the
+ * name is an orphan name. This instructs get_cur_path to stop iterating. If 1
+ * is returned, parent_ino/parent_gen are not guaranteed to be valid.
+ * Returns <0 in case of error.
+ */
static int __get_cur_name_and_parent(struct send_ctx *sctx,
u64 ino, u64 gen,
u64 *parent_ino,
@@ -1832,6 +1895,11 @@ static int __get_cur_name_and_parent(struct send_ctx *sctx,
struct btrfs_path *path = NULL;
struct name_cache_entry *nce = NULL;
+ /*
+ * First check if we already did a call to this function with the same
+ * ino/gen. If yes, check if the cache entry is still up-to-date. If yes
+ * return the cached result.
+ */
nce = name_cache_search(sctx, ino, gen);
if (nce) {
if (ino < sctx->send_progress && nce->need_later_update) {
@@ -1854,6 +1922,11 @@ static int __get_cur_name_and_parent(struct send_ctx *sctx,
if (!path)
return -ENOMEM;
+ /*
+ * If the inode is not existent yet, add the orphan name and return 1.
+ * This should only happen for the parent dir that we determine in
+ * __record_new_ref
+ */
ret = is_inode_existent(sctx, ino, gen);
if (ret < 0)
goto out;
@@ -1866,6 +1939,10 @@ static int __get_cur_name_and_parent(struct send_ctx *sctx,
goto out_cache;
}
+ /*
+ * Depending on whether the inode was already processed or not, use
+ * send_root or parent_root for ref lookup.
+ */
if (ino < sctx->send_progress)
ret = get_first_ref(sctx, sctx->send_root, ino,
parent_ino, parent_gen, dest);
@@ -1875,6 +1952,10 @@ static int __get_cur_name_and_parent(struct send_ctx *sctx,
if (ret < 0)
goto out;
+ /*
+ * Check if the ref was overwritten by an inode's ref that was processed
+ * earlier. If yes, treat as orphan and return 1.
+ */
ret = did_overwrite_ref(sctx, *parent_ino, *parent_gen, ino, gen,
dest->start, dest->end - dest->start);
if (ret < 0)
@@ -1888,6 +1969,9 @@ static int __get_cur_name_and_parent(struct send_ctx *sctx,
}
out_cache:
+ /*
+ * Store the result of the lookup in the name cache.
+ */
nce = kmalloc(sizeof(*nce) + fs_path_len(dest) + 1, GFP_NOFS);
if (!nce) {
ret = -ENOMEM;
@@ -1901,7 +1985,6 @@ out_cache:
nce->name_len = fs_path_len(dest);
nce->ret = ret;
strcpy(nce->name, dest->start);
- memset(&nce->use_list, 0, sizeof(nce->use_list));
if (ino < sctx->send_progress)
nce->need_later_update = 0;
@@ -2107,9 +2190,6 @@ static int send_subvol_begin(struct send_ctx *sctx)
read_extent_buffer(leaf, name, (unsigned long)(ref + 1), namelen);
btrfs_release_path(path);
- if (ret < 0)
- goto out;
-
if (parent_root) {
ret = begin_cmd(sctx, BTRFS_SEND_C_SNAPSHOT);
if (ret < 0)
@@ -2276,7 +2356,7 @@ verbose_printk("btrfs: send_utimes %llu\n", ino);
btrfs_inode_mtime(ii));
TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_CTIME, eb,
btrfs_inode_ctime(ii));
- /* TODO otime? */
+ /* TODO Add otime support when the otime patches get into upstream */
ret = send_cmd(sctx);
@@ -2292,39 +2372,39 @@ out:
* a valid path yet because we did not process the refs yet. So, the inode
* is created as orphan.
*/
-static int send_create_inode(struct send_ctx *sctx, struct btrfs_path *path,
- struct btrfs_key *key)
+static int send_create_inode(struct send_ctx *sctx, u64 ino)
{
int ret = 0;
- struct extent_buffer *eb = path->nodes[0];
- struct btrfs_inode_item *ii;
struct fs_path *p;
- int slot = path->slots[0];
int cmd;
+ u64 gen;
u64 mode;
+ u64 rdev;
-verbose_printk("btrfs: send_create_inode %llu\n", sctx->cur_ino);
+verbose_printk("btrfs: send_create_inode %llu\n", ino);
p = fs_path_alloc(sctx);
if (!p)
return -ENOMEM;
- ii = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
- mode = btrfs_inode_mode(eb, ii);
+ ret = get_inode_info(sctx->send_root, ino, NULL, &gen, &mode, NULL,
+ NULL, &rdev);
+ if (ret < 0)
+ goto out;
- if (S_ISREG(mode))
+ if (S_ISREG(mode)) {
cmd = BTRFS_SEND_C_MKFILE;
- else if (S_ISDIR(mode))
+ } else if (S_ISDIR(mode)) {
cmd = BTRFS_SEND_C_MKDIR;
- else if (S_ISLNK(mode))
+ } else if (S_ISLNK(mode)) {
cmd = BTRFS_SEND_C_SYMLINK;
- else if (S_ISCHR(mode) || S_ISBLK(mode))
+ } else if (S_ISCHR(mode) || S_ISBLK(mode)) {
cmd = BTRFS_SEND_C_MKNOD;
- else if (S_ISFIFO(mode))
+ } else if (S_ISFIFO(mode)) {
cmd = BTRFS_SEND_C_MKFIFO;
- else if (S_ISSOCK(mode))
+ } else if (S_ISSOCK(mode)) {
cmd = BTRFS_SEND_C_MKSOCK;
- else {
+ } else {
printk(KERN_WARNING "btrfs: unexpected inode type %o",
(int)(mode & S_IFMT));
ret = -ENOTSUPP;
@@ -2335,22 +2415,22 @@ verbose_printk("btrfs: send_create_inode %llu\n", sctx->cur_ino);
if (ret < 0)
goto out;
- ret = gen_unique_name(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
+ ret = gen_unique_name(sctx, ino, gen, p);
if (ret < 0)
goto out;
TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
- TLV_PUT_U64(sctx, BTRFS_SEND_A_INO, sctx->cur_ino);
+ TLV_PUT_U64(sctx, BTRFS_SEND_A_INO, ino);
if (S_ISLNK(mode)) {
fs_path_reset(p);
- ret = read_symlink(sctx, sctx->send_root, sctx->cur_ino, p);
+ ret = read_symlink(sctx, sctx->send_root, ino, p);
if (ret < 0)
goto out;
TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH_LINK, p);
} else if (S_ISCHR(mode) || S_ISBLK(mode) ||
S_ISFIFO(mode) || S_ISSOCK(mode)) {
- TLV_PUT_U64(sctx, BTRFS_SEND_A_RDEV, btrfs_inode_rdev(eb, ii));
+ TLV_PUT_U64(sctx, BTRFS_SEND_A_RDEV, rdev);
}
ret = send_cmd(sctx);
@@ -2364,6 +2444,92 @@ out:
return ret;
}
+/*
+ * We need some special handling for inodes that get processed before the parent
+ * directory got created. See process_recorded_refs for details.
+ * This function does the check if we already created the dir out of order.
+ */
+static int did_create_dir(struct send_ctx *sctx, u64 dir)
+{
+ int ret = 0;
+ struct btrfs_path *path = NULL;
+ struct btrfs_key key;
+ struct btrfs_key found_key;
+ struct btrfs_key di_key;
+ struct extent_buffer *eb;
+ struct btrfs_dir_item *di;
+ int slot;
+
+ path = alloc_path_for_send();
+ if (!path) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ key.objectid = dir;
+ key.type = BTRFS_DIR_INDEX_KEY;
+ key.offset = 0;
+ while (1) {
+ ret = btrfs_search_slot_for_read(sctx->send_root, &key, path,
+ 1, 0);
+ if (ret < 0)
+ goto out;
+ if (!ret) {
+ eb = path->nodes[0];
+ slot = path->slots[0];
+ btrfs_item_key_to_cpu(eb, &found_key, slot);
+ }
+ if (ret || found_key.objectid != key.objectid ||
+ found_key.type != key.type) {
+ ret = 0;
+ goto out;
+ }
+
+ di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
+ btrfs_dir_item_key_to_cpu(eb, di, &di_key);
+
+ if (di_key.objectid < sctx->send_progress) {
+ ret = 1;
+ goto out;
+ }
+
+ key.offset = found_key.offset + 1;
+ btrfs_release_path(path);
+ }
+
+out:
+ btrfs_free_path(path);
+ return ret;
+}
+
+/*
+ * Only creates the inode if it is:
+ * 1. Not a directory
+ * 2. Or a directory which was not created already due to out of order
+ * directories. See did_create_dir and process_recorded_refs for details.
+ */
+static int send_create_inode_if_needed(struct send_ctx *sctx)
+{
+ int ret;
+
+ if (S_ISDIR(sctx->cur_inode_mode)) {
+ ret = did_create_dir(sctx, sctx->cur_ino);
+ if (ret < 0)
+ goto out;
+ if (ret) {
+ ret = 0;
+ goto out;
+ }
+ }
+
+ ret = send_create_inode(sctx, sctx->cur_ino);
+ if (ret < 0)
+ goto out;
+
+out:
+ return ret;
+}
+
struct recorded_ref {
struct list_head list;
char *dir_path;
@@ -2416,13 +2582,13 @@ static int record_ref(struct list_head *head, u64 dir,
static void __free_recorded_refs(struct send_ctx *sctx, struct list_head *head)
{
struct recorded_ref *cur;
- struct recorded_ref *tmp;
- list_for_each_entry_safe(cur, tmp, head, list) {
+ while (!list_empty(head)) {
+ cur = list_entry(head->next, struct recorded_ref, list);
fs_path_free(sctx, cur->full_path);
+ list_del(&cur->list);
kfree(cur);
}
- INIT_LIST_HEAD(head);
}
static void free_recorded_refs(struct send_ctx *sctx)
@@ -2432,7 +2598,7 @@ static void free_recorded_refs(struct send_ctx *sctx)
}
/*
- * Renames/moves a file/dir to it's orphan name. Used when the first
+ * Renames/moves a file/dir to its orphan name. Used when the first
* ref of an unprocessed inode gets overwritten and for all non empty
* directories.
*/
@@ -2472,6 +2638,12 @@ static int can_rmdir(struct send_ctx *sctx, u64 dir, u64 send_progress)
struct btrfs_key loc;
struct btrfs_dir_item *di;
+ /*
+ * Don't try to rmdir the top/root subvolume dir.
+ */
+ if (dir == BTRFS_FIRST_FREE_OBJECTID)
+ return 0;
+
path = alloc_path_for_send();
if (!path)
return -ENOMEM;
@@ -2513,160 +2685,6 @@ out:
return ret;
}
-struct finish_unordered_dir_ctx {
- struct send_ctx *sctx;
- struct fs_path *cur_path;
- struct fs_path *dir_path;
- u64 dir_ino;
- int need_delete;
- int delete_pass;
-};
-
-int __finish_unordered_dir(int num, struct btrfs_key *di_key,
- const char *name, int name_len,
- const char *data, int data_len,
- u8 type, void *ctx)
-{
- int ret = 0;
- struct finish_unordered_dir_ctx *fctx = ctx;
- struct send_ctx *sctx = fctx->sctx;
- u64 di_gen;
- u64 di_mode;
- int is_orphan = 0;
-
- if (di_key->objectid >= fctx->dir_ino)
- goto out;
-
- fs_path_reset(fctx->cur_path);
-
- ret = get_inode_info(sctx->send_root, di_key->objectid,
- NULL, &di_gen, &di_mode, NULL, NULL);
- if (ret < 0)
- goto out;
-
- ret = is_first_ref(sctx, sctx->send_root, di_key->objectid,
- fctx->dir_ino, name, name_len);
- if (ret < 0)
- goto out;
- if (ret) {
- is_orphan = 1;
- ret = gen_unique_name(sctx, di_key->objectid, di_gen,
- fctx->cur_path);
- } else {
- ret = get_cur_path(sctx, di_key->objectid, di_gen,
- fctx->cur_path);
- }
- if (ret < 0)
- goto out;
-
- ret = fs_path_add(fctx->dir_path, name, name_len);
- if (ret < 0)
- goto out;
-
- if (!fctx->delete_pass) {
- if (S_ISDIR(di_mode)) {
- ret = send_rename(sctx, fctx->cur_path,
- fctx->dir_path);
- } else {
- ret = send_link(sctx, fctx->dir_path,
- fctx->cur_path);
- if (is_orphan)
- fctx->need_delete = 1;
- }
- } else if (!S_ISDIR(di_mode)) {
- ret = send_unlink(sctx, fctx->cur_path);
- } else {
- ret = 0;
- }
-
- fs_path_remove(fctx->dir_path);
-
-out:
- return ret;
-}
-
-/*
- * Go through all dir items and see if we find refs which could not be created
- * in the past because the dir did not exist at that time.
- */
-static int finish_outoforder_dir(struct send_ctx *sctx, u64 dir, u64 dir_gen)
-{
- int ret = 0;
- struct btrfs_path *path = NULL;
- struct btrfs_key key;
- struct btrfs_key found_key;
- struct extent_buffer *eb;
- struct finish_unordered_dir_ctx fctx;
- int slot;
-
- path = alloc_path_for_send();
- if (!path) {
- ret = -ENOMEM;
- goto out;
- }
-
- memset(&fctx, 0, sizeof(fctx));
- fctx.sctx = sctx;
- fctx.cur_path = fs_path_alloc(sctx);
- fctx.dir_path = fs_path_alloc(sctx);
- if (!fctx.cur_path || !fctx.dir_path) {
- ret = -ENOMEM;
- goto out;
- }
- fctx.dir_ino = dir;
-
- ret = get_cur_path(sctx, dir, dir_gen, fctx.dir_path);
- if (ret < 0)
- goto out;
-
- /*
- * We do two passes. The first links in the new refs and the second
- * deletes orphans if required. Deletion of orphans is not required for
- * directory inodes, as we always have only one ref and use rename
- * instead of link for those.
- */
-
-again:
- key.objectid = dir;
- key.type = BTRFS_DIR_ITEM_KEY;
- key.offset = 0;
- while (1) {
- ret = btrfs_search_slot_for_read(sctx->send_root, &key, path,
- 1, 0);
- if (ret < 0)
- goto out;
- eb = path->nodes[0];
- slot = path->slots[0];
- btrfs_item_key_to_cpu(eb, &found_key, slot);
-
- if (found_key.objectid != key.objectid ||
- found_key.type != key.type) {
- btrfs_release_path(path);
- break;
- }
-
- ret = iterate_dir_item(sctx, sctx->send_root, path,
- &found_key, __finish_unordered_dir,
- &fctx);
- if (ret < 0)
- goto out;
-
- key.offset = found_key.offset + 1;
- btrfs_release_path(path);
- }
-
- if (!fctx.delete_pass && fctx.need_delete) {
- fctx.delete_pass = 1;
- goto again;
- }
-
-out:
- btrfs_free_path(path);
- fs_path_free(sctx, fctx.cur_path);
- fs_path_free(sctx, fctx.dir_path);
- return ret;
-}
-
/*
* This does all the move/link/unlink/rmdir magic.
*/
@@ -2674,6 +2692,7 @@ static int process_recorded_refs(struct send_ctx *sctx)
{
int ret = 0;
struct recorded_ref *cur;
+ struct recorded_ref *cur2;
struct ulist *check_dirs = NULL;
struct ulist_iterator uit;
struct ulist_node *un;
@@ -2685,6 +2704,12 @@ static int process_recorded_refs(struct send_ctx *sctx)
verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
+ /*
+ * This should never happen as the root dir always has the same ref
+ * which is always '..'
+ */
+ BUG_ON(sctx->cur_ino <= BTRFS_FIRST_FREE_OBJECTID);
+
valid_path = fs_path_alloc(sctx);
if (!valid_path) {
ret = -ENOMEM;
@@ -2731,6 +2756,46 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
list_for_each_entry(cur, &sctx->new_refs, list) {
/*
+ * We may have refs where the parent directory does not exist
+ * yet. This happens if the parent directory's inum is higher
+ * than the current inum. To handle this case, we create the
+ * parent directory out of order. But we need to check if this
+ * did already happen before due to other refs in the same dir.
+ */
+ ret = get_cur_inode_state(sctx, cur->dir, cur->dir_gen);
+ if (ret < 0)
+ goto out;
+ if (ret == inode_state_will_create) {
+ ret = 0;
+ /*
+ * First check if any of the current inodes refs did
+ * already create the dir.
+ */
+ list_for_each_entry(cur2, &sctx->new_refs, list) {
+ if (cur == cur2)
+ break;
+ if (cur2->dir == cur->dir) {
+ ret = 1;
+ break;
+ }
+ }
+
+ /*
+ * If that did not happen, check if a previous inode
+ * did already create the dir.
+ */
+ if (!ret)
+ ret = did_create_dir(sctx, cur->dir);
+ if (ret < 0)
+ goto out;
+ if (!ret) {
+ ret = send_create_inode(sctx, cur->dir);
+ if (ret < 0)
+ goto out;
+ }
+ }
+
+ /*
* Check if this new ref would overwrite the first ref of
* another unprocessed inode. If yes, orphanize the
* overwritten inode. If we find an overwritten ref that is
@@ -2764,7 +2829,7 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
* inode, move it and update valid_path. If not, link or move
* it depending on the inode mode.
*/
- if (is_orphan && !sctx->cur_inode_first_ref_orphan) {
+ if (is_orphan) {
ret = send_rename(sctx, valid_path, cur->full_path);
if (ret < 0)
goto out;
@@ -2827,6 +2892,17 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
if (ret < 0)
goto out;
}
+ } else if (S_ISDIR(sctx->cur_inode_mode) &&
+ !list_empty(&sctx->deleted_refs)) {
+ /*
+ * We have a moved dir. Add the old parent to check_dirs
+ */
+ cur = list_entry(sctx->deleted_refs.next, struct recorded_ref,
+ list);
+ ret = ulist_add(check_dirs, cur->dir, cur->dir_gen,
+ GFP_NOFS);
+ if (ret < 0)
+ goto out;
} else if (!S_ISDIR(sctx->cur_inode_mode)) {
/*
* We have a non dir inode. Go through all deleted refs and
@@ -2840,35 +2916,9 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
if (ret < 0)
goto out;
if (!ret) {
- /*
- * In case the inode was moved to a directory
- * that was not created yet (see
- * __record_new_ref), we can not unlink the ref
- * as it will be needed later when the parent
- * directory is created, so that we can move in
- * the inode to the new dir.
- */
- if (!is_orphan &&
- sctx->cur_inode_first_ref_orphan) {
- ret = orphanize_inode(sctx,
- sctx->cur_ino,
- sctx->cur_inode_gen,
- cur->full_path);
- if (ret < 0)
- goto out;
- ret = gen_unique_name(sctx,
- sctx->cur_ino,
- sctx->cur_inode_gen,
- valid_path);
- if (ret < 0)
- goto out;
- is_orphan = 1;
-
- } else {
- ret = send_unlink(sctx, cur->full_path);
- if (ret < 0)
- goto out;
- }
+ ret = send_unlink(sctx, cur->full_path);
+ if (ret < 0)
+ goto out;
}
ret = ulist_add(check_dirs, cur->dir, cur->dir_gen,
GFP_NOFS);
@@ -2880,12 +2930,11 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
* If the inode is still orphan, unlink the orphan. This may
* happen when a previous inode did overwrite the first ref
* of this inode and no new refs were added for the current
- * inode.
- * We can however not delete the orphan in case the inode relies
- * in a directory that was not created yet (see
- * __record_new_ref)
+ * inode. Unlinking does not mean that the inode is deleted in
+ * all cases. There may still be links to this inode in other
+ * places.
*/
- if (is_orphan && !sctx->cur_inode_first_ref_orphan) {
+ if (is_orphan) {
ret = send_unlink(sctx, valid_path);
if (ret < 0)
goto out;
@@ -2900,6 +2949,11 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
*/
ULIST_ITER_INIT(&uit);
while ((un = ulist_next(check_dirs, &uit))) {
+ /*
+ * In case we had refs into dirs that were not processed yet,
+ * we don't need to do the utime and rmdir logic for these dirs.
+ * The dir will be processed later.
+ */
if (un->val > sctx->cur_ino)
continue;
@@ -2929,25 +2983,6 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
}
}
- /*
- * Current inode is now at it's new position, so we must increase
- * send_progress
- */
- sctx->send_progress = sctx->cur_ino + 1;
-
- /*
- * We may have a directory here that has pending refs which could not
- * be created before (because the dir did not exist before, see
- * __record_new_ref). finish_outoforder_dir will link/move the pending
- * refs.
- */
- if (S_ISDIR(sctx->cur_inode_mode) && sctx->cur_inode_new) {
- ret = finish_outoforder_dir(sctx, sctx->cur_ino,
- sctx->cur_inode_gen);
- if (ret < 0)
- goto out;
- }
-
ret = 0;
out:
@@ -2971,34 +3006,9 @@ static int __record_new_ref(int num, u64 dir, int index,
return -ENOMEM;
ret = get_inode_info(sctx->send_root, dir, NULL, &gen, NULL, NULL,
- NULL);
- if (ret < 0)
- goto out;
-
- /*
- * The parent may be non-existent at this point in time. This happens
- * if the ino of the parent dir is higher then the current ino. In this
- * case, we can not process this ref until the parent dir is finally
- * created. If we reach the parent dir later, process_recorded_refs
- * will go through all dir items and process the refs that could not be
- * processed before. In case this is the first ref, we set
- * cur_inode_first_ref_orphan to 1 to inform process_recorded_refs to
- * keep an orphan of the inode so that it later can be used for
- * link/move
- */
- ret = is_inode_existent(sctx, dir, gen);
+ NULL, NULL);
if (ret < 0)
goto out;
- if (!ret) {
- ret = is_first_ref(sctx, sctx->send_root, sctx->cur_ino, dir,
- name->start, fs_path_len(name));
- if (ret < 0)
- goto out;
- if (ret)
- sctx->cur_inode_first_ref_orphan = 1;
- ret = 0;
- goto out;
- }
ret = get_cur_path(sctx, dir, gen, p);
if (ret < 0)
@@ -3029,7 +3039,7 @@ static int __record_deleted_ref(int num, u64 dir, int index,
return -ENOMEM;
ret = get_inode_info(sctx->parent_root, dir, NULL, &gen, NULL, NULL,
- NULL);
+ NULL, NULL);
if (ret < 0)
goto out;
@@ -3206,33 +3216,28 @@ static int process_all_refs(struct send_ctx *sctx,
key.offset = 0;
while (1) {
ret = btrfs_search_slot_for_read(root, &key, path, 1, 0);
- if (ret < 0) {
- btrfs_release_path(path);
+ if (ret < 0)
goto out;
- }
- if (ret) {
- btrfs_release_path(path);
+ if (ret)
break;
- }
eb = path->nodes[0];
slot = path->slots[0];
btrfs_item_key_to_cpu(eb, &found_key, slot);
if (found_key.objectid != key.objectid ||
- found_key.type != key.type) {
- btrfs_release_path(path);
+ found_key.type != key.type)
break;
- }
- ret = iterate_inode_ref(sctx, sctx->parent_root, path,
- &found_key, 0, cb, sctx);
+ ret = iterate_inode_ref(sctx, root, path, &found_key, 0, cb,
+ sctx);
btrfs_release_path(path);
if (ret < 0)
goto out;
key.offset = found_key.offset + 1;
}
+ btrfs_release_path(path);
ret = process_recorded_refs(sctx);
@@ -3555,7 +3560,7 @@ static int send_write(struct send_ctx *sctx, u64 offset, u32 len)
int ret = 0;
struct fs_path *p;
loff_t pos = offset;
- int readed = 0;
+ int num_read = 0;
mm_segment_t old_fs;
p = fs_path_alloc(sctx);
@@ -3580,8 +3585,8 @@ verbose_printk("btrfs: send_write offset=%llu, len=%d\n", offset, len);
ret = vfs_read(sctx->cur_inode_filp, sctx->read_buf, len, &pos);
if (ret < 0)
goto out;
- readed = ret;
- if (!readed)
+ num_read = ret;
+ if (!num_read)
goto out;
ret = begin_cmd(sctx, BTRFS_SEND_C_WRITE);
@@ -3594,7 +3599,7 @@ verbose_printk("btrfs: send_write offset=%llu, len=%d\n", offset, len);
TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset);
- TLV_PUT(sctx, BTRFS_SEND_A_DATA, sctx->read_buf, readed);
+ TLV_PUT(sctx, BTRFS_SEND_A_DATA, sctx->read_buf, num_read);
ret = send_cmd(sctx);
@@ -3604,7 +3609,7 @@ out:
set_fs(old_fs);
if (ret < 0)
return ret;
- return readed;
+ return num_read;
}
/*
@@ -3615,7 +3620,6 @@ static int send_clone(struct send_ctx *sctx,
struct clone_root *clone_root)
{
int ret = 0;
- struct btrfs_root *clone_root2 = clone_root->root;
struct fs_path *p;
u64 gen;
@@ -3640,22 +3644,23 @@ verbose_printk("btrfs: send_clone offset=%llu, len=%d, clone_root=%llu, "
TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_LEN, len);
TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
- if (clone_root2 == sctx->send_root) {
+ if (clone_root->root == sctx->send_root) {
ret = get_inode_info(sctx->send_root, clone_root->ino, NULL,
- &gen, NULL, NULL, NULL);
+ &gen, NULL, NULL, NULL, NULL);
if (ret < 0)
goto out;
ret = get_cur_path(sctx, clone_root->ino, gen, p);
} else {
- ret = get_inode_path(sctx, clone_root2, clone_root->ino, p);
+ ret = get_inode_path(sctx, clone_root->root,
+ clone_root->ino, p);
}
if (ret < 0)
goto out;
TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID,
- clone_root2->root_item.uuid);
+ clone_root->root->root_item.uuid);
TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_CTRANSID,
- clone_root2->root_item.ctransid);
+ clone_root->root->root_item.ctransid);
TLV_PUT_PATH(sctx, BTRFS_SEND_A_CLONE_PATH, p);
TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_OFFSET,
clone_root->offset);
@@ -3684,10 +3689,17 @@ static int send_write_or_clone(struct send_ctx *sctx,
ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
struct btrfs_file_extent_item);
type = btrfs_file_extent_type(path->nodes[0], ei);
- if (type == BTRFS_FILE_EXTENT_INLINE)
+ if (type == BTRFS_FILE_EXTENT_INLINE) {
len = btrfs_file_extent_inline_len(path->nodes[0], ei);
- else
+ /*
+ * it is possible the inline item won't cover the whole page,
+ * but there may be items after this page. Make
+ * sure to send the whole thing
+ */
+ len = PAGE_CACHE_ALIGN(len);
+ } else {
len = btrfs_file_extent_num_bytes(path->nodes[0], ei);
+ }
if (offset + len > sctx->cur_inode_size)
len = sctx->cur_inode_size - offset;
@@ -3735,6 +3747,8 @@ static int is_extent_unchanged(struct send_ctx *sctx,
u64 left_offset_fixed;
u64 left_len;
u64 right_len;
+ u64 left_gen;
+ u64 right_gen;
u8 left_type;
u8 right_type;
@@ -3744,17 +3758,17 @@ static int is_extent_unchanged(struct send_ctx *sctx,
eb = left_path->nodes[0];
slot = left_path->slots[0];
-
ei = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
left_type = btrfs_file_extent_type(eb, ei);
- left_disknr = btrfs_file_extent_disk_bytenr(eb, ei);
- left_len = btrfs_file_extent_num_bytes(eb, ei);
- left_offset = btrfs_file_extent_offset(eb, ei);
if (left_type != BTRFS_FILE_EXTENT_REG) {
ret = 0;
goto out;
}
+ left_disknr = btrfs_file_extent_disk_bytenr(eb, ei);
+ left_len = btrfs_file_extent_num_bytes(eb, ei);
+ left_offset = btrfs_file_extent_offset(eb, ei);
+ left_gen = btrfs_file_extent_generation(eb, ei);
/*
* Following comments will refer to these graphics. L is the left
@@ -3810,6 +3824,7 @@ static int is_extent_unchanged(struct send_ctx *sctx,
right_disknr = btrfs_file_extent_disk_bytenr(eb, ei);
right_len = btrfs_file_extent_num_bytes(eb, ei);
right_offset = btrfs_file_extent_offset(eb, ei);
+ right_gen = btrfs_file_extent_generation(eb, ei);
if (right_type != BTRFS_FILE_EXTENT_REG) {
ret = 0;
@@ -3820,7 +3835,7 @@ static int is_extent_unchanged(struct send_ctx *sctx,
* Are we at extent 8? If yes, we know the extent is changed.
* This may only happen on the first iteration.
*/
- if (found_key.offset + right_len < ekey->offset) {
+ if (found_key.offset + right_len <= ekey->offset) {
ret = 0;
goto out;
}
@@ -3837,8 +3852,9 @@ static int is_extent_unchanged(struct send_ctx *sctx,
/*
* Check if we have the same extent.
*/
- if (left_disknr + left_offset_fixed !=
- right_disknr + right_offset) {
+ if (left_disknr != right_disknr ||
+ left_offset_fixed != right_offset ||
+ left_gen != right_gen) {
ret = 0;
goto out;
}
@@ -3977,6 +3993,15 @@ static int process_recorded_refs_if_needed(struct send_ctx *sctx, int at_end)
goto out;
ret = process_recorded_refs(sctx);
+ if (ret < 0)
+ goto out;
+
+ /*
+ * We have processed the refs and thus need to advance send_progress.
+ * Now, calls to get_cur_xxx will take the updated refs of the current
+ * inode into account.
+ */
+ sctx->send_progress = sctx->cur_ino + 1;
out:
return ret;
@@ -4004,7 +4029,7 @@ static int finish_inode_if_needed(struct send_ctx *sctx, int at_end)
goto out;
ret = get_inode_info(sctx->send_root, sctx->cur_ino, NULL, NULL,
- &left_mode, &left_uid, &left_gid);
+ &left_mode, &left_uid, &left_gid, NULL);
if (ret < 0)
goto out;
@@ -4015,7 +4040,7 @@ static int finish_inode_if_needed(struct send_ctx *sctx, int at_end)
} else {
ret = get_inode_info(sctx->parent_root, sctx->cur_ino,
NULL, NULL, &right_mode, &right_uid,
- &right_gid);
+ &right_gid, NULL);
if (ret < 0)
goto out;
@@ -4074,7 +4099,12 @@ static int changed_inode(struct send_ctx *sctx,
sctx->cur_ino = key->objectid;
sctx->cur_inode_new_gen = 0;
- sctx->cur_inode_first_ref_orphan = 0;
+
+ /*
+ * Set send_progress to current inode. This will tell all get_cur_xxx
+ * functions that the current inode's refs are not updated yet. Later,
+ * when process_recorded_refs is finished, it is set to cur_ino + 1.
+ */
sctx->send_progress = sctx->cur_ino;
if (result == BTRFS_COMPARE_TREE_NEW ||
@@ -4098,7 +4128,14 @@ static int changed_inode(struct send_ctx *sctx,
right_gen = btrfs_inode_generation(sctx->right_path->nodes[0],
right_ii);
- if (left_gen != right_gen)
+
+ /*
+ * The cur_ino = root dir case is special here. We can't treat
+ * the inode as deleted+reused because it would generate a
+ * stream that tries to delete/mkdir the root dir.
+ */
+ if (left_gen != right_gen &&
+ sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID)
sctx->cur_inode_new_gen = 1;
}
@@ -4111,8 +4148,7 @@ static int changed_inode(struct send_ctx *sctx,
sctx->cur_inode_mode = btrfs_inode_mode(
sctx->left_path->nodes[0], left_ii);
if (sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID)
- ret = send_create_inode(sctx, sctx->left_path,
- sctx->cmp_key);
+ ret = send_create_inode_if_needed(sctx);
} else if (result == BTRFS_COMPARE_TREE_DELETED) {
sctx->cur_inode_gen = right_gen;
sctx->cur_inode_new = 0;
@@ -4122,7 +4158,17 @@ static int changed_inode(struct send_ctx *sctx,
sctx->cur_inode_mode = btrfs_inode_mode(
sctx->right_path->nodes[0], right_ii);
} else if (result == BTRFS_COMPARE_TREE_CHANGED) {
+ /*
+ * We need to do some special handling in case the inode was
+ * reported as changed with a changed generation number. This
+ * means that the original inode was deleted and a new inode
+ * reused the same inum. So we have to treat the old inode as
+ * deleted and the new one as new.
+ */
if (sctx->cur_inode_new_gen) {
+ /*
+ * First, process the inode as if it was deleted.
+ */
sctx->cur_inode_gen = right_gen;
sctx->cur_inode_new = 0;
sctx->cur_inode_deleted = 1;
@@ -4135,6 +4181,9 @@ static int changed_inode(struct send_ctx *sctx,
if (ret < 0)
goto out;
+ /*
+ * Now process the inode as if it was new.
+ */
sctx->cur_inode_gen = left_gen;
sctx->cur_inode_new = 1;
sctx->cur_inode_deleted = 0;
@@ -4142,14 +4191,23 @@ static int changed_inode(struct send_ctx *sctx,
sctx->left_path->nodes[0], left_ii);
sctx->cur_inode_mode = btrfs_inode_mode(
sctx->left_path->nodes[0], left_ii);
- ret = send_create_inode(sctx, sctx->left_path,
- sctx->cmp_key);
+ ret = send_create_inode_if_needed(sctx);
if (ret < 0)
goto out;
ret = process_all_refs(sctx, BTRFS_COMPARE_TREE_NEW);
if (ret < 0)
goto out;
+ /*
+ * Advance send_progress now as we did not get into
+ * process_recorded_refs_if_needed in the new_gen case.
+ */
+ sctx->send_progress = sctx->cur_ino + 1;
+
+ /*
+ * Now process all extents and xattrs of the inode as if
+ * they were all new.
+ */
ret = process_all_extents(sctx);
if (ret < 0)
goto out;
@@ -4172,6 +4230,16 @@ out:
return ret;
}
+/*
+ * We have to process new refs before deleted refs, but compare_trees gives us
+ * the new and deleted refs mixed. To fix this, we record the new/deleted refs
+ * first and later process them in process_recorded_refs.
+ * For the cur_inode_new_gen case, we skip recording completely because
+ * changed_inode did already initiate processing of refs. The reason for this is
+ * that in this case, compare_tree actually compares the refs of 2 different
+ * inodes. To fix this, process_all_refs is used in changed_inode to handle all
+ * refs of the right tree as deleted and all refs of the left tree as new.
+ */
static int changed_ref(struct send_ctx *sctx,
enum btrfs_compare_tree_result result)
{
@@ -4192,6 +4260,11 @@ static int changed_ref(struct send_ctx *sctx,
return ret;
}
+/*
+ * Process new/deleted/changed xattrs. We skip processing in the
+ * cur_inode_new_gen case because changed_inode did already initiate processing
+ * of xattrs. The reason is the same as in changed_ref
+ */
static int changed_xattr(struct send_ctx *sctx,
enum btrfs_compare_tree_result result)
{
@@ -4211,6 +4284,11 @@ static int changed_xattr(struct send_ctx *sctx,
return ret;
}
+/*
+ * Process new/deleted/changed extents. We skip processing in the
+ * cur_inode_new_gen case because changed_inode did already initiate processing
+ * of extents. The reason is the same as in changed_ref
+ */
static int changed_extent(struct send_ctx *sctx,
enum btrfs_compare_tree_result result)
{
@@ -4227,7 +4305,10 @@ static int changed_extent(struct send_ctx *sctx,
return ret;
}
-
+/*
+ * Updates compare related fields in sctx and simply forwards to the actual
+ * changed_xxx functions.
+ */
static int changed_cb(struct btrfs_root *left_root,
struct btrfs_root *right_root,
struct btrfs_path *left_path,
@@ -4247,6 +4328,11 @@ static int changed_cb(struct btrfs_root *left_root,
if (ret < 0)
goto out;
+ /* Ignore non-FS objects */
+ if (key->objectid == BTRFS_FREE_INO_OBJECTID ||
+ key->objectid == BTRFS_FREE_SPACE_OBJECTID)
+ goto out;
+
if (key->type == BTRFS_INODE_ITEM_KEY)
ret = changed_inode(sctx, result);
else if (key->type == BTRFS_INODE_REF_KEY)
@@ -4299,7 +4385,8 @@ join_trans:
}
/*
- * Make sure the tree has not changed
+ * Make sure the tree has not changed after re-joining. We detect this
+ * by comparing start_ctransid and ctransid. They should always match.
*/
spin_lock(&send_root->root_times_lock);
ctransid = btrfs_root_ctransid(&send_root->root_item);