From 0596a9048bf2aca2a74b312493f39e4d5ac3b653 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 14 Jun 2016 14:18:27 +0100 Subject: Btrfs: add missing check for writeback errors on fsync When we start an fsync we start ordered extents for all delalloc ranges. However before attempting to log the inode, we only wait for those ordered extents if we are not doing a full sync (bit BTRFS_INODE_NEEDS_FULL_SYNC is set in the inode's flags). This means that if an ordered extent completes with an IO error before we check if we can skip logging the inode, we will not catch and report the IO error to user space. This is because on an IO error, when the ordered extent completes we do not update the inode, so if the inode was not previously updated by the current transaction we end up not logging it through calls to fsync and therefore not check its mapping flags for the presence of IO errors. Fix this by checking for errors in the flags of the inode's mapping when we notice we can skip logging the inode. This caused sporadic failures in the test generic/331 (which explicitly tests for IO errors during an fsync call). Signed-off-by: Filipe Manana Reviewed-by: Liu Bo --- fs/btrfs/file.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index bcfb4a27ddd4..6f049109fffe 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -2033,6 +2033,14 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) */ clear_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags); + /* + * An ordered extent might have started before and completed + * already with io errors, in which case the inode was not + * updated and we end up here. So check the inode's mapping + * flags for any errors that might have happened while doing + * writeback of file data. + */ + ret = btrfs_inode_check_errors(inode); inode_unlock(inode); goto out; } -- cgit v1.2.3 From 801bec365e0e19f2ba066cd3e25a67dee21b4aae Mon Sep 17 00:00:00 2001 From: Robbie Ko Date: Tue, 23 Jun 2015 18:39:46 +0800 Subject: Btrfs: send, fix failure to move directories with the same name around When doing an incremental send we can end up not moving directories that have the same name. This happens when the same parent directory has different child directories with the same name in the parent and send snapshots. For example, consider the following scenario: Parent snapshot: . (ino 256) |---- d/ (ino 257) | |--- p1/ (ino 258) | |---- p1/ (ino 259) Send snapshot: . (ino 256) |--- d/ (ino 257) |--- p1/ (ino 259) |--- p1/ (ino 258) The directory named "d" (inode 257) has in both snapshots an entry with the name "p1" but it refers to different inodes in both snapshots (inode 258 in the parent snapshot and inode 259 in the send snapshot). When attempting to move inode 258, the operation is delayed because its new parent, inode 259, was not yet moved/renamed (as the stream is currently processing inode 258). Then when processing inode 259, we also end up delaying its move/rename operation so that it happens after inode 258 is moved/renamed. This decision to delay the move/rename rename operation of inode 259 is due to the fact that the new parent inode (257) still has inode 258 as its child, which has the same name has inode 259. So we end up with inode 258 move/rename operation waiting for inode's 259 move/rename operation, which in turn it waiting for inode's 258 move/rename. This results in ending the send stream without issuing move/rename operations for inodes 258 and 259 and generating the following warnings in syslog/dmesg: [148402.979747] ------------[ cut here ]------------ [148402.980588] WARNING: CPU: 14 PID: 4117 at fs/btrfs/send.c:6177 btrfs_ioctl_send+0xe03/0xe51 [btrfs] [148402.981928] Modules linked in: btrfs crc32c_generic xor raid6_pq acpi_cpufreq tpm_tis ppdev tpm parport_pc psmouse parport sg pcspkr i2c_piix4 i2c_core evdev processor serio_raw button loop autofs4 ext4 crc16 jbd2 mbcache sr_mod cdrom sd_mod ata_generic virtio_scsi ata_piix libata virtio_pci virtio_ring virtio e1000 scsi_mod floppy [last unloaded: btrfs] [148402.986999] CPU: 14 PID: 4117 Comm: btrfs Tainted: G W 4.6.0-rc7-btrfs-next-31+ #1 [148402.988136] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS by qemu-project.org 04/01/2014 [148402.988136] 0000000000000000 ffff88022139fca8 ffffffff8126b42c 0000000000000000 [148402.988136] 0000000000000000 ffff88022139fce8 ffffffff81052b14 000018212139fac8 [148402.988136] ffff88022b0db400 0000000000000000 0000000000000001 0000000000000000 [148402.988136] Call Trace: [148402.988136] [] dump_stack+0x67/0x90 [148402.988136] [] __warn+0xc2/0xdd [148402.988136] [] warn_slowpath_null+0x1d/0x1f [148402.988136] [] btrfs_ioctl_send+0xe03/0xe51 [btrfs] [148402.988136] [] btrfs_ioctl+0x14f/0x1f81 [btrfs] [148402.988136] [] ? arch_local_irq_save+0x9/0xc [148402.988136] [] ? __lock_is_held+0x3c/0x57 [148402.988136] [] vfs_ioctl+0x18/0x34 [148402.988136] [] do_vfs_ioctl+0x550/0x5be [148402.988136] [] ? __fget+0x6b/0x77 [148402.988136] [] ? __fget_light+0x62/0x71 [148402.988136] [] SyS_ioctl+0x57/0x79 [148402.988136] [] entry_SYSCALL_64_fastpath+0x18/0xa8 [148402.988136] [] ? trace_hardirqs_off_caller+0x3f/0xaa [148403.011373] ---[ end trace a4539270c8056f8b ]--- [148403.012296] ------------[ cut here ]------------ [148403.013071] WARNING: CPU: 14 PID: 4117 at fs/btrfs/send.c:6194 btrfs_ioctl_send+0xe19/0xe51 [btrfs] [148403.014447] Modules linked in: btrfs crc32c_generic xor raid6_pq acpi_cpufreq tpm_tis ppdev tpm parport_pc psmouse parport sg pcspkr i2c_piix4 i2c_core evdev processor serio_raw button loop autofs4 ext4 crc16 jbd2 mbcache sr_mod cdrom sd_mod ata_generic virtio_scsi ata_piix libata virtio_pci virtio_ring virtio e1000 scsi_mod floppy [last unloaded: btrfs] [148403.019708] CPU: 14 PID: 4117 Comm: btrfs Tainted: G W 4.6.0-rc7-btrfs-next-31+ #1 [148403.020104] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS by qemu-project.org 04/01/2014 [148403.020104] 0000000000000000 ffff88022139fca8 ffffffff8126b42c 0000000000000000 [148403.020104] 0000000000000000 ffff88022139fce8 ffffffff81052b14 000018322139fac8 [148403.020104] ffff88022b0db400 0000000000000000 0000000000000001 0000000000000000 [148403.020104] Call Trace: [148403.020104] [] dump_stack+0x67/0x90 [148403.020104] [] __warn+0xc2/0xdd [148403.020104] [] warn_slowpath_null+0x1d/0x1f [148403.020104] [] btrfs_ioctl_send+0xe19/0xe51 [btrfs] [148403.020104] [] btrfs_ioctl+0x14f/0x1f81 [btrfs] [148403.020104] [] ? arch_local_irq_save+0x9/0xc [148403.020104] [] ? __lock_is_held+0x3c/0x57 [148403.020104] [] vfs_ioctl+0x18/0x34 [148403.020104] [] do_vfs_ioctl+0x550/0x5be [148403.020104] [] ? __fget+0x6b/0x77 [148403.020104] [] ? __fget_light+0x62/0x71 [148403.020104] [] SyS_ioctl+0x57/0x79 [148403.020104] [] entry_SYSCALL_64_fastpath+0x18/0xa8 [148403.020104] [] ? trace_hardirqs_off_caller+0x3f/0xaa [148403.038981] ---[ end trace a4539270c8056f8c ]--- There's another issue caused by similar (but more complex) changes in the directory hierarchy that makes move/rename operations fail, described with the following example: Parent snapshot: . |---- a/ (ino 262) | |---- c/ (ino 268) | |---- d/ (ino 263) |---- ance/ (ino 267) |---- e/ (ino 264) |---- f/ (ino 265) |---- ance/ (ino 266) Send snapshot: . |---- a/ (ino 262) |---- c/ (ino 268) | |---- ance/ (ino 267) | |---- d/ (ino 263) | |---- ance/ (ino 266) | |---- f/ (ino 265) |---- e/ (ino 264) When the inode 265 is processed, the path for inode 267 is computed, which at that time corresponds to "d/ance", and it's stored in the names cache. Later on when processing inode 266, we end up orphanizing (renaming to a name matching the pattern o--) inode 267 because it has the same name as inode 266 and it's currently a child of the new parent directory (inode 263) for inode 266. After the orphanization and while we are still processing inode 266, a rename operation for inode 266 is generated. However the source path for that rename operation is incorrect because it ends up using the old, pre-orphanization, name of inode 267. The no longer valid name for inode 267 was previously cached when processing inode 265 and it remains usable and considered valid until the inode currently being processed has a number greater than 267. This resulted in the receiving side failing with the following error: ERROR: rename d/ance/ance -> d/ance failed: No such file or directory So fix these issues by detecting such circular dependencies for rename operations and by clearing the cached name of an inode once the inode is orphanized. A test case for fstests will follow soon. Signed-off-by: Robbie Ko Signed-off-by: Filipe Manana [Rewrote change log to be more detailed and organized, and improved comments] Signed-off-by: Filipe Manana --- fs/btrfs/send.c | 100 +++++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 95 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index b71dd298385c..993e1bab0a6b 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -231,7 +231,6 @@ struct pending_dir_move { u64 parent_ino; u64 ino; u64 gen; - bool is_orphan; struct list_head update_refs; }; @@ -1861,7 +1860,8 @@ static int will_overwrite_ref(struct send_ctx *sctx, u64 dir, u64 dir_gen, * was already unlinked/moved, so we can safely assume that we will not * overwrite anything at this point in time. */ - if (other_inode > sctx->send_progress) { + if (other_inode > sctx->send_progress || + is_waiting_for_move(sctx, other_inode)) { ret = get_inode_info(sctx->parent_root, other_inode, NULL, who_gen, NULL, NULL, NULL, NULL); if (ret < 0) @@ -3047,7 +3047,6 @@ static int add_pending_dir_move(struct send_ctx *sctx, pm->parent_ino = parent_ino; pm->ino = ino; pm->gen = ino_gen; - pm->is_orphan = is_orphan; INIT_LIST_HEAD(&pm->list); INIT_LIST_HEAD(&pm->update_refs); RB_CLEAR_NODE(&pm->node); @@ -3113,6 +3112,48 @@ static struct pending_dir_move *get_pending_dir_moves(struct send_ctx *sctx, return NULL; } +static int path_loop(struct send_ctx *sctx, struct fs_path *name, + u64 ino, u64 gen, u64 *ancestor_ino) +{ + int ret = 0; + u64 parent_inode = 0; + u64 parent_gen = 0; + u64 start_ino = ino; + + *ancestor_ino = 0; + while (ino != BTRFS_FIRST_FREE_OBJECTID) { + fs_path_reset(name); + + if (is_waiting_for_rm(sctx, ino)) + break; + if (is_waiting_for_move(sctx, ino)) { + if (*ancestor_ino == 0) + *ancestor_ino = ino; + ret = get_first_ref(sctx->parent_root, ino, + &parent_inode, &parent_gen, name); + } else { + ret = __get_cur_name_and_parent(sctx, ino, gen, + &parent_inode, + &parent_gen, name); + if (ret > 0) { + ret = 0; + break; + } + } + if (ret < 0) + break; + if (parent_inode == start_ino) { + ret = 1; + if (*ancestor_ino == 0) + *ancestor_ino = ino; + break; + } + ino = parent_inode; + gen = parent_gen; + } + return ret; +} + static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm) { struct fs_path *from_path = NULL; @@ -3123,6 +3164,8 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm) u64 parent_ino, parent_gen; struct waiting_dir_move *dm = NULL; u64 rmdir_ino = 0; + u64 ancestor; + bool is_orphan; int ret; name = fs_path_alloc(); @@ -3135,9 +3178,10 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm) dm = get_waiting_dir_move(sctx, pm->ino); ASSERT(dm); rmdir_ino = dm->rmdir_ino; + is_orphan = dm->orphanized; free_waiting_dir_move(sctx, dm); - if (pm->is_orphan) { + if (is_orphan) { ret = gen_unique_name(sctx, pm->ino, pm->gen, from_path); } else { @@ -3155,6 +3199,22 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm) goto out; sctx->send_progress = sctx->cur_ino + 1; + ret = path_loop(sctx, name, pm->ino, pm->gen, &ancestor); + if (ret) { + LIST_HEAD(deleted_refs); + ASSERT(ancestor > BTRFS_FIRST_FREE_OBJECTID); + ret = add_pending_dir_move(sctx, pm->ino, pm->gen, ancestor, + &pm->update_refs, &deleted_refs, + is_orphan); + if (ret < 0) + goto out; + if (rmdir_ino) { + dm = get_waiting_dir_move(sctx, pm->ino); + ASSERT(dm); + dm->rmdir_ino = rmdir_ino; + } + goto out; + } fs_path_reset(name); to_path = name; name = NULL; @@ -3325,6 +3385,7 @@ static int wait_for_dest_dir_move(struct send_ctx *sctx, u64 left_gen; u64 right_gen; int ret = 0; + struct waiting_dir_move *wdm; if (RB_EMPTY_ROOT(&sctx->waiting_dir_moves)) return 0; @@ -3383,7 +3444,8 @@ static int wait_for_dest_dir_move(struct send_ctx *sctx, goto out; } - if (is_waiting_for_move(sctx, di_key.objectid)) { + wdm = get_waiting_dir_move(sctx, di_key.objectid); + if (wdm && !wdm->orphanized) { ret = add_pending_dir_move(sctx, sctx->cur_ino, sctx->cur_inode_gen, @@ -3643,11 +3705,26 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); goto out; if (ret) { struct name_cache_entry *nce; + struct waiting_dir_move *wdm; ret = orphanize_inode(sctx, ow_inode, ow_gen, cur->full_path); if (ret < 0) goto out; + + /* + * If ow_inode has its rename operation delayed + * make sure that its orphanized name is used in + * the source path when performing its rename + * operation. + */ + if (is_waiting_for_move(sctx, ow_inode)) { + wdm = get_waiting_dir_move(sctx, + ow_inode); + ASSERT(wdm); + wdm->orphanized = true; + } + /* * Make sure we clear our orphanized inode's * name from the name cache. This is because the @@ -3663,6 +3740,19 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); name_cache_delete(sctx, nce); kfree(nce); } + + /* + * ow_inode might currently be an ancestor of + * cur_ino, therefore compute valid_path (the + * current path of cur_ino) again because it + * might contain the pre-orphanization name of + * ow_inode, which is no longer valid. + */ + fs_path_reset(valid_path); + ret = get_cur_path(sctx, sctx->cur_ino, + sctx->cur_inode_gen, valid_path); + if (ret < 0) + goto out; } else { ret = send_unlink(sctx, cur->full_path); if (ret < 0) -- cgit v1.2.3 From 7969e77a73164b418da851cbae35cdd6c1b43fee Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Fri, 17 Jun 2016 17:13:36 +0100 Subject: Btrfs: send, add missing error check for calls to path_loop() The function path_loop() can return a negative integer, signaling an error, 0 if there's no path loop and 1 if there's a path loop. We were treating any non zero values as meaning that a path loop exists. Fix this by explicitly checking for errors and gracefully return them to user space. Signed-off-by: Filipe Manana --- fs/btrfs/send.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 993e1bab0a6b..0dc05bb856ff 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -3200,6 +3200,8 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm) sctx->send_progress = sctx->cur_ino + 1; ret = path_loop(sctx, name, pm->ino, pm->gen, &ancestor); + if (ret < 0) + goto out; if (ret) { LIST_HEAD(deleted_refs); ASSERT(ancestor > BTRFS_FIRST_FREE_OBJECTID); -- cgit v1.2.3 From 4122ea64f877f632390bca1886c8a6f666f28398 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 27 Jun 2016 16:54:44 +0100 Subject: Btrfs: incremental send, fix invalid paths for rename operations Example scenario: Parent snapshot: . (ino 277) |---- tmp/ (ino 278) |---- pre/ (ino 280) | |---- wait_dir/ (ino 281) | |---- desc/ (ino 282) |---- ance/ (ino 283) | |---- below_ance/ (ino 279) | |---- other_dir/ (ino 284) Send snapshot: . (ino 277) |---- tmp/ (ino 278) |---- other_dir/ (ino 284) |---- below_ance/ (ino 279) | |---- pre/ (ino 280) | |---- wait_dir/ (ino 281) |---- desc/ (ino 282) |---- ance/ (ino 283) While computing the send stream the following steps happen: 1) While processing inode 279 we end up delaying its rename operation because its new parent in the send snapshot, inode 284, was not yet processed and therefore not yet renamed; 2) Later when processing inode 280 we end up renaming it immediately to "ance/below_once/pre" and not delay its rename operation because its new parent (inode 279 in the send snapshot) has its rename operation delayed and inode 280 is not an encestor of inode 279 (its parent in the send snapshot) in the parent snapshot; 3) When processing inode 281 we end up delaying its rename operation because its new parent in the send snapshot, inode 284, was not yet processed and therefore not yet renamed; 4) When processing inode 282 we do not delay its rename operation because its parent in the send snapshot, inode 281, already has its own rename operation delayed and our current inode (282) is not an ancestor of inode 281 in the parent snapshot. Therefore inode 282 is renamed to "ance/below_ance/pre/wait_dir"; 5) When processing inode 283 we realize that we can rename it because one of its ancestors in the send snapshot, inode 281, has its rename operation delayed and inode 283 is not an ancestor of inode 281 in the parent snapshot. So a rename operation to rename inode 283 to "ance/below_ance/pre/wait_dir/desc/ance" is issued. This path is invalid due to a missing path building loop that was undetected by the incremental send implementation, as inode 283 ends up getting included twice in the path (once with its path in the parent snapshot). Therefore its rename operation must wait before the ancestor inode 284 is renamed. Fix this by not terminating the rename dependency checks when we find an ancestor, in the send snapshot, that has its rename operation delayed. So that we continue doing the same checks if the current inode is not an ancestor, in the parent snapshot, of an ancestor in the send snapshot we are processing in the loop. The problem and reproducer were reported by Robbie Ko, as part of a patch titled "Btrfs: incremental send, avoid ancestor rename to descendant". However the fix was unnecessarily complicated and can be addressed with much less code and effort. Reported-by: Robbie Ko Signed-off-by: Filipe Manana --- fs/btrfs/send.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 0dc05bb856ff..3d6a4dde8fe4 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -3534,7 +3534,8 @@ static int wait_for_parent_move(struct send_ctx *sctx, ret = is_ancestor(sctx->parent_root, sctx->cur_ino, sctx->cur_inode_gen, ino, path_before); - break; + if (ret) + break; } fs_path_reset(path_before); -- cgit v1.2.3 From 99ea42ddb13bad6188d4f183f8866cacd14fa141 Mon Sep 17 00:00:00 2001 From: Robbie Ko Date: Tue, 23 Jun 2015 18:39:49 +0800 Subject: Btrfs: incremental send, fix premature rmdir operations Under certain situations, an incremental send operation can contain a rmdir operation that will make the receiving end fail when attempting to execute it, because the target directory is not yet empty. Consider the following example: Parent snapshot: . (ino 256) |--- a/ (ino 257) | |--- c/ (ino 260) | |--- del/ (ino 259) |--- tmp/ (ino 258) |--- x/ (ino 261) Send snapshot: . (ino 256) |--- a/ (ino 257) | |--- x/ (ino 261) | |--- c/ (ino 260) |--- tmp/ (ino 258) 1) When processing inode 258, we delay its rename operation because inode 260 is its new parent in the send snapshot and it was not yet renamed (since 260 > 258, that is, beyond the current progress); 2) When processing inode 259, we realize we can not yet send an rmdir operation (against inode 259) because inode 258 was still not yet renamed/moved away from inode 259. Therefore we update data structures so that after inode 258 is renamed, we try again to see if we can finally send an rmdir operation for inode 259; 3) When we process inode 260, we send a rename operation for it followed by a rename operation for inode 258. Once we send the rename operation for inode 258 we then check if we can finally issue an rmdir for its previous parent, inode 259, by calling the can_rmdir() function with a value of sctx->cur_ino + 1 (260 + 1 = 261) for its "progress" argument. This makes can_rmdir() return true (value 1) because even though there's still a child inode of inode 259 that was not yet renamed/moved, which is inode 261, the given value of progress (261) is not lower then 261 (that is, not lower than the inode number of some child of inode 259). So we end up sending a rmdir operation for inode 259 before its child inode 261 is processed and renamed. So fix this by passing the correct progress value to the call to can_rmdir() from within apply_dir_move() (where we issue delayed rename operations), which should match stcx->cur_ino (the number of the inode currently being processed) and not sctx->cur_ino + 1. A test case for fstests follows soon. Signed-off-by: Robbie Ko Signed-off-by: Filipe Manana [Rewrote change log to be more detailed, clear and well formatted] Signed-off-by: Filipe Manana --- fs/btrfs/send.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 3d6a4dde8fe4..4115aba001ad 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -3236,7 +3236,7 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm) /* already deleted */ goto finish; } - ret = can_rmdir(sctx, rmdir_ino, odi->gen, sctx->cur_ino + 1); + ret = can_rmdir(sctx, rmdir_ino, odi->gen, sctx->cur_ino); if (ret < 0) goto out; if (!ret) -- cgit v1.2.3 From 443f9d266c4848a3a9ea386f66c4bed2f5997940 Mon Sep 17 00:00:00 2001 From: Robbie Ko Date: Mon, 22 Jun 2015 17:08:45 +0800 Subject: Btrfs: send, fix warning due to late freeing of orphan_dir_info structures Under certain situations, when doing an incremental send, we can end up not freeing orphan_dir_info structures as soon as they are no longer needed. Instead we end up freeing them only after finishing the send stream, which causes a warning to be emitted: [282735.229200] ------------[ cut here ]------------ [282735.229968] WARNING: CPU: 9 PID: 10588 at fs/btrfs/send.c:6298 btrfs_ioctl_send+0xe2f/0xe51 [btrfs] [282735.231282] Modules linked in: btrfs crc32c_generic xor raid6_pq acpi_cpufreq tpm_tis ppdev tpm parport_pc psmouse parport sg pcspkr i2c_piix4 i2c_core evdev processor serio_raw button loop autofs4 ext4 crc16 jbd2 mbcache sr_mod cdrom sd_mod ata_generic virtio_scsi ata_piix libata virtio_pci virtio_ring virtio e1000 scsi_mod floppy [last unloaded: btrfs] [282735.237130] CPU: 9 PID: 10588 Comm: btrfs Tainted: G W 4.6.0-rc7-btrfs-next-31+ #1 [282735.239309] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS by qemu-project.org 04/01/2014 [282735.240160] 0000000000000000 ffff880224273ca8 ffffffff8126b42c 0000000000000000 [282735.240160] 0000000000000000 ffff880224273ce8 ffffffff81052b14 0000189a24273ac8 [282735.240160] ffff8802210c9800 0000000000000000 0000000000000001 0000000000000000 [282735.240160] Call Trace: [282735.240160] [] dump_stack+0x67/0x90 [282735.240160] [] __warn+0xc2/0xdd [282735.240160] [] warn_slowpath_null+0x1d/0x1f [282735.240160] [] btrfs_ioctl_send+0xe2f/0xe51 [btrfs] [282735.240160] [] btrfs_ioctl+0x14f/0x1f81 [btrfs] [282735.240160] [] ? arch_local_irq_save+0x9/0xc [282735.240160] [] vfs_ioctl+0x18/0x34 [282735.240160] [] do_vfs_ioctl+0x550/0x5be [282735.240160] [] ? __fget+0x6b/0x77 [282735.240160] [] ? __fget_light+0x62/0x71 [282735.240160] [] SyS_ioctl+0x57/0x79 [282735.240160] [] entry_SYSCALL_64_fastpath+0x18/0xa8 [282735.240160] [] ? time_hardirqs_off+0x9/0x14 [282735.240160] [] ? trace_hardirqs_off_caller+0x1f/0xaa [282735.256343] ---[ end trace a4539270c8056f93 ]--- Consider the following example: Parent snapshot: . (ino 256) |--- a/ (ino 257) | |--- c/ (ino 260) | |--- del/ (ino 259) |--- tmp/ (ino 258) |--- x/ (ino 261) |--- y/ (ino 262) Send snapshot: . (ino 256) |--- a/ (ino 257) | |--- x/ (ino 261) | |--- y/ (ino 262) | |--- c/ (ino 260) |--- tmp/ (ino 258) 1) When processing inode 258, we end up delaying its rename operation because it has an ancestor (in the send snapshot) that has a higher inode number (inode 260) which was also renamed in the send snapshot, therefore we delay the rename of inode 258 so that it happens after inode 260 is renamed; 2) When processing inode 259, we end up delaying its deletion (rmdir operation) because it has a child inode (258) that has its rename operation delayed. At this point we allocate an orphan_dir_info structure and tag inode 258 so that we later attempt to see if we can delete (rmdir) inode 259 once inode 258 is renamed; 3) When we process inode 260, after renaming it we finally do the rename operation for inode 258. Once we issue the rename operation for inode 258 we notice that this inode was tagged so that we attempt to see if at this point we can delete (rmdir) inode 259. But at this point we can not still delete inode 259 because it has 2 children, inodes 261 and 262, that were not yet processed and therefore not yet moved (renamed) away from inode 259. We end up not freeing the orphan_dir_info structure allocated in step 2; 4) We process inodes 261 and 262, and once we move/rename inode 262 we issue the rmdir operation for inode 260; 5) We finish the send stream and notice that red black tree that contains orphan_dir_info structures is not empty, so we emit a warning and then free any orphan_dir_structures left. So fix this by freeing an orphan_dir_info structure once we try to apply a pending rename operation if we can not delete yet the tagged directory. A test case for fstests follows soon. Signed-off-by: Robbie Ko Signed-off-by: Filipe Manana [Modified changelog to be more detailed and easier to understand] --- fs/btrfs/send.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 4115aba001ad..e552c33e72c0 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -2947,6 +2947,10 @@ static int can_rmdir(struct send_ctx *sctx, u64 dir, u64 dir_gen, } if (loc.objectid > send_progress) { + struct orphan_dir_info *odi; + + odi = get_orphan_dir_info(sctx, dir); + free_orphan_dir_info(sctx, odi); ret = 0; goto out; } -- cgit v1.2.3 From 764433a12e78e1a2161793c02eceafdfed2d652b Mon Sep 17 00:00:00 2001 From: Robbie Ko Date: Tue, 23 Jun 2015 18:39:50 +0800 Subject: Btrfs: send, fix invalid leaf accesses due to incorrect utimes operations During an incremental send, if we have delayed rename operations for inodes that were children of directories which were removed in the send snapshot, we can end up accessing incorrect items in a leaf or accessing beyond the last item of the leaf due to issuing utimes operations for the removed inodes. Consider the following example: Parent snapshot: . (ino 256) |--- a/ (ino 257) | |--- c/ (ino 262) | |--- b/ (ino 258) | |--- d/ (ino 263) | |--- del/ (ino 261) |--- x/ (ino 259) |--- y/ (ino 260) Send snapshot: . (ino 256) |--- a/ (ino 257) | |--- b/ (ino 258) | |--- c/ (ino 262) | |--- y/ (ino 260) | |--- d/ (ino 263) |--- x/ (ino 259) 1) When processing inodes 259 and 260, we end up delaying their rename operations because their parents, inodes 263 and 262 respectively, were not yet processed and therefore not yet renamed; 2) When processing inode 262, its rename operation is issued and right after the rename operation for inode 260 is issued. However right after issuing the rename operation for inode 260, at send.c:apply_dir_move(), we issue utimes operations for all current and past parents of inode 260. This means we try to send a utimes operation for its old parent, inode 261 (deleted in the send snapshot), which does not cause any immediate and deterministic failure, because when the target inode is not found in the send snapshot, the send.c:send_utimes() function ignores it and uses the leaf region pointed to by path->slots[0], which can be any unrelated item (belonging to other inode) or it can be a region outside the leaf boundaries, if the leaf is full and path->slots[0] matches the number of items in the leaf. So we end up either successfully sending a utimes operation, which is fine and irrelevant because the old parent (inode 261) will end up being deleted later, or we end up doing an invalid memory access tha crashes the kernel. So fix this by making apply_dir_move() issue utimes operations only for parents that still exist in the send snapshot. In a separate patch we will make send_utimes() return an error (-ENOENT) if the given inode does not exists in the send snapshot. Signed-off-by: Robbie Ko Signed-off-by: Filipe Manana [Rewrote change log to be more detailed and better organized] Signed-off-by: Filipe Manana --- fs/btrfs/send.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index e552c33e72c0..8b653967b163 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -3270,8 +3270,18 @@ finish: * and old parent(s). */ list_for_each_entry(cur, &pm->update_refs, list) { - if (cur->dir == rmdir_ino) + /* + * The parent inode might have been deleted in the send snapshot + */ + ret = get_inode_info(sctx->send_root, cur->dir, NULL, + NULL, NULL, NULL, NULL, NULL); + if (ret == -ENOENT) { + ret = 0; continue; + } + if (ret < 0) + goto out; + ret = send_utimes(sctx, cur->dir, cur->dir_gen); if (ret < 0) goto out; -- cgit v1.2.3 From 15b253eace1f98dabbc6e03f6793fcf8603b1655 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Sat, 2 Jul 2016 05:43:46 +0100 Subject: Btrfs: send, avoid incorrect leaf accesses when sending utimes operations The caller of send_utimes() is supposed to be sure that the inode number it passes to this function does actually exists in the send snapshot. However due to logic/algorithm bugs (such as the one fixed by the patch titled "Btrfs: send, fix invalid leaf accesses due to incorrect utimes operations"), this might not be the case and when that happens it makes send_utimes() access use an unrelated leaf item as the target inode item or access beyond a leaf's boundaries (when the leaf is full and path->slots[0] matches the number of items in the leaf). So if the call to btrfs_search_slot() done by send_utimes() does not find the inode item, just make sure send_utimes() returns -ENOENT and does not silently accesses unrelated leaf items or does invalid leaf accesses, also allowing us to easialy and deterministically catch such algorithmic/logic bugs. Signed-off-by: Filipe Manana --- fs/btrfs/send.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 8b653967b163..2db8dc89ca41 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -2502,6 +2502,8 @@ verbose_printk("btrfs: send_utimes %llu\n", ino); key.type = BTRFS_INODE_ITEM_KEY; key.offset = 0; ret = btrfs_search_slot(NULL, sctx->send_root, &key, path, 0, 0); + if (ret > 0) + ret = -ENOENT; if (ret < 0) goto out; -- cgit v1.2.3 From 951555856b88aa47bc238de6b4c6e97bfd9d36df Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 1 Aug 2016 01:50:37 +0100 Subject: Btrfs: send, don't bug on inconsistent snapshots When doing an incremental send, if we find a new/modified/deleted extent, reference or xattr without having previously processed the corresponding inode item we end up exexuting a BUG_ON(). This is because whenever an extent, xattr or reference is added, modified or deleted, we always expect to have the corresponding inode item updated. However there are situations where this will not happen due to transient -ENOMEM or -ENOSPC errors when doing delayed inode updates. For example, when punching holes we can succeed in deleting and modifying (shrinking) extents but later fail to do the delayed inode update. So after such failure we close our transaction handle and right after a snapshot of the fs/subvol tree can be made and used later for a send operation. The same thing can happen during truncate, link, unlink, and xattr related operations. So instead of executing a BUG_ON, make send return an -EIO error and print an informative error message do dmesg/syslog. Signed-off-by: Filipe Manana --- fs/btrfs/send.c | 48 +++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 45 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 2db8dc89ca41..efe129fe2678 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -273,6 +273,39 @@ struct name_cache_entry { char name[]; }; +static void inconsistent_snapshot_error(struct send_ctx *sctx, + enum btrfs_compare_tree_result result, + const char *what) +{ + const char *result_string; + + switch (result) { + case BTRFS_COMPARE_TREE_NEW: + result_string = "new"; + break; + case BTRFS_COMPARE_TREE_DELETED: + result_string = "deleted"; + break; + case BTRFS_COMPARE_TREE_CHANGED: + result_string = "updated"; + break; + case BTRFS_COMPARE_TREE_SAME: + ASSERT(0); + result_string = "unchanged"; + break; + default: + ASSERT(0); + result_string = "unexpected"; + } + + btrfs_err(sctx->send_root->fs_info, + "Send: inconsistent snapshot, found %s %s for inode %llu without updated inode item, send root is %llu, parent root is %llu", + result_string, what, sctx->cmp_key->objectid, + sctx->send_root->root_key.objectid, + (sctx->parent_root ? + sctx->parent_root->root_key.objectid : 0)); +} + static int is_waiting_for_move(struct send_ctx *sctx, u64 ino); static struct waiting_dir_move * @@ -5711,7 +5744,10 @@ static int changed_ref(struct send_ctx *sctx, { int ret = 0; - BUG_ON(sctx->cur_ino != sctx->cmp_key->objectid); + if (sctx->cur_ino != sctx->cmp_key->objectid) { + inconsistent_snapshot_error(sctx, result, "reference"); + return -EIO; + } if (!sctx->cur_inode_new_gen && sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID) { @@ -5736,7 +5772,10 @@ static int changed_xattr(struct send_ctx *sctx, { int ret = 0; - BUG_ON(sctx->cur_ino != sctx->cmp_key->objectid); + if (sctx->cur_ino != sctx->cmp_key->objectid) { + inconsistent_snapshot_error(sctx, result, "xattr"); + return -EIO; + } if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) { if (result == BTRFS_COMPARE_TREE_NEW) @@ -5760,7 +5799,10 @@ static int changed_extent(struct send_ctx *sctx, { int ret = 0; - BUG_ON(sctx->cur_ino != sctx->cmp_key->objectid); + if (sctx->cur_ino != sctx->cmp_key->objectid) { + inconsistent_snapshot_error(sctx, result, "extent"); + return -EIO; + } if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) { if (result != BTRFS_COMPARE_TREE_DELETED) -- cgit v1.2.3 From 67710892ec983aa79ad1e2a2642fe8e3a4a194ea Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 6 Jun 2016 11:51:25 +0100 Subject: Btrfs: be more precise on errors when getting an inode from disk When we attempt to read an inode from disk, we end up always returning an -ESTALE error to the caller regardless of the actual failure reason, which can be an out of memory problem (when allocating a path), some error found when reading from the fs/subvolume btree (like a genuine IO error) or the inode does not exists. So lets start returning the real error code to the callers so that they don't treat all -ESTALE errors as meaning that the inode does not exists (such as during orphan cleanup). This will also be needed for a subsequent patch in the same series dealing with a special fsync case. Signed-off-by: Filipe Manana --- fs/btrfs/inode.c | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 3e61bd1f3f65..f9686541997b 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3428,10 +3428,10 @@ int btrfs_orphan_cleanup(struct btrfs_root *root) found_key.offset = 0; inode = btrfs_iget(root->fs_info->sb, &found_key, root, NULL); ret = PTR_ERR_OR_ZERO(inode); - if (ret && ret != -ESTALE) + if (ret && ret != -ENOENT) goto out; - if (ret == -ESTALE && root == root->fs_info->tree_root) { + if (ret == -ENOENT && root == root->fs_info->tree_root) { struct btrfs_root *dead_root; struct btrfs_fs_info *fs_info = root->fs_info; int is_dead_root = 0; @@ -3467,7 +3467,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root) * Inode is already gone but the orphan item is still there, * kill the orphan item. */ - if (ret == -ESTALE) { + if (ret == -ENOENT) { trans = btrfs_start_transaction(root, 1); if (IS_ERR(trans)) { ret = PTR_ERR(trans); @@ -3626,7 +3626,7 @@ static noinline int acls_after_inode_item(struct extent_buffer *leaf, /* * read an inode from the btree into the in-memory inode */ -static void btrfs_read_locked_inode(struct inode *inode) +static int btrfs_read_locked_inode(struct inode *inode) { struct btrfs_path *path; struct extent_buffer *leaf; @@ -3645,14 +3645,19 @@ static void btrfs_read_locked_inode(struct inode *inode) filled = true; path = btrfs_alloc_path(); - if (!path) + if (!path) { + ret = -ENOMEM; goto make_bad; + } memcpy(&location, &BTRFS_I(inode)->location, sizeof(location)); ret = btrfs_lookup_inode(NULL, root, path, &location, 0); - if (ret) + if (ret) { + if (ret > 0) + ret = -ENOENT; goto make_bad; + } leaf = path->nodes[0]; @@ -3805,11 +3810,12 @@ cache_acl: } btrfs_update_iflags(inode); - return; + return 0; make_bad: btrfs_free_path(path); make_bad_inode(inode); + return ret; } /* @@ -5595,7 +5601,9 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location, return ERR_PTR(-ENOMEM); if (inode->i_state & I_NEW) { - btrfs_read_locked_inode(inode); + int ret; + + ret = btrfs_read_locked_inode(inode); if (!is_bad_inode(inode)) { inode_tree_add(inode); unlock_new_inode(inode); @@ -5604,7 +5612,8 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location, } else { unlock_new_inode(inode); iput(inode); - inode = ERR_PTR(-ESTALE); + ASSERT(ret < 0); + inode = ERR_PTR(ret < 0 ? ret : -ESTALE); } } -- cgit v1.2.3 From 44f714dae50a2e795d3268a6831762aa6fa54f55 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 6 Jun 2016 16:11:13 +0100 Subject: Btrfs: improve performance on fsync against new inode after rename/unlink With commit 56f23fdbb600 ("Btrfs: fix file/data loss caused by fsync after rename and new inode") we got simple fix for a functional issue when the following sequence of actions is done: at transaction N create file A at directory D at transaction N + M (where M >= 1) move/rename existing file A from directory D to directory E create a new file named A at directory D fsync the new file power fail The solution was to simply detect such scenario and fallback to a full transaction commit when we detect it. However this turned out to had a significant impact on throughput (and a bit on latency too) for benchmarks using the dbench tool, which simulates real workloads from smbd (Samba) servers. For example on a test vm (with a debug kernel): Unpatched: Throughput 19.1572 MB/sec 32 clients 32 procs max_latency=1005.229 ms Patched: Throughput 23.7015 MB/sec 32 clients 32 procs max_latency=809.206 ms The patched results (this patch is applied) are similar to the results of a kernel with the commit 56f23fdbb600 ("Btrfs: fix file/data loss caused by fsync after rename and new inode") reverted. This change avoids the fallback to a transaction commit and instead makes sure all the names of the conflicting inode (the one that had a name in a past transaction that matches the name of the new file in the same parent directory) are logged so that at log replay time we don't lose neither the new file nor the old file, and the old file gets the name it was renamed to. This also ends up avoiding a full transaction commit for a similar case that involves an unlink instead of a rename of the old file: at transaction N create file A at directory D at transaction N + M (where M >= 1) remove file A create a new file named A at directory D fsync the new file power fail Signed-off-by: Filipe Manana --- fs/btrfs/inode.c | 19 +++++++++++- fs/btrfs/tree-log.c | 85 ++++++++++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 95 insertions(+), 9 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index f9686541997b..453b9d0da5f1 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4203,6 +4203,7 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) int err = 0; struct btrfs_root *root = BTRFS_I(dir)->root; struct btrfs_trans_handle *trans; + u64 last_unlink_trans; if (inode->i_size > BTRFS_EMPTY_DIR_SIZE) return -ENOTEMPTY; @@ -4225,11 +4226,27 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) if (err) goto out; + last_unlink_trans = BTRFS_I(inode)->last_unlink_trans; + /* now the directory is empty */ err = btrfs_unlink_inode(trans, root, dir, d_inode(dentry), dentry->d_name.name, dentry->d_name.len); - if (!err) + if (!err) { btrfs_i_size_write(inode, 0); + /* + * Propagate the last_unlink_trans value of the deleted dir to + * its parent directory. This is to prevent an unrecoverable + * log tree in the case we do something like this: + * 1) create dir foo + * 2) create snapshot under dir foo + * 3) delete the snapshot + * 4) rmdir foo + * 5) mkdir foo + * 6) fsync foo or some file inside foo + */ + if (last_unlink_trans >= trans->transid) + BTRFS_I(dir)->last_unlink_trans = last_unlink_trans; + } out: btrfs_end_transaction(trans, root); btrfs_btree_balance_dirty(root); diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index c05f69a8ec42..5fa624cd815d 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -4469,7 +4469,8 @@ static int btrfs_log_trailing_hole(struct btrfs_trans_handle *trans, static int btrfs_check_ref_name_override(struct extent_buffer *eb, const int slot, const struct btrfs_key *key, - struct inode *inode) + struct inode *inode, + u64 *other_ino) { int ret; struct btrfs_path *search_path; @@ -4528,7 +4529,16 @@ static int btrfs_check_ref_name_override(struct extent_buffer *eb, search_path, parent, name, this_name_len, 0); if (di && !IS_ERR(di)) { - ret = 1; + struct btrfs_key di_key; + + btrfs_dir_item_key_to_cpu(search_path->nodes[0], + di, &di_key); + if (di_key.type == BTRFS_INODE_ITEM_KEY) { + ret = 1; + *other_ino = di_key.objectid; + } else { + ret = -EAGAIN; + } goto out; } else if (IS_ERR(di)) { ret = PTR_ERR(di); @@ -4718,16 +4728,71 @@ again: if ((min_key.type == BTRFS_INODE_REF_KEY || min_key.type == BTRFS_INODE_EXTREF_KEY) && BTRFS_I(inode)->generation == trans->transid) { + u64 other_ino = 0; + ret = btrfs_check_ref_name_override(path->nodes[0], path->slots[0], - &min_key, inode); + &min_key, inode, + &other_ino); if (ret < 0) { err = ret; goto out_unlock; } else if (ret > 0) { - err = 1; - btrfs_set_log_full_commit(root->fs_info, trans); - goto out_unlock; + struct btrfs_key inode_key; + struct inode *other_inode; + + if (ins_nr > 0) { + ins_nr++; + } else { + ins_nr = 1; + ins_start_slot = path->slots[0]; + } + ret = copy_items(trans, inode, dst_path, path, + &last_extent, ins_start_slot, + ins_nr, inode_only, + logged_isize); + if (ret < 0) { + err = ret; + goto out_unlock; + } + ins_nr = 0; + btrfs_release_path(path); + inode_key.objectid = other_ino; + inode_key.type = BTRFS_INODE_ITEM_KEY; + inode_key.offset = 0; + other_inode = btrfs_iget(root->fs_info->sb, + &inode_key, root, + NULL); + /* + * If the other inode that had a conflicting dir + * entry was deleted in the current transaction, + * we don't need to do more work nor fallback to + * a transaction commit. + */ + if (IS_ERR(other_inode) && + PTR_ERR(other_inode) == -ENOENT) { + goto next_key; + } else if (IS_ERR(other_inode)) { + err = PTR_ERR(other_inode); + goto out_unlock; + } + /* + * We are safe logging the other inode without + * acquiring its i_mutex as long as we log with + * the LOG_INODE_EXISTS mode. We're safe against + * concurrent renames of the other inode as well + * because during a rename we pin the log and + * update the log with the new name before we + * unpin it. + */ + err = btrfs_log_inode(trans, root, other_inode, + LOG_INODE_EXISTS, + 0, LLONG_MAX, ctx); + iput(other_inode); + if (err) + goto out_unlock; + else + goto next_key; } } @@ -4795,7 +4860,7 @@ next_slot: ins_nr = 0; } btrfs_release_path(path); - +next_key: if (min_key.offset < (u64)-1) { min_key.offset++; } else if (min_key.type < max_key.type) { @@ -4989,8 +5054,12 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans, if (!parent || d_really_is_negative(parent) || sb != parent->d_sb) break; - if (IS_ROOT(parent)) + if (IS_ROOT(parent)) { + inode = d_inode(parent); + if (btrfs_must_commit_transaction(trans, inode)) + ret = 1; break; + } parent = dget_parent(parent); dput(old_parent); -- cgit v1.2.3 From e6571499336e10f93a77c51a35fd1a96828eea71 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 3 Aug 2016 11:01:32 +0100 Subject: Btrfs: remove unused function btrfs_add_delayed_qgroup_reserve() No longer used as of commit 5846a3c26873 ("btrfs: qgroup: Fix a race in delayed_ref which leads to abort trans"). Signed-off-by: Filipe Manana --- fs/btrfs/delayed-ref.c | 27 --------------------------- fs/btrfs/delayed-ref.h | 3 --- 2 files changed, 30 deletions(-) diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index 430b3689b112..02dd2ec7d534 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c @@ -861,33 +861,6 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info, return 0; } -int btrfs_add_delayed_qgroup_reserve(struct btrfs_fs_info *fs_info, - struct btrfs_trans_handle *trans, - u64 ref_root, u64 bytenr, u64 num_bytes) -{ - struct btrfs_delayed_ref_root *delayed_refs; - struct btrfs_delayed_ref_head *ref_head; - int ret = 0; - - if (!fs_info->quota_enabled || !is_fstree(ref_root)) - return 0; - - delayed_refs = &trans->transaction->delayed_refs; - - spin_lock(&delayed_refs->lock); - ref_head = find_ref_head(&delayed_refs->href_root, bytenr, 0); - if (!ref_head) { - ret = -ENOENT; - goto out; - } - WARN_ON(ref_head->qgroup_reserved || ref_head->qgroup_ref_root); - ref_head->qgroup_ref_root = ref_root; - ref_head->qgroup_reserved = num_bytes; -out: - spin_unlock(&delayed_refs->lock); - return ret; -} - int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info, struct btrfs_trans_handle *trans, u64 bytenr, u64 num_bytes, diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h index 5fca9534a271..43f3629760e9 100644 --- a/fs/btrfs/delayed-ref.h +++ b/fs/btrfs/delayed-ref.h @@ -250,9 +250,6 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info, u64 parent, u64 ref_root, u64 owner, u64 offset, u64 reserved, int action, struct btrfs_delayed_extent_op *extent_op); -int btrfs_add_delayed_qgroup_reserve(struct btrfs_fs_info *fs_info, - struct btrfs_trans_handle *trans, - u64 ref_root, u64 bytenr, u64 num_bytes); int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info, struct btrfs_trans_handle *trans, u64 bytenr, u64 num_bytes, -- cgit v1.2.3