summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorFilipe Manana <fdmanana@suse.com>2021-07-20 16:03:03 +0100
committerDavid Sterba <dsterba@suse.com>2021-08-23 13:19:00 +0200
commit069a2e37789a9adb236d8f7a5f65a1390b51f184 (patch)
treee314eebe412160689073032653a5bae97a6f3115
parent5da384799278afe0d2557e4d4482240840c208b8 (diff)
downloadlinux-069a2e37789a9adb236d8f7a5f65a1390b51f184.tar.bz2
btrfs: continue readahead of siblings even if target node is in memory
At reada_for_search(), when attempting to readahead a node or leaf's siblings, we skip the readahead of the siblings if the node/leaf is already in memory. That is probably fine for the READA_FORWARD and READA_BACK readahead types, as they are used on contexts where we end up reading some consecutive leaves, but usually not the whole btree. However for a READA_FORWARD_ALWAYS mode, currently only used for full send operations, it does not make sense to skip the readahead if the target node or leaf is already loaded in memory, since we know the caller is visiting every node and leaf of the btree in ascending order. So change the behaviour to not skip the readahead when the target node is already in memory and the readahead mode is READA_FORWARD_ALWAYS. The following test script was used to measure the improvement on a box using an average, consumer grade, spinning disk, with 32GiB of RAM and using a non-debug kernel config (Debian's default config). $ cat test.sh #!/bin/bash DEV=/dev/sdj MNT=/mnt/sdj MKFS_OPTIONS="--nodesize 16384" # default, just to be explicit MOUNT_OPTIONS="-o max_inline=2048" # default, just to be explicit mkfs.btrfs -f $MKFS_OPTIONS $DEV > /dev/null mount $MOUNT_OPTIONS $DEV $MNT # Create files with inline data to make it easier and faster to create # large btrees. add_files() { local total=$1 local start_offset=$2 local number_jobs=$3 local total_per_job=$(($total / $number_jobs)) echo "Creating $total new files using $number_jobs jobs" for ((n = 0; n < $number_jobs; n++)); do ( local start_num=$(($start_offset + $n * $total_per_job)) for ((i = 1; i <= $total_per_job; i++)); do local file_num=$((start_num + $i)) local file_path="$MNT/file_${file_num}" xfs_io -f -c "pwrite -S 0xab 0 2000" $file_path > /dev/null if [ $? -ne 0 ]; then echo "Failed creating file $file_path" break fi done ) & worker_pids[$n]=$! done wait ${worker_pids[@]} sync echo echo "btree node/leaf count: $(btrfs inspect-internal dump-tree -t 5 $DEV | egrep '^(node|leaf) ' | wc -l)" } file_count=2000000 add_files $file_count 0 4 echo echo "Creating snapshot..." btrfs subvolume snapshot -r $MNT $MNT/snap1 umount $MNT echo 3 > /proc/sys/vm/drop_caches blockdev --flushbufs $DEV &> /dev/null hdparm -F $DEV &> /dev/null mount $MOUNT_OPTIONS $DEV $MNT echo echo "Testing full send..." start=$(date +%s) btrfs send $MNT/snap1 > /dev/null end=$(date +%s) echo echo "Full send took $((end - start)) seconds" umount $MNT The duration of the full send operations, in seconds, were the following: Before this change: 85 seconds After this change: 76 seconds (-11.2%) Reviewed-by: Qu Wenruo <wqu@suse.com> Signed-off-by: Filipe Manana <fdmanana@suse.com> Signed-off-by: David Sterba <dsterba@suse.com>
-rw-r--r--fs/btrfs/ctree.c13
1 files changed, 8 insertions, 5 deletions
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index c212f1218fdd..63c026495193 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -1233,7 +1233,6 @@ static void reada_for_search(struct btrfs_fs_info *fs_info,
u64 target;
u64 nread = 0;
u64 nread_max;
- struct extent_buffer *eb;
u32 nr;
u32 blocksize;
u32 nscan = 0;
@@ -1262,10 +1261,14 @@ static void reada_for_search(struct btrfs_fs_info *fs_info,
search = btrfs_node_blockptr(node, slot);
blocksize = fs_info->nodesize;
- eb = find_extent_buffer(fs_info, search);
- if (eb) {
- free_extent_buffer(eb);
- return;
+ if (path->reada != READA_FORWARD_ALWAYS) {
+ struct extent_buffer *eb;
+
+ eb = find_extent_buffer(fs_info, search);
+ if (eb) {
+ free_extent_buffer(eb);
+ return;
+ }
}
target = search;