summaryrefslogtreecommitdiffstats
path: root/include/trace
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2021-02-21 10:00:39 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2021-02-21 10:00:39 -0800
commit6f3952cbe00b74739f540981d1afe84cd4dac879 (patch)
tree6dacecd7ea8dc06d985c7ea417391c1ae2aec574 /include/trace
parentf9d58de23152f2c16f326d7e014cfa2933b00304 (diff)
parent9d294a685fbcb256ce8c5f7fd88a7596d0f52a8a (diff)
downloadlinux-6f3952cbe00b74739f540981d1afe84cd4dac879.tar.bz2
Merge tag 'for-5.12-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs updates from David Sterba: "This brings updates of space handling, performance improvements or bug fixes. The subpage block size and zoned mode features have reached state where they're usable but with limitations. Performance or related: - do not block on deleted block group mutex in the cleaner, avoids some long stalls - improved flushing: make it work better with ticket space reservations and avoid excessive transaction commits in some scenarios, slightly improves throughput for random write load - preemptive background flushing: separate the logic from ticket reservations, improve the accounting and decisions when to flush in low space conditions - less lock contention related to running delayed refs, let just one thread do the flushing when there are many inside transaction commit - dbench workload improvements: avoid unnecessary work when logging inodes, fewer fallbacks to transaction commit and thus less waiting for it (+7% throughput, -20% latency) Core: - subpage block size - currently read-only support - refactor and generalize code where sectorsize is assumed to be page size, add the subpage handling everywhere - the read-write support is on the way, page sizes are still limited to 4K or 64K - zoned mode, first working version but with limitations - SMR/ZBC/ZNS friendly allocation mode, utilizing the "no fixed location for structures" and chunked allocation - superblock as the only fixed data structure needs special handling, uses 2 consecutive zones as a ring buffer - tree-log support with a dedicated block group to avoid unordered writes - emulated zones on non-zoned devices - not yet working - all non-single block group profiles, requires more zone write pointer synchronization between the multiple block groups - fitrim due to dependency on space cache, can be implemented Fixes: - ref-verify: proper tree owner and node level tracking - fix pinned byte accounting, causing some early ENOSPC now more likely due to other changes in delayed refs 
Other: - error handling fixes and improvements - more error injection points - more function documentation - more and updated tracepoints - subset of W=1 checked by default - update comments to allow more automatic kdoc parameter checks" * tag 'for-5.12-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (144 commits) btrfs: zoned: enable to mount ZONED incompat flag btrfs: zoned: deal with holes writing out tree-log pages btrfs: zoned: reorder log node allocation on zoned filesystem btrfs: zoned: serialize log transaction on zoned filesystems btrfs: zoned: extend zoned allocator to use dedicated tree-log block group btrfs: split alloc_log_tree() btrfs: zoned: relocate block group to repair IO failure in zoned filesystems btrfs: zoned: enable relocation on a zoned filesystem btrfs: zoned: support dev-replace in zoned filesystems btrfs: zoned: implement copying for zoned device-replace btrfs: zoned: implement cloning for zoned device-replace btrfs: zoned: mark block groups to copy for device-replace btrfs: zoned: do not use async metadata checksum on zoned filesystems btrfs: zoned: wait for existing extents before truncating btrfs: zoned: serialize metadata IO btrfs: zoned: introduce dedicated data write path for zoned filesystems btrfs: zoned: enable zone append writing for direct IO btrfs: zoned: use ZONE_APPEND write for zoned mode btrfs: save irq flags when looking up an ordered extent btrfs: zoned: cache if block group is on a sequential zone ...
Diffstat (limited to 'include/trace')
-rw-r--r--include/trace/events/btrfs.h111
1 files changed, 103 insertions, 8 deletions
diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h
index ecd24c719de4..0551ea65374f 100644
--- a/include/trace/events/btrfs.h
+++ b/include/trace/events/btrfs.h
@@ -99,7 +99,8 @@ struct btrfs_space_info;
EM( ALLOC_CHUNK, "ALLOC_CHUNK") \
EM( ALLOC_CHUNK_FORCE, "ALLOC_CHUNK_FORCE") \
EM( RUN_DELAYED_IPUTS, "RUN_DELAYED_IPUTS") \
- EMe(COMMIT_TRANS, "COMMIT_TRANS")
+ EM( COMMIT_TRANS, "COMMIT_TRANS") \
+ EMe(FORCE_COMMIT_TRANS, "FORCE_COMMIT_TRANS")
/*
* First define the enums in the above macros to be exported to userspace via
@@ -499,12 +500,13 @@ DEFINE_EVENT(
#define show_ordered_flags(flags) \
__print_flags(flags, "|", \
- { (1 << BTRFS_ORDERED_IO_DONE), "IO_DONE" }, \
- { (1 << BTRFS_ORDERED_COMPLETE), "COMPLETE" }, \
+ { (1 << BTRFS_ORDERED_REGULAR), "REGULAR" }, \
{ (1 << BTRFS_ORDERED_NOCOW), "NOCOW" }, \
- { (1 << BTRFS_ORDERED_COMPRESSED), "COMPRESSED" }, \
{ (1 << BTRFS_ORDERED_PREALLOC), "PREALLOC" }, \
+ { (1 << BTRFS_ORDERED_COMPRESSED), "COMPRESSED" }, \
{ (1 << BTRFS_ORDERED_DIRECT), "DIRECT" }, \
+ { (1 << BTRFS_ORDERED_IO_DONE), "IO_DONE" }, \
+ { (1 << BTRFS_ORDERED_COMPLETE), "COMPLETE" }, \
{ (1 << BTRFS_ORDERED_IOERR), "IOERR" }, \
{ (1 << BTRFS_ORDERED_TRUNCATED), "TRUNCATED" })
@@ -1111,15 +1113,16 @@ TRACE_EVENT(btrfs_trigger_flush,
TRACE_EVENT(btrfs_flush_space,
TP_PROTO(const struct btrfs_fs_info *fs_info, u64 flags, u64 num_bytes,
- int state, int ret),
+ int state, int ret, bool for_preempt),
- TP_ARGS(fs_info, flags, num_bytes, state, ret),
+ TP_ARGS(fs_info, flags, num_bytes, state, ret, for_preempt),
TP_STRUCT__entry_btrfs(
__field( u64, flags )
__field( u64, num_bytes )
__field( int, state )
__field( int, ret )
+ __field( bool, for_preempt )
),
TP_fast_assign_btrfs(fs_info,
@@ -1127,15 +1130,16 @@ TRACE_EVENT(btrfs_flush_space,
__entry->num_bytes = num_bytes;
__entry->state = state;
__entry->ret = ret;
+ __entry->for_preempt = for_preempt;
),
- TP_printk_btrfs("state=%d(%s) flags=%llu(%s) num_bytes=%llu ret=%d",
+ TP_printk_btrfs("state=%d(%s) flags=%llu(%s) num_bytes=%llu ret=%d for_preempt=%d",
__entry->state,
__print_symbolic(__entry->state, FLUSH_STATES),
__entry->flags,
__print_flags((unsigned long)__entry->flags, "|",
BTRFS_GROUP_FLAGS),
- __entry->num_bytes, __entry->ret)
+ __entry->num_bytes, __entry->ret, __entry->for_preempt)
);
DECLARE_EVENT_CLASS(btrfs__reserved_extent,
@@ -2025,6 +2029,97 @@ TRACE_EVENT(btrfs_convert_extent_bit,
__print_flags(__entry->clear_bits, "|", EXTENT_FLAGS))
);
+DECLARE_EVENT_CLASS(btrfs_dump_space_info, /* event class: records space_info counters plus fs_info block reserve totals */
+ TP_PROTO(const struct btrfs_fs_info *fs_info,
+ const struct btrfs_space_info *sinfo),
+
+ TP_ARGS(fs_info, sinfo),
+
+ TP_STRUCT__entry_btrfs(
+ __field( u64, flags ) /* copy of sinfo->flags */
+ __field( u64, total_bytes )
+ __field( u64, bytes_used )
+ __field( u64, bytes_pinned )
+ __field( u64, bytes_reserved )
+ __field( u64, bytes_may_use )
+ __field( u64, bytes_readonly )
+ __field( u64, reclaim_size )
+ __field( int, clamp ) /* the only non-u64 field; printed with %d */
+ __field( u64, global_reserved ) /* the four *_reserved fields come from fs_info, not sinfo */
+ __field( u64, trans_reserved )
+ __field( u64, delayed_refs_reserved )
+ __field( u64, delayed_reserved )
+ __field( u64, free_chunk_space )
+ ),
+
+ TP_fast_assign_btrfs(fs_info, /* every assignment below is a plain copy; no locking is visible in this block */
+ __entry->flags = sinfo->flags;
+ __entry->total_bytes = sinfo->total_bytes;
+ __entry->bytes_used = sinfo->bytes_used;
+ __entry->bytes_pinned = sinfo->bytes_pinned;
+ __entry->bytes_reserved = sinfo->bytes_reserved;
+ __entry->bytes_may_use = sinfo->bytes_may_use;
+ __entry->bytes_readonly = sinfo->bytes_readonly;
+ __entry->reclaim_size = sinfo->reclaim_size;
+ __entry->clamp = sinfo->clamp;
+ __entry->global_reserved = fs_info->global_block_rsv.reserved; /* .reserved member of each block reserve */
+ __entry->trans_reserved = fs_info->trans_block_rsv.reserved;
+ __entry->delayed_refs_reserved = fs_info->delayed_refs_rsv.reserved;
+ __entry->delayed_reserved = fs_info->delayed_block_rsv.reserved;
+ __entry->free_chunk_space = atomic64_read(&fs_info->free_chunk_space); /* read through atomic64_read(), unlike the fields above */
+ ),
+
+ TP_printk_btrfs("flags=%s total_bytes=%llu bytes_used=%llu " /* argument order below must match these specifiers */
+ "bytes_pinned=%llu bytes_reserved=%llu "
+ "bytes_may_use=%llu bytes_readonly=%llu "
+ "reclaim_size=%llu clamp=%d global_reserved=%llu "
+ "trans_reserved=%llu delayed_refs_reserved=%llu "
+ "delayed_reserved=%llu chunk_free_space=%llu", /* NOTE(review): label is chunk_free_space but the field is free_chunk_space */
+ __print_flags(__entry->flags, "|", BTRFS_GROUP_FLAGS), /* NOTE(review): u64 passed without the (unsigned long) cast used in btrfs_flush_space */
+ __entry->total_bytes, __entry->bytes_used,
+ __entry->bytes_pinned, __entry->bytes_reserved,
+ __entry->bytes_may_use, __entry->bytes_readonly,
+ __entry->reclaim_size, __entry->clamp,
+ __entry->global_reserved, __entry->trans_reserved,
+ __entry->delayed_refs_reserved,
+ __entry->delayed_reserved, __entry->free_chunk_space)
+);
+
+DEFINE_EVENT(btrfs_dump_space_info, btrfs_done_preemptive_reclaim, /* event btrfs_done_preemptive_reclaim, instantiated from class btrfs_dump_space_info */
+ TP_PROTO(const struct btrfs_fs_info *fs_info, /* prototype and args repeat those of the class */
+ const struct btrfs_space_info *sinfo),
+ TP_ARGS(fs_info, sinfo)
+);
+
+TRACE_EVENT(btrfs_reserve_ticket, /* standalone event: records the five scalar arguments of a ticket reservation as passed in */
+ TP_PROTO(const struct btrfs_fs_info *fs_info, u64 flags, u64 bytes,
+ u64 start_ns, int flush, int error),
+
+ TP_ARGS(fs_info, flags, bytes, start_ns, flush, error),
+
+ TP_STRUCT__entry_btrfs(
+ __field( u64, flags ) /* printed symbolically via BTRFS_GROUP_FLAGS */
+ __field( u64, bytes )
+ __field( u64, start_ns ) /* presumably a start timestamp in nanoseconds; confirm against the caller */
+ __field( int, flush ) /* printed symbolically via FLUSH_ACTIONS */
+ __field( int, error )
+ ),
+
+ TP_fast_assign_btrfs(fs_info, /* straight copies of the arguments; nothing is derived */
+ __entry->flags = flags;
+ __entry->bytes = bytes;
+ __entry->start_ns = start_ns; /* stored unchanged; no elapsed time is computed here */
+ __entry->flush = flush;
+ __entry->error = error;
+ ),
+
+ TP_printk_btrfs("flags=%s bytes=%llu start_ns=%llu flush=%s error=%d", /* flags and flush are printed as names only, without the raw number */
+ __print_flags(__entry->flags, "|", BTRFS_GROUP_FLAGS), /* NOTE(review): u64 passed without the (unsigned long) cast used in btrfs_flush_space */
+ __entry->bytes, __entry->start_ns,
+ __print_symbolic(__entry->flush, FLUSH_ACTIONS),
+ __entry->error)
+);
+
DECLARE_EVENT_CLASS(btrfs_sleep_tree_lock,
TP_PROTO(const struct extent_buffer *eb, u64 start_ns),