summaryrefslogtreecommitdiffstats
path: root/fs/f2fs/super.c
diff options
context:
space:
mode:
authorChao Yu <chao@kernel.org>2021-08-03 08:15:43 +0800
committerJaegeuk Kim <jaegeuk@kernel.org>2021-08-03 11:16:17 -0700
commit4f993264fe2965c344f223d854ccbb549b16ed71 (patch)
treee6ddde3c3c83f4551f75c2a0337e34cd3c95c91f /fs/f2fs/super.c
parentdc675a97129c4d9d5af55a3d7f23d7e092b8e032 (diff)
downloadlinux-4f993264fe2965c344f223d854ccbb549b16ed71.tar.bz2
f2fs: introduce discard_unit mount option
As James Z reported in bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=213877 [1.] One-line summary of the problem: Mount multiple SMR block devices exceed certain number cause system non-response [2.] Full description of the problem/report: Created some F2FS on SMR devices (mkfs.f2fs -m), then mounted in sequence. Each device is the same Model: HGST HSH721414AL (Size 14TB). Empirically, found that when the amount of SMR device * 1.5Gb > System RAM, the system ran out of memory and hung. No dmesg output. For example, 24 SMR Disk need 24*1.5GB = 36GB. A system with 32G RAM can only mount 21 devices, the 22nd device will be a reproducible cause of system hang. The number of SMR devices with other FS mounted on this system does not interfere with the result above. [3.] Keywords (i.e., modules, networking, kernel): F2FS, SMR, Memory [4.] Kernel information [4.1.] Kernel version (uname -a): Linux 5.13.4-200.fc34.x86_64 #1 SMP Tue Jul 20 20:27:29 UTC 2021 x86_64 x86_64 x86_64 GNU/Linux [4.2.] Kernel .config file: Default Fedora 34 with f2fs-tools-1.14.0-2.fc34.x86_64 [5.] Most recent kernel version which did not have the bug: None [6.] Output of Oops.. message (if applicable) with symbolic information resolved (see Documentation/admin-guide/oops-tracing.rst) None [7.] A small shell script or example program which triggers the problem (if possible) mount /dev/sdX /mnt/0X [8.] Memory consumption With 24 * 14T SMR Block device with F2FS free -g total used free shared buff/cache available Mem: 46 36 0 0 10 10 Swap: 0 0 0 With 3 * 14T SMR Block device with F2FS free -g total used free shared buff/cache available Mem: 7 5 0 0 1 1 Swap: 7 0 7 The root cause is, there are three bitmaps: - cur_valid_map - ckpt_valid_map - discard_map and each of them will cost ~500MB memory, {cur, ckpt}_valid_map are necessary, but discard_map is optional, since this bitmap will only be useful in mountpoint that small discard is enabled. For a blkzoned device such as SMR or ZNS devices, f2fs will only issue discard for a section(zone) when all blocks of that section are invalid, so, for such device, we don't need small discard functionality at all. This patch introduces a new mountoption "discard_unit=block|segment| section" to support issuing discard with different basic unit which is aligned to block, segment or section, so that user can specify "discard_unit=segment" or "discard_unit=section" to disable small discard functionality. Note that this mount option can not be changed by remount() due to related metadata need to be initialized during mount(). In order to save memory, let's use "discard_unit=section" for blkzoned device by default. Signed-off-by: Chao Yu <chao@kernel.org> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
Diffstat (limited to 'fs/f2fs/super.c')
-rw-r--r--fs/f2fs/super.c54
1 files changed, 51 insertions, 3 deletions
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 3617aa5f0477..a4fed184b811 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -155,6 +155,7 @@ enum {
Opt_atgc,
Opt_gc_merge,
Opt_nogc_merge,
+ Opt_discard_unit,
Opt_err,
};
@@ -231,6 +232,7 @@ static match_table_t f2fs_tokens = {
{Opt_atgc, "atgc"},
{Opt_gc_merge, "gc_merge"},
{Opt_nogc_merge, "nogc_merge"},
+ {Opt_discard_unit, "discard_unit=%s"},
{Opt_err, NULL},
};
@@ -1173,6 +1175,25 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
case Opt_nogc_merge:
clear_opt(sbi, GC_MERGE);
break;
+ case Opt_discard_unit:
+ name = match_strdup(&args[0]);
+ if (!name)
+ return -ENOMEM;
+ if (!strcmp(name, "block")) {
+ F2FS_OPTION(sbi).discard_unit =
+ DISCARD_UNIT_BLOCK;
+ } else if (!strcmp(name, "segment")) {
+ F2FS_OPTION(sbi).discard_unit =
+ DISCARD_UNIT_SEGMENT;
+ } else if (!strcmp(name, "section")) {
+ F2FS_OPTION(sbi).discard_unit =
+ DISCARD_UNIT_SECTION;
+ } else {
+ kfree(name);
+ return -EINVAL;
+ }
+ kfree(name);
+ break;
default:
f2fs_err(sbi, "Unrecognized mount option \"%s\" or missing value",
p);
@@ -1211,6 +1232,14 @@ default_check:
return -EINVAL;
}
#endif
+ if (f2fs_sb_has_blkzoned(sbi)) {
+ if (F2FS_OPTION(sbi).discard_unit !=
+ DISCARD_UNIT_SECTION) {
+ f2fs_info(sbi, "Zoned block device doesn't need small discard, set discard_unit=section by default");
+ F2FS_OPTION(sbi).discard_unit =
+ DISCARD_UNIT_SECTION;
+ }
+ }
#ifdef CONFIG_F2FS_FS_COMPRESSION
if (f2fs_test_compress_extension(sbi)) {
@@ -1925,6 +1954,14 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
if (test_opt(sbi, ATGC))
seq_puts(seq, ",atgc");
+
+ if (F2FS_OPTION(sbi).discard_unit == DISCARD_UNIT_BLOCK)
+ seq_printf(seq, ",discard_unit=%s", "block");
+ else if (F2FS_OPTION(sbi).discard_unit == DISCARD_UNIT_SEGMENT)
+ seq_printf(seq, ",discard_unit=%s", "segment");
+ else if (F2FS_OPTION(sbi).discard_unit == DISCARD_UNIT_SECTION)
+ seq_printf(seq, ",discard_unit=%s", "section");
+
return 0;
}
@@ -1961,10 +1998,13 @@ static void default_options(struct f2fs_sb_info *sbi)
sbi->sb->s_flags |= SB_LAZYTIME;
set_opt(sbi, FLUSH_MERGE);
set_opt(sbi, DISCARD);
- if (f2fs_sb_has_blkzoned(sbi))
+ if (f2fs_sb_has_blkzoned(sbi)) {
F2FS_OPTION(sbi).fs_mode = FS_MODE_LFS;
- else
+ F2FS_OPTION(sbi).discard_unit = DISCARD_UNIT_SECTION;
+ } else {
F2FS_OPTION(sbi).fs_mode = FS_MODE_ADAPTIVE;
+ F2FS_OPTION(sbi).discard_unit = DISCARD_UNIT_BLOCK;
+ }
#ifdef CONFIG_F2FS_FS_XATTR
set_opt(sbi, XATTR_USER);
@@ -2066,6 +2106,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
bool no_io_align = !F2FS_IO_ALIGNED(sbi);
bool no_atgc = !test_opt(sbi, ATGC);
bool no_compress_cache = !test_opt(sbi, COMPRESS_CACHE);
+ bool block_unit_discard = f2fs_block_unit_discard(sbi);
#ifdef CONFIG_QUOTA
int i, j;
#endif
@@ -2166,6 +2207,12 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
goto restore_opts;
}
+ if (block_unit_discard != f2fs_block_unit_discard(sbi)) {
+ err = -EINVAL;
+ f2fs_warn(sbi, "switch discard_unit option is not allowed");
+ goto restore_opts;
+ }
+
if ((*flags & SB_RDONLY) && test_opt(sbi, DISABLE_CHECKPOINT)) {
err = -EINVAL;
f2fs_warn(sbi, "disabling checkpoint not compatible with read-only");
@@ -3778,7 +3825,8 @@ static void f2fs_tuning_parameters(struct f2fs_sb_info *sbi)
/* adjust parameters according to the volume size */
if (sm_i->main_segments <= SMALL_VOLUME_SEGMENTS) {
F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_REUSE;
- sm_i->dcc_info->discard_granularity = 1;
+ if (f2fs_block_unit_discard(sbi))
+ sm_i->dcc_info->discard_granularity = 1;
sm_i->ipu_policy = 1 << F2FS_IPU_FORCE;
}