From 01fc48e8929e45e67527200017cff4e74e4ba054 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Fri, 17 Aug 2012 09:46:17 -0400 Subject: ext4: don't load the block bitmap for block groups which have no space Add a short circuit check to ext4_mb_group_group() so that we don't bother to load the block bitmap for a block group which does not have any space available. (Or which does not have enough space until we are in desperation mode, i.e., when cr == 3.) Resolves-bug: https://bugzilla.kernel.org/show_bug.cgi?id=45741 Reported-by: mirek@me.com Signed-off-by: "Theodore Ts'o" --- fs/ext4/mballoc.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'fs/ext4/mballoc.c') diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 8eae94771c45..3a57975b73cc 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -1862,6 +1862,12 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac, BUG_ON(cr < 0 || cr >= 4); + free = grp->bb_free; + if (free == 0) + return 0; + if (cr <= 2 && free < ac->ac_g_ex.fe_len) + return 0; + /* We only do this if the grp has never been initialized */ if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) { int ret = ext4_mb_init_group(ac->ac_sb, group); @@ -1869,10 +1875,7 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac, return 0; } - free = grp->bb_free; fragments = grp->bb_fragments; - if (free == 0) - return 0; if (fragments == 0) return 0; -- cgit v1.2.3 From 15c006a22f8e004afbce42a54c878162355f1587 Mon Sep 17 00:00:00 2001 From: Robin Dong Date: Fri, 17 Aug 2012 10:02:17 -0400 Subject: ext4: remove unused function argument 'order' in mb_find_extent() All the routines call mb_find_extent are setting argument 'order' to 0 just like: mb_find_extent(e4b, 0, ex.fe_start, ex.fe_len, &ex); therefore the useless argument should be removed. Signed-off-by: Robin Dong Signed-off-by: "Theodore Ts'o" --- fs/ext4/mballoc.c | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) (limited to 'fs/ext4/mballoc.c') diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 3a57975b73cc..6873571c9f44 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -1338,17 +1338,17 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b, mb_check_buddy(e4b); } -static int mb_find_extent(struct ext4_buddy *e4b, int order, int block, +static int mb_find_extent(struct ext4_buddy *e4b, int block, int needed, struct ext4_free_extent *ex) { int next = block; - int max; + int max, order; void *buddy; assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group)); BUG_ON(ex == NULL); - buddy = mb_find_buddy(e4b, order, &max); + buddy = mb_find_buddy(e4b, 0, &max); BUG_ON(buddy == NULL); BUG_ON(block >= max); if (mb_test_bit(block, buddy)) { @@ -1358,12 +1358,9 @@ static int mb_find_extent(struct ext4_buddy *e4b, int order, int block, return 0; } - /* FIXME dorp order completely ? */ - if (likely(order == 0)) { - /* find actual order */ - order = mb_find_order_for_block(e4b, block); - block = block >> order; - } + /* find actual order */ + order = mb_find_order_for_block(e4b, block); + block = block >> order; ex->fe_len = 1 << order; ex->fe_start = block << order; @@ -1549,7 +1546,7 @@ static void ext4_mb_check_limits(struct ext4_allocation_context *ac, /* recheck chunk's availability - we don't know * when it was found (within this lock-unlock * period or not) */ - max = mb_find_extent(e4b, 0, bex->fe_start, gex->fe_len, &ex); + max = mb_find_extent(e4b, bex->fe_start, gex->fe_len, &ex); if (max >= gex->fe_len) { ext4_mb_use_best_found(ac, e4b); return; @@ -1641,7 +1638,7 @@ int ext4_mb_try_best_found(struct ext4_allocation_context *ac, return err; ext4_lock_group(ac->ac_sb, group); - max = mb_find_extent(e4b, 0, ex.fe_start, ex.fe_len, &ex); + max = mb_find_extent(e4b, ex.fe_start, ex.fe_len, &ex); if (max > 0) { ac->ac_b_ex = ex; @@ -1672,7 +1669,7 @@ int ext4_mb_find_by_goal(struct ext4_allocation_context *ac, return err; ext4_lock_group(ac->ac_sb, group); - max = mb_find_extent(e4b, 0, ac->ac_g_ex.fe_start, + max = mb_find_extent(e4b, ac->ac_g_ex.fe_start, ac->ac_g_ex.fe_len, &ex); if (max >= ac->ac_g_ex.fe_len && ac->ac_g_ex.fe_len == sbi->s_stripe) { @@ -1788,7 +1785,7 @@ void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac, break; } - mb_find_extent(e4b, 0, i, ac->ac_g_ex.fe_len, &ex); + mb_find_extent(e4b, i, ac->ac_g_ex.fe_len, &ex); BUG_ON(ex.fe_len <= 0); if (free < ex.fe_len) { ext4_grp_locked_error(sb, e4b->bd_group, 0, 0, @@ -1840,7 +1837,7 @@ void ext4_mb_scan_aligned(struct ext4_allocation_context *ac, while (i < EXT4_CLUSTERS_PER_GROUP(sb)) { if (!mb_test_bit(i, bitmap)) { - max = mb_find_extent(e4b, 0, i, sbi->s_stripe, &ex); + max = mb_find_extent(e4b, i, sbi->s_stripe, &ex); if (max >= sbi->s_stripe) { ac->ac_found++; ac->ac_b_ex = ex; -- cgit v1.2.3 From 28623c2f5b0dca3c3ea34fd6108940661352e276 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 5 Sep 2012 01:31:50 -0400 Subject: ext4: grow the s_group_info array as needed Previously we allocated the s_group_info array with enough space for any future possible growth of the file system via online resize. This is unfortunate because it wastes memory, and it doesn't work for the meta_bg scheme, since there is no limit based on the number of reserved gdt blocks. So add the code to grow the s_group_info array as needed. Signed-off-by: "Theodore Ts'o" --- fs/ext4/ext4.h | 3 +++ fs/ext4/mballoc.c | 79 +++++++++++++++++++++++++++---------------------------- fs/ext4/resize.c | 8 ++++++ 3 files changed, 50 insertions(+), 40 deletions(-) (limited to 'fs/ext4/mballoc.c') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 464cff711ed6..8b6902c4d7be 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1233,6 +1233,7 @@ struct ext4_sb_info { spinlock_t s_md_lock; unsigned short *s_mb_offsets; unsigned int *s_mb_maxs; + unsigned int s_group_info_size; /* tunables */ unsigned long s_stripe; @@ -1971,6 +1972,8 @@ extern void ext4_exit_mballoc(void); extern void ext4_free_blocks(handle_t *handle, struct inode *inode, struct buffer_head *bh, ext4_fsblk_t block, unsigned long count, int flags); +extern int ext4_mb_alloc_groupinfo(struct super_block *sb, + ext4_group_t ngroups); extern int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t i, struct ext4_group_desc *desc); extern int ext4_group_add_blocks(handle_t *handle, struct super_block *sb, diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 6873571c9f44..2102c20f7e98 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -24,6 +24,7 @@ #include "ext4_jbd2.h" #include "mballoc.h" #include +#include #include #include @@ -2163,6 +2164,39 @@ static struct kmem_cache *get_groupinfo_cache(int blocksize_bits) return cachep; } +/* + * Allocate the top-level s_group_info array for the specified number + * of groups + */ +int ext4_mb_alloc_groupinfo(struct super_block *sb, ext4_group_t ngroups) +{ + struct ext4_sb_info *sbi = EXT4_SB(sb); + unsigned size; + struct ext4_group_info ***new_groupinfo; + + size = (ngroups + EXT4_DESC_PER_BLOCK(sb) - 1) >> + EXT4_DESC_PER_BLOCK_BITS(sb); + if (size <= sbi->s_group_info_size) + return 0; + + size = roundup_pow_of_two(sizeof(*sbi->s_group_info) * size); + new_groupinfo = ext4_kvzalloc(size, GFP_KERNEL); + if (!new_groupinfo) { + ext4_msg(sb, KERN_ERR, "can't allocate buddy meta group"); + return -ENOMEM; + } + if (sbi->s_group_info) { + memcpy(new_groupinfo, sbi->s_group_info, + sbi->s_group_info_size * sizeof(*sbi->s_group_info)); + ext4_kvfree(sbi->s_group_info); + } + sbi->s_group_info = new_groupinfo; + sbi->s_group_info_size = size / sizeof(*sbi->s_group_info); + ext4_debug("allocated s_groupinfo array for %d meta_bg's\n", + sbi->s_group_info_size); + return 0; +} + /* Create and initialize ext4_group_info data for the given group. */ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, struct ext4_group_desc *desc) @@ -2252,49 +2286,14 @@ static int ext4_mb_init_backend(struct super_block *sb) ext4_group_t ngroups = ext4_get_groups_count(sb); ext4_group_t i; struct ext4_sb_info *sbi = EXT4_SB(sb); - struct ext4_super_block *es = sbi->s_es; - int num_meta_group_infos; - int num_meta_group_infos_max; - int array_size; + int err; struct ext4_group_desc *desc; struct kmem_cache *cachep; - /* This is the number of blocks used by GDT */ - num_meta_group_infos = (ngroups + EXT4_DESC_PER_BLOCK(sb) - - 1) >> EXT4_DESC_PER_BLOCK_BITS(sb); - - /* - * This is the total number of blocks used by GDT including - * the number of reserved blocks for GDT. - * The s_group_info array is allocated with this value - * to allow a clean online resize without a complex - * manipulation of pointer. - * The drawback is the unused memory when no resize - * occurs but it's very low in terms of pages - * (see comments below) - * Need to handle this properly when META_BG resizing is allowed - */ - num_meta_group_infos_max = num_meta_group_infos + - le16_to_cpu(es->s_reserved_gdt_blocks); + err = ext4_mb_alloc_groupinfo(sb, ngroups); + if (err) + return err; - /* - * array_size is the size of s_group_info array. We round it - * to the next power of two because this approximation is done - * internally by kmalloc so we can have some more memory - * for free here (e.g. may be used for META_BG resize). - */ - array_size = 1; - while (array_size < sizeof(*sbi->s_group_info) * - num_meta_group_infos_max) - array_size = array_size << 1; - /* An 8TB filesystem with 64-bit pointers requires a 4096 byte - * kmalloc. A 128kb malloc should suffice for a 256TB filesystem. - * So a two level scheme suffices for now. */ - sbi->s_group_info = ext4_kvzalloc(array_size, GFP_KERNEL); - if (sbi->s_group_info == NULL) { - ext4_msg(sb, KERN_ERR, "can't allocate buddy meta group"); - return -ENOMEM; - } sbi->s_buddy_cache = new_inode(sb); if (sbi->s_buddy_cache == NULL) { ext4_msg(sb, KERN_ERR, "can't get new inode"); @@ -2322,7 +2321,7 @@ err_freebuddy: cachep = get_groupinfo_cache(sb->s_blocksize_bits); while (i-- > 0) kmem_cache_free(cachep, ext4_get_group_info(sb, i)); - i = num_meta_group_infos; + i = sbi->s_group_info_size; while (i-- > 0) kfree(sbi->s_group_info[i]); iput(sbi->s_buddy_cache); diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 3f5c67bf13a2..f288933bf4c0 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -1507,6 +1507,10 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) if (err) return err; + err = ext4_mb_alloc_groupinfo(sb, input->group + 1); + if (err) + goto out; + flex_gd.count = 1; flex_gd.groups = input; flex_gd.bg_flags = &bg_flags; @@ -1732,6 +1736,10 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count) if (err) return err; + err = ext4_mb_alloc_groupinfo(sb, n_group + 1); + if (err) + goto out; + flex_gd = alloc_flex_gd(flexbg_size); if (flex_gd == NULL) { err = -ENOMEM; -- cgit v1.2.3 From b5e2368baeddf401bf3da9e364fc1c96676279cd Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Tue, 18 Sep 2012 13:33:44 -0400 Subject: ext4: re-enable -o discard functionality in no-journal mode This is a revert of commit b56ff9d397ce, which removed the call to ext4_issue_discard() to fix a BUG reported because ext4_issue_discard() was being called from inside a block group spinlock. As it turns out this bug had already been fixed by Lukas Czerner in commit 53fdcf992d61 by the simple expedient of moving when we call ext4_issue_discard() outside the spinlock. So it should be safe to re-enable this functionality, which I tested by putting an BUG_ON(in_atomic) just after the restored callsite to ext4_issue_discard(). Addresses-Google-Bug: #6750518 Signed-off-by: "Theodore Ts'o" Cc: Anatol Pomozov --- fs/ext4/mballoc.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/ext4/mballoc.c') diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 2102c20f7e98..2c7c082b8169 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -4656,6 +4656,8 @@ do_more: * with group lock held. generate_buddy look at * them with group lock_held */ + if (test_opt(sb, DISCARD)) + ext4_issue_discard(sb, block_group, bit, count); ext4_lock_group(sb, block_group); mb_clear_bits(bitmap_bh->b_data, bit, count_clusters); mb_free_blocks(inode, &e4b, bit, count_clusters); -- cgit v1.2.3 From 838cd0cf9af52e034ee81513642083bbe8e4ddb1 Mon Sep 17 00:00:00 2001 From: Yongqiang Yang Date: Sun, 23 Sep 2012 23:10:51 -0400 Subject: ext4: check free block counters in ext4_mb_find_by_goal Free block counters should be checked before doing allocation. Signed-off-by: Yongqiang Yang Signed-off-by: "Theodore Ts'o" --- fs/ext4/mballoc.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs/ext4/mballoc.c') diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 2c7c082b8169..bb821a924049 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -1660,10 +1660,13 @@ int ext4_mb_find_by_goal(struct ext4_allocation_context *ac, int max; int err; struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); + struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group); struct ext4_free_extent ex; if (!(ac->ac_flags & EXT4_MB_HINT_TRY_GOAL)) return 0; + if (grp->bb_free == 0) + return 0; err = ext4_mb_load_buddy(ac->ac_sb, group, e4b); if (err) -- cgit v1.2.3 From 85556c9a503ababa889bd7bb7a9b3effba795d00 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 26 Sep 2012 20:43:37 -0400 Subject: ext4: use kmem_cache_zalloc instead of kmem_cache_alloc/memset Using kmem_cache_zalloc() instead of kmem_cache_alloc() and memset(). spatch with a semantic match is used to found this problem. (http://coccinelle.lip6.fr/) Signed-off-by: Wei Yongjun Signed-off-by: "Theodore Ts'o" --- fs/ext4/mballoc.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'fs/ext4/mballoc.c') diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index bb821a924049..8ec6f88b7a37 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2232,12 +2232,11 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)]; i = group & (EXT4_DESC_PER_BLOCK(sb) - 1); - meta_group_info[i] = kmem_cache_alloc(cachep, GFP_KERNEL); + meta_group_info[i] = kmem_cache_zalloc(cachep, GFP_KERNEL); if (meta_group_info[i] == NULL) { ext4_msg(sb, KERN_ERR, "can't allocate buddy mem"); goto exit_group_info; } - memset(meta_group_info[i], 0, kmem_cache_size(cachep)); set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &(meta_group_info[i]->bb_state)); @@ -4010,7 +4009,6 @@ ext4_mb_initialize_context(struct ext4_allocation_context *ac, ext4_get_group_no_and_offset(sb, goal, &group, &block); /* set up allocation goals */ - memset(ac, 0, sizeof(struct ext4_allocation_context)); ac->ac_b_ex.fe_logical = ar->logical & ~(sbi->s_cluster_ratio - 1); ac->ac_status = AC_STATUS_CONTINUE; ac->ac_sb = sb; @@ -4293,7 +4291,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, } } - ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); + ac = kmem_cache_zalloc(ext4_ac_cachep, GFP_NOFS); if (!ac) { ar->len = 0; *errp = -ENOMEM; -- cgit v1.2.3 From aaf7d73e54b6915310ece11aedb19ec06a833642 Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Wed, 26 Sep 2012 22:21:21 -0400 Subject: ext4: enable FITRIM ioctl on bigalloc file system With a minor tweaks regarding minimum extent size to discard and discarded bytes reporting the FITRIM can be enabled on bigalloc file system and it works without any problem. This patch fixes minlen handling and discarded bytes reporting to take into consideration bigalloc enabled file systems and finally removes the restriction and allow FITRIM to be used on file system with bigalloc feature enabled. Reviewed-by: Carlos Maiolino Signed-off-by: Lukas Czerner Signed-off-by: "Theodore Ts'o" --- fs/ext4/ioctl.c | 7 ------- fs/ext4/mballoc.c | 5 +++-- 2 files changed, 3 insertions(+), 9 deletions(-) (limited to 'fs/ext4/mballoc.c') diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 8b84fe28ccaf..4c8174aa685c 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -404,13 +404,6 @@ resizefs_out: if (!blk_queue_discard(q)) return -EOPNOTSUPP; - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_BIGALLOC)) { - ext4_msg(sb, KERN_ERR, - "FITRIM not supported with bigalloc"); - return -EOPNOTSUPP; - } - if (copy_from_user(&range, (struct fstrim_range __user *)arg, sizeof(range))) return -EFAULT; diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 8ec6f88b7a37..a415465f97a0 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -4990,7 +4990,8 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) start = range->start >> sb->s_blocksize_bits; end = start + (range->len >> sb->s_blocksize_bits) - 1; - minlen = range->minlen >> sb->s_blocksize_bits; + minlen = EXT4_NUM_B2C(EXT4_SB(sb), + range->minlen >> sb->s_blocksize_bits); if (unlikely(minlen > EXT4_CLUSTERS_PER_GROUP(sb)) || unlikely(start >= max_blks)) @@ -5050,6 +5051,6 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) atomic_set(&EXT4_SB(sb)->s_last_trim_minblks, minlen); out: - range->len = trimmed * sb->s_blocksize; + range->len = EXT4_C2B(EXT4_SB(sb), trimmed) << sb->s_blocksize_bits; return ret; } -- cgit v1.2.3