diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2017-07-09 09:31:22 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-07-09 09:31:22 -0700 |
commit | bc2c6421cbb420677c4bb56adaf434414770ce8a (patch) | |
tree | dc488ded5d21f28c82ca62acd23dbea299aaa5e1 /fs/ext4/mballoc.c | |
parent | 58f587cb0b603de3d8869e021d4fa704e065afa8 (diff) | |
parent | ff95015648df445999c8483270905f7d3dec51e1 (diff) | |
download | linux-bc2c6421cbb420677c4bb56adaf434414770ce8a.tar.bz2 |
Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
Pull ext4 updates from Ted Ts'o:
"The first major feature for ext4 this merge window is the largedir
feature, which allows ext4 directories to support over 2 billion
directory entries (assuming ~64 byte file names; in practice, users
will run into practical performance limits first.) This feature was
originally written by the Lustre team, and credit goes to Artem
Blagodarenko from Seagate for getting this feature upstream.
The second major major feature allows ext4 to support extended
attribute values up to 64k. This feature was also originally from
Lustre, and has been enhanced by Tahsin Erdogan from Google with a
deduplication feature so that if multiple files have the same xattr
value (for example, Windows ACL's stored by Samba), only one copy will
be stored on disk for encoding and caching efficiency.
We also have the usual set of bug fixes, cleanups, and optimizations"
* tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (47 commits)
ext4: fix spelling mistake: "prellocated" -> "preallocated"
ext4: fix __ext4_new_inode() journal credits calculation
ext4: skip ext4_init_security() and encryption on ea_inodes
fs: generic_block_bmap(): initialize all of the fields in the temp bh
ext4: change fast symlink test to not rely on i_blocks
ext4: require key for truncate(2) of encrypted file
ext4: don't bother checking for encryption key in ->mmap()
ext4: check return value of kstrtoull correctly in reserved_clusters_store
ext4: fix off-by-one fsmap error on 1k block filesystems
ext4: return EFSBADCRC if a bad checksum error is found in ext4_find_entry()
ext4: return EIO on read error in ext4_find_entry
ext4: forbid encrypting root directory
ext4: send parallel discards on commit completions
ext4: avoid unnecessary stalls in ext4_evict_inode()
ext4: add nombcache mount option
ext4: strong binding of xattr inode references
ext4: eliminate xattr entry e_hash recalculation for removes
ext4: reserve space for xattr entries/names
quota: add get_inode_usage callback to transfer multi-inode charges
ext4: xattr inode deduplication
...
Diffstat (limited to 'fs/ext4/mballoc.c')
-rw-r--r-- | fs/ext4/mballoc.c | 145 |
1 files changed, 94 insertions, 51 deletions
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index b7928cddd539..581e357e8406 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -367,8 +367,6 @@ static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, ext4_group_t group); static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap, ext4_group_t group); -static void ext4_free_data_callback(struct super_block *sb, - struct ext4_journal_cb_entry *jce, int rc); static inline void *mb_correct_addr_and_bit(int *bit, void *addr) { @@ -2639,6 +2637,7 @@ int ext4_mb_init(struct super_block *sb) spin_lock_init(&sbi->s_md_lock); spin_lock_init(&sbi->s_bal_lock); sbi->s_mb_free_pending = 0; + INIT_LIST_HEAD(&sbi->s_freed_data_list); sbi->s_mb_max_to_scan = MB_DEFAULT_MAX_TO_SCAN; sbi->s_mb_min_to_scan = MB_DEFAULT_MIN_TO_SCAN; @@ -2782,7 +2781,8 @@ int ext4_mb_release(struct super_block *sb) } static inline int ext4_issue_discard(struct super_block *sb, - ext4_group_t block_group, ext4_grpblk_t cluster, int count) + ext4_group_t block_group, ext4_grpblk_t cluster, int count, + struct bio **biop) { ext4_fsblk_t discard_block; @@ -2791,18 +2791,18 @@ static inline int ext4_issue_discard(struct super_block *sb, count = EXT4_C2B(EXT4_SB(sb), count); trace_ext4_discard_blocks(sb, (unsigned long long) discard_block, count); - return sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0); + if (biop) { + return __blkdev_issue_discard(sb->s_bdev, + (sector_t)discard_block << (sb->s_blocksize_bits - 9), + (sector_t)count << (sb->s_blocksize_bits - 9), + GFP_NOFS, 0, biop); + } else + return sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0); } -/* - * This function is called by the jbd2 layer once the commit has finished, - * so we know we can free the blocks that were released with that commit. - */ -static void ext4_free_data_callback(struct super_block *sb, - struct ext4_journal_cb_entry *jce, - int rc) +static void ext4_free_data_in_buddy(struct super_block *sb, + struct ext4_free_data *entry) { - struct ext4_free_data *entry = (struct ext4_free_data *)jce; struct ext4_buddy e4b; struct ext4_group_info *db; int err, count = 0, count2 = 0; @@ -2810,18 +2810,6 @@ static void ext4_free_data_callback(struct super_block *sb, mb_debug(1, "gonna free %u blocks in group %u (0x%p):", entry->efd_count, entry->efd_group, entry); - if (test_opt(sb, DISCARD)) { - err = ext4_issue_discard(sb, entry->efd_group, - entry->efd_start_cluster, - entry->efd_count); - if (err && err != -EOPNOTSUPP) - ext4_msg(sb, KERN_WARNING, "discard request in" - " group:%d block:%d count:%d failed" - " with %d", entry->efd_group, - entry->efd_start_cluster, - entry->efd_count, err); - } - err = ext4_mb_load_buddy(sb, entry->efd_group, &e4b); /* we expect to find existing buddy because it's pinned */ BUG_ON(err != 0); @@ -2862,6 +2850,56 @@ static void ext4_free_data_callback(struct super_block *sb, mb_debug(1, "freed %u blocks in %u structures\n", count, count2); } +/* + * This function is called by the jbd2 layer once the commit has finished, + * so we know we can free the blocks that were released with that commit. + */ +void ext4_process_freed_data(struct super_block *sb, tid_t commit_tid) +{ + struct ext4_sb_info *sbi = EXT4_SB(sb); + struct ext4_free_data *entry, *tmp; + struct bio *discard_bio = NULL; + struct list_head freed_data_list; + struct list_head *cut_pos = NULL; + int err; + + INIT_LIST_HEAD(&freed_data_list); + + spin_lock(&sbi->s_md_lock); + list_for_each_entry(entry, &sbi->s_freed_data_list, efd_list) { + if (entry->efd_tid != commit_tid) + break; + cut_pos = &entry->efd_list; + } + if (cut_pos) + list_cut_position(&freed_data_list, &sbi->s_freed_data_list, + cut_pos); + spin_unlock(&sbi->s_md_lock); + + if (test_opt(sb, DISCARD)) { + list_for_each_entry(entry, &freed_data_list, efd_list) { + err = ext4_issue_discard(sb, entry->efd_group, + entry->efd_start_cluster, + entry->efd_count, + &discard_bio); + if (err && err != -EOPNOTSUPP) { + ext4_msg(sb, KERN_WARNING, "discard request in" + " group:%d block:%d count:%d failed" + " with %d", entry->efd_group, + entry->efd_start_cluster, + entry->efd_count, err); + } else if (err == -EOPNOTSUPP) + break; + } + + if (discard_bio) + submit_bio_wait(discard_bio); + } + + list_for_each_entry_safe(entry, tmp, &freed_data_list, efd_list) + ext4_free_data_in_buddy(sb, entry); +} + int __init ext4_init_mballoc(void) { ext4_pspace_cachep = KMEM_CACHE(ext4_prealloc_space, @@ -3529,7 +3567,7 @@ void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, ext4_set_bits(bitmap, start, len); preallocated += len; } - mb_debug(1, "prellocated %u for group %u\n", preallocated, group); + mb_debug(1, "preallocated %u for group %u\n", preallocated, group); } static void ext4_mb_pa_callback(struct rcu_head *head) @@ -4464,7 +4502,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, trace_ext4_request_blocks(ar); /* Allow to use superuser reservation for quota file */ - if (IS_NOQUOTA(ar->inode)) + if (ext4_is_quota_file(ar->inode)) ar->flags |= EXT4_MB_USE_ROOT_BLOCKS; if ((ar->flags & EXT4_MB_DELALLOC_RESERVED) == 0) { @@ -4583,14 +4621,28 @@ out: * are contiguous, AND the extents were freed by the same transaction, * AND the blocks are associated with the same group. */ -static int can_merge(struct ext4_free_data *entry1, - struct ext4_free_data *entry2) +static void ext4_try_merge_freed_extent(struct ext4_sb_info *sbi, + struct ext4_free_data *entry, + struct ext4_free_data *new_entry, + struct rb_root *entry_rb_root) { - if ((entry1->efd_tid == entry2->efd_tid) && - (entry1->efd_group == entry2->efd_group) && - ((entry1->efd_start_cluster + entry1->efd_count) == entry2->efd_start_cluster)) - return 1; - return 0; + if ((entry->efd_tid != new_entry->efd_tid) || + (entry->efd_group != new_entry->efd_group)) + return; + if (entry->efd_start_cluster + entry->efd_count == + new_entry->efd_start_cluster) { + new_entry->efd_start_cluster = entry->efd_start_cluster; + new_entry->efd_count += entry->efd_count; + } else if (new_entry->efd_start_cluster + new_entry->efd_count == + entry->efd_start_cluster) { + new_entry->efd_count += entry->efd_count; + } else + return; + spin_lock(&sbi->s_md_lock); + list_del(&entry->efd_list); + spin_unlock(&sbi->s_md_lock); + rb_erase(&entry->efd_node, entry_rb_root); + kmem_cache_free(ext4_free_data_cachep, entry); } static noinline_for_stack int @@ -4646,29 +4698,19 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, node = rb_prev(new_node); if (node) { entry = rb_entry(node, struct ext4_free_data, efd_node); - if (can_merge(entry, new_entry) && - ext4_journal_callback_try_del(handle, &entry->efd_jce)) { - new_entry->efd_start_cluster = entry->efd_start_cluster; - new_entry->efd_count += entry->efd_count; - rb_erase(node, &(db->bb_free_root)); - kmem_cache_free(ext4_free_data_cachep, entry); - } + ext4_try_merge_freed_extent(sbi, entry, new_entry, + &(db->bb_free_root)); } node = rb_next(new_node); if (node) { entry = rb_entry(node, struct ext4_free_data, efd_node); - if (can_merge(new_entry, entry) && - ext4_journal_callback_try_del(handle, &entry->efd_jce)) { - new_entry->efd_count += entry->efd_count; - rb_erase(node, &(db->bb_free_root)); - kmem_cache_free(ext4_free_data_cachep, entry); - } + ext4_try_merge_freed_extent(sbi, entry, new_entry, + &(db->bb_free_root)); } - /* Add the extent to transaction's private list */ - new_entry->efd_jce.jce_func = ext4_free_data_callback; + spin_lock(&sbi->s_md_lock); - _ext4_journal_callback_add(handle, &new_entry->efd_jce); + list_add_tail(&new_entry->efd_list, &sbi->s_freed_data_list); sbi->s_mb_free_pending += clusters; spin_unlock(&sbi->s_md_lock); return 0; @@ -4871,7 +4913,8 @@ do_more: * them with group lock_held */ if (test_opt(sb, DISCARD)) { - err = ext4_issue_discard(sb, block_group, bit, count); + err = ext4_issue_discard(sb, block_group, bit, count, + NULL); if (err && err != -EOPNOTSUPP) ext4_msg(sb, KERN_WARNING, "discard request in" " group:%d block:%d count:%lu failed" @@ -5094,7 +5137,7 @@ __acquires(bitlock) */ mb_mark_used(e4b, &ex); ext4_unlock_group(sb, group); - ret = ext4_issue_discard(sb, group, start, count); + ret = ext4_issue_discard(sb, group, start, count, NULL); ext4_lock_group(sb, group); mb_free_blocks(NULL, e4b, start, ex.fe_len); return ret; |