From b720303df7352d4a7a1f61e467e0a124913c0d41 Mon Sep 17 00:00:00 2001 From: Jing Zhang Date: Wed, 12 May 2010 00:00:00 -0400 Subject: ext4: fix memory leaks in error path handling of ext4_ext_zeroout() When EIO occurs after bio is submitted, there is no memory free operation for bio, which results in memory leakage. And there is also no check against bio_alloc() for bio. Acked-by: Dave Kleikamp Signed-off-by: Jing Zhang Signed-off-by: "Theodore Ts'o" --- fs/ext4/extents.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'fs/ext4/extents.c') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 236b834b4ca8..228eeaf2dccf 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -2544,7 +2544,7 @@ static void bi_complete(struct bio *bio, int error) /* FIXME!! we need to try to merge to left or right after zero-out */ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex) { - int ret = -EIO; + int ret; struct bio *bio; int blkbits, blocksize; sector_t ee_pblock; @@ -2568,6 +2568,9 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex) len = ee_len; bio = bio_alloc(GFP_NOIO, len); + if (!bio) + return -ENOMEM; + bio->bi_sector = ee_pblock; bio->bi_bdev = inode->i_sb->s_bdev; @@ -2595,17 +2598,15 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex) submit_bio(WRITE, bio); wait_for_completion(&event); - if (test_bit(BIO_UPTODATE, &bio->bi_flags)) - ret = 0; - else { - ret = -EIO; - break; + if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) { + bio_put(bio); + return -EIO; } bio_put(bio); ee_len -= done; ee_pblock += done << (blkbits - 9); } - return ret; + return 0; } #define EXT4_EXT_ZERO_LEN 7 -- cgit v1.2.3 From 21ca087a3891efab4d45488db8febee474d26c68 Mon Sep 17 00:00:00 2001 From: Dmitry Monakhov Date: Sun, 16 May 2010 06:00:00 -0400 Subject: ext4: Do not zero out uninitialized extents beyond i_size The extents code will sometimes zero out blocks and mark them as initialized instead of splitting an extent into several smaller ones. This optimization however, causes problems if the extent is beyond i_size because fsck will complain if there are uninitialized blocks after i_size as this can not be distinguished from an inode that has an incorrect i_size field. 
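The check this patch adds is small enough to show in isolation. The sketch below is a simplified userspace model of that guard -- the helper name, flat parameter list, and integer typedefs are assumptions for illustration only; in the patch itself the same computation appears inline in ext4_ext_convert_to_initialized() and ext4_split_unwritten_extents() as eof_block and may_zeroout.

#include <stdbool.h>
#include <stdint.h>

typedef uint32_t ext4_lblk_t;	/* logical (file-relative) block number */

/*
 * Zeroing out an uninitialized extent and marking it initialized is only
 * safe when every block of the extent lies below the end-of-file block
 * (or inside the range currently being written); otherwise e2fsck would
 * see initialized blocks beyond i_size and flag the inode.
 */
bool extent_may_zeroout(uint64_t i_size, unsigned int blkbits,
			ext4_lblk_t iblock, unsigned int max_blocks,
			ext4_lblk_t ee_block, unsigned int ee_len)
{
	/* first logical block entirely beyond i_size, rounding up */
	ext4_lblk_t eof_block = (i_size + (1ULL << blkbits) - 1) >> blkbits;

	/* blocks covered by the write in progress are also fair game */
	if (eof_block < iblock + max_blocks)
		eof_block = iblock + max_blocks;

	return ee_block + ee_len <= eof_block;
}

Every ext4_ext_zeroout() fallback in the hunks below is then gated on this may_zeroout condition.
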
https://bugzilla.kernel.org/show_bug.cgi?id=15742 Signed-off-by: Dmitry Monakhov Signed-off-by: "Theodore Ts'o" --- fs/ext4/extents.c | 67 ++++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 51 insertions(+), 16 deletions(-) (limited to 'fs/ext4/extents.c') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 228eeaf2dccf..ee611daf0748 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -2631,11 +2631,21 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, struct ext4_extent *ex2 = NULL; struct ext4_extent *ex3 = NULL; struct ext4_extent_header *eh; - ext4_lblk_t ee_block; + ext4_lblk_t ee_block, eof_block; unsigned int allocated, ee_len, depth; ext4_fsblk_t newblock; int err = 0; int ret = 0; + int may_zeroout; + + ext_debug("ext4_ext_convert_to_initialized: inode %lu, logical" + "block %llu, max_blocks %u\n", inode->i_ino, + (unsigned long long)iblock, max_blocks); + + eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >> + inode->i_sb->s_blocksize_bits; + if (eof_block < iblock + max_blocks) + eof_block = iblock + max_blocks; depth = ext_depth(inode); eh = path[depth].p_hdr; @@ -2644,16 +2654,23 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, ee_len = ext4_ext_get_actual_len(ex); allocated = ee_len - (iblock - ee_block); newblock = iblock - ee_block + ext_pblock(ex); + ex2 = ex; orig_ex.ee_block = ex->ee_block; orig_ex.ee_len = cpu_to_le16(ee_len); ext4_ext_store_pblock(&orig_ex, ext_pblock(ex)); + /* + * It is safe to convert extent to initialized via explicit + * zeroout only if extent is fully insde i_size or new_size. + */ + may_zeroout = ee_block + ee_len <= eof_block; + err = ext4_ext_get_access(handle, inode, path + depth); if (err) goto out; /* If extent has less than 2*EXT4_EXT_ZERO_LEN zerout directly */ - if (ee_len <= 2*EXT4_EXT_ZERO_LEN) { + if (ee_len <= 2*EXT4_EXT_ZERO_LEN && may_zeroout) { err = ext4_ext_zeroout(inode, &orig_ex); if (err) goto fix_extent_len; @@ -2684,7 +2701,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, if (allocated > max_blocks) { unsigned int newdepth; /* If extent has less than EXT4_EXT_ZERO_LEN zerout directly */ - if (allocated <= EXT4_EXT_ZERO_LEN) { + if (allocated <= EXT4_EXT_ZERO_LEN && may_zeroout) { /* * iblock == ee_block is handled by the zerouout * at the beginning. 
@@ -2760,7 +2777,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, ex3->ee_len = cpu_to_le16(allocated - max_blocks); ext4_ext_mark_uninitialized(ex3); err = ext4_ext_insert_extent(handle, inode, path, ex3, 0); - if (err == -ENOSPC) { + if (err == -ENOSPC && may_zeroout) { err = ext4_ext_zeroout(inode, &orig_ex); if (err) goto fix_extent_len; @@ -2784,8 +2801,10 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, * update the extent length after successful insert of the * split extent */ - orig_ex.ee_len = cpu_to_le16(ee_len - - ext4_ext_get_actual_len(ex3)); + ee_len -= ext4_ext_get_actual_len(ex3); + orig_ex.ee_len = cpu_to_le16(ee_len); + may_zeroout = ee_block + ee_len <= eof_block; + depth = newdepth; ext4_ext_drop_refs(path); path = ext4_ext_find_extent(inode, iblock, path); @@ -2809,7 +2828,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, * otherwise give the extent a chance to merge to left */ if (le16_to_cpu(orig_ex.ee_len) <= EXT4_EXT_ZERO_LEN && - iblock != ee_block) { + iblock != ee_block && may_zeroout) { err = ext4_ext_zeroout(inode, &orig_ex); if (err) goto fix_extent_len; @@ -2878,7 +2897,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, goto out; insert: err = ext4_ext_insert_extent(handle, inode, path, &newex, 0); - if (err == -ENOSPC) { + if (err == -ENOSPC && may_zeroout) { err = ext4_ext_zeroout(inode, &orig_ex); if (err) goto fix_extent_len; @@ -2938,14 +2957,21 @@ static int ext4_split_unwritten_extents(handle_t *handle, struct ext4_extent *ex2 = NULL; struct ext4_extent *ex3 = NULL; struct ext4_extent_header *eh; - ext4_lblk_t ee_block; + ext4_lblk_t ee_block, eof_block; unsigned int allocated, ee_len, depth; ext4_fsblk_t newblock; int err = 0; + int may_zeroout; + + ext_debug("ext4_split_unwritten_extents: inode %lu, logical" + "block %llu, max_blocks %u\n", inode->i_ino, + (unsigned long long)iblock, max_blocks); + + eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >> + inode->i_sb->s_blocksize_bits; + if (eof_block < iblock + max_blocks) + eof_block = iblock + max_blocks; - ext_debug("ext4_split_unwritten_extents: inode %lu," - "iblock %llu, max_blocks %u\n", inode->i_ino, - (unsigned long long)iblock, max_blocks); depth = ext_depth(inode); eh = path[depth].p_hdr; ex = path[depth].p_ext; @@ -2953,11 +2979,18 @@ static int ext4_split_unwritten_extents(handle_t *handle, ee_len = ext4_ext_get_actual_len(ex); allocated = ee_len - (iblock - ee_block); newblock = iblock - ee_block + ext_pblock(ex); + ex2 = ex; orig_ex.ee_block = ex->ee_block; orig_ex.ee_len = cpu_to_le16(ee_len); ext4_ext_store_pblock(&orig_ex, ext_pblock(ex)); + /* + * It is safe to convert extent to initialized via explicit + * zeroout only if extent is fully insde i_size or new_size. 
+ */ + may_zeroout = ee_block + ee_len <= eof_block; + /* * If the uninitialized extent begins at the same logical * block where the write begins, and the write completely @@ -2992,7 +3025,7 @@ static int ext4_split_unwritten_extents(handle_t *handle, ex3->ee_len = cpu_to_le16(allocated - max_blocks); ext4_ext_mark_uninitialized(ex3); err = ext4_ext_insert_extent(handle, inode, path, ex3, flags); - if (err == -ENOSPC) { + if (err == -ENOSPC && may_zeroout) { err = ext4_ext_zeroout(inode, &orig_ex); if (err) goto fix_extent_len; @@ -3016,8 +3049,10 @@ static int ext4_split_unwritten_extents(handle_t *handle, * update the extent length after successful insert of the * split extent */ - orig_ex.ee_len = cpu_to_le16(ee_len - - ext4_ext_get_actual_len(ex3)); + ee_len -= ext4_ext_get_actual_len(ex3); + orig_ex.ee_len = cpu_to_le16(ee_len); + may_zeroout = ee_block + ee_len <= eof_block; + depth = newdepth; ext4_ext_drop_refs(path); path = ext4_ext_find_extent(inode, iblock, path); @@ -3063,7 +3098,7 @@ static int ext4_split_unwritten_extents(handle_t *handle, goto out; insert: err = ext4_ext_insert_extent(handle, inode, path, &newex, flags); - if (err == -ENOSPC) { + if (err == -ENOSPC && may_zeroout) { err = ext4_ext_zeroout(inode, &orig_ex); if (err) goto fix_extent_len; -- cgit v1.2.3 From 6d19c42b7cf81c39632b6d4dbc514e8449bcd346 Mon Sep 17 00:00:00 2001 From: Nikanth Karthikesan Date: Sun, 16 May 2010 14:00:00 -0400 Subject: ext4: Prevent creation of files larger than RLIMIT_FSIZE using fallocate Currently using posix_fallocate one can bypass an RLIMIT_FSIZE limit and create a file larger than the limit. Add a check for that. Signed-off-by: Nikanth Karthikesan Signed-off-by: Amit Arora Signed-off-by: "Theodore Ts'o" --- fs/ext4/extents.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'fs/ext4/extents.c') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index ee611daf0748..8a8f9f0be911 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -3708,6 +3708,11 @@ long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len) */ credits = ext4_chunk_trans_blocks(inode, max_blocks); mutex_lock(&inode->i_mutex); + ret = inode_newsize_ok(inode, (len + offset)); + if (ret) { + mutex_unlock(&inode->i_mutex); + return ret; + } retry: while (ret >= 0 && ret < max_blocks) { block = block + ret; -- cgit v1.2.3 From e35fd6609b2fee54484d520deccb8f18bf7d38f3 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sun, 16 May 2010 19:00:00 -0400 Subject: ext4: Add new abstraction ext4_map_blocks() underneath ext4_get_blocks() Jack up ext4_get_blocks() and add a new function, ext4_map_blocks() which uses a much smaller structure, struct ext4_map_blocks which is 20 bytes, as opposed to a struct buffer_head, which nearly 5 times bigger on an x86_64 machine. By switching things to use ext4_map_blocks(), we can save stack space by using ext4_map_blocks() since we can avoid allocating a struct buffer_head on the stack. 
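For readers unfamiliar with the new interface, the sketch below models it with plain userspace types. The struct fields match the hunk added to fs/ext4/ext4.h further down; the typedefs, the in/out comments, and the init helper are illustrative assumptions rather than kernel code.

#include <stdint.h>

typedef uint64_t ext4_fsblk_t;	/* physical (filesystem) block number */
typedef uint32_t ext4_lblk_t;	/* logical (file-relative) block number */

/* Small request/result descriptor that replaces the on-stack buffer_head. */
struct ext4_map_blocks {
	ext4_fsblk_t m_pblk;	/* out: first physical block of the mapping */
	ext4_lblk_t  m_lblk;	/* in:  first logical block requested        */
	unsigned int m_len;	/* in:  blocks wanted / out: blocks mapped   */
	unsigned int m_flags;	/* out: EXT4_MAP_NEW/MAPPED/... state bits   */
};

/*
 * Typical caller pattern under the new interface (hypothetical helper;
 * in the kernel the filled descriptor is passed to
 * ext4_map_blocks(handle, inode, &map, flags)).
 */
void map_request_init(struct ext4_map_blocks *map,
		      ext4_lblk_t block, unsigned int nr_blocks)
{
	map->m_lblk  = block;		/* where the mapping should start   */
	map->m_len   = nr_blocks;	/* how many blocks are wanted       */
	map->m_flags = 0;		/* ext4_map_blocks() fills these in */
	map->m_pblk  = 0;
}

Because the EXT4_MAP_* bits are defined as the corresponding BH_* bits, the compatibility wrapper in the diff can copy map.m_flags into bh->b_state with a single mask-and-or, so existing buffer_head-based callers keep working unchanged.
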
Signed-off-by: "Theodore Ts'o" --- fs/ext4/ext4.h | 30 ++++++- fs/ext4/extents.c | 237 ++++++++++++++++++++++++++---------------------------- fs/ext4/inode.c | 102 +++++++++++++---------- 3 files changed, 199 insertions(+), 170 deletions(-) (limited to 'fs/ext4/extents.c') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index d266003cac3e..57fc0e5c0918 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -125,6 +125,29 @@ struct ext4_allocation_request { unsigned int flags; }; +/* + * Logical to physical block mapping, used by ext4_map_blocks() + * + * This structure is used to pass requests into ext4_map_blocks() as + * well as to store the information returned by ext4_map_blocks(). It + * takes less room on the stack than a struct buffer_head. + */ +#define EXT4_MAP_NEW (1 << BH_New) +#define EXT4_MAP_MAPPED (1 << BH_Mapped) +#define EXT4_MAP_UNWRITTEN (1 << BH_Unwritten) +#define EXT4_MAP_BOUNDARY (1 << BH_Boundary) +#define EXT4_MAP_UNINIT (1 << BH_Uninit) +#define EXT4_MAP_FLAGS (EXT4_MAP_NEW | EXT4_MAP_MAPPED |\ + EXT4_MAP_UNWRITTEN | EXT4_MAP_BOUNDARY |\ + EXT4_MAP_UNINIT) + +struct ext4_map_blocks { + ext4_fsblk_t m_pblk; + ext4_lblk_t m_lblk; + unsigned int m_len; + unsigned int m_flags; +}; + /* * For delayed allocation tracking */ @@ -1773,9 +1796,8 @@ extern int ext4_ext_tree_init(handle_t *handle, struct inode *); extern int ext4_ext_writepage_trans_blocks(struct inode *, int); extern int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks, int chunk); -extern int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, - ext4_lblk_t iblock, unsigned int max_blocks, - struct buffer_head *bh_result, int flags); +extern int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, + struct ext4_map_blocks *map, int flags); extern void ext4_ext_truncate(struct inode *); extern void ext4_ext_init(struct super_block *); extern void ext4_ext_release(struct super_block *); @@ -1783,6 +1805,8 @@ extern long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len); extern int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset, ssize_t len); +extern int ext4_map_blocks(handle_t *handle, struct inode *inode, + struct ext4_map_blocks *map, int flags); extern int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block, unsigned int max_blocks, struct buffer_head *bh, int flags); diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 8a8f9f0be911..37f938789344 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -2611,7 +2611,7 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex) #define EXT4_EXT_ZERO_LEN 7 /* - * This function is called by ext4_ext_get_blocks() if someone tries to write + * This function is called by ext4_ext_map_blocks() if someone tries to write * to an uninitialized extent. It may result in splitting the uninitialized * extent into multiple extents (upto three - one initialized and two * uninitialized). 
@@ -2621,10 +2621,9 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex) * c> Splits in three extents: Somone is writing in middle of the extent */ static int ext4_ext_convert_to_initialized(handle_t *handle, - struct inode *inode, - struct ext4_ext_path *path, - ext4_lblk_t iblock, - unsigned int max_blocks) + struct inode *inode, + struct ext4_map_blocks *map, + struct ext4_ext_path *path) { struct ext4_extent *ex, newex, orig_ex; struct ext4_extent *ex1 = NULL; @@ -2640,20 +2639,20 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, ext_debug("ext4_ext_convert_to_initialized: inode %lu, logical" "block %llu, max_blocks %u\n", inode->i_ino, - (unsigned long long)iblock, max_blocks); + (unsigned long long)map->m_lblk, map->m_len); eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >> inode->i_sb->s_blocksize_bits; - if (eof_block < iblock + max_blocks) - eof_block = iblock + max_blocks; + if (eof_block < map->m_lblk + map->m_len) + eof_block = map->m_lblk + map->m_len; depth = ext_depth(inode); eh = path[depth].p_hdr; ex = path[depth].p_ext; ee_block = le32_to_cpu(ex->ee_block); ee_len = ext4_ext_get_actual_len(ex); - allocated = ee_len - (iblock - ee_block); - newblock = iblock - ee_block + ext_pblock(ex); + allocated = ee_len - (map->m_lblk - ee_block); + newblock = map->m_lblk - ee_block + ext_pblock(ex); ex2 = ex; orig_ex.ee_block = ex->ee_block; @@ -2683,10 +2682,10 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, return allocated; } - /* ex1: ee_block to iblock - 1 : uninitialized */ - if (iblock > ee_block) { + /* ex1: ee_block to map->m_lblk - 1 : uninitialized */ + if (map->m_lblk > ee_block) { ex1 = ex; - ex1->ee_len = cpu_to_le16(iblock - ee_block); + ex1->ee_len = cpu_to_le16(map->m_lblk - ee_block); ext4_ext_mark_uninitialized(ex1); ex2 = &newex; } @@ -2695,15 +2694,15 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, * we insert ex3, if ex1 is NULL. This is to avoid temporary * overlap of blocks. */ - if (!ex1 && allocated > max_blocks) - ex2->ee_len = cpu_to_le16(max_blocks); + if (!ex1 && allocated > map->m_len) + ex2->ee_len = cpu_to_le16(map->m_len); /* ex3: to ee_block + ee_len : uninitialised */ - if (allocated > max_blocks) { + if (allocated > map->m_len) { unsigned int newdepth; /* If extent has less than EXT4_EXT_ZERO_LEN zerout directly */ if (allocated <= EXT4_EXT_ZERO_LEN && may_zeroout) { /* - * iblock == ee_block is handled by the zerouout + * map->m_lblk == ee_block is handled by the zerouout * at the beginning. * Mark first half uninitialized. 
* Mark second half initialized and zero out the @@ -2716,7 +2715,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, ext4_ext_dirty(handle, inode, path + depth); ex3 = &newex; - ex3->ee_block = cpu_to_le32(iblock); + ex3->ee_block = cpu_to_le32(map->m_lblk); ext4_ext_store_pblock(ex3, newblock); ex3->ee_len = cpu_to_le16(allocated); err = ext4_ext_insert_extent(handle, inode, path, @@ -2729,7 +2728,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, ex->ee_len = orig_ex.ee_len; ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); ext4_ext_dirty(handle, inode, path + depth); - /* blocks available from iblock */ + /* blocks available from map->m_lblk */ return allocated; } else if (err) @@ -2751,8 +2750,8 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, */ depth = ext_depth(inode); ext4_ext_drop_refs(path); - path = ext4_ext_find_extent(inode, - iblock, path); + path = ext4_ext_find_extent(inode, map->m_lblk, + path); if (IS_ERR(path)) { err = PTR_ERR(path); return err; @@ -2772,9 +2771,9 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, return allocated; } ex3 = &newex; - ex3->ee_block = cpu_to_le32(iblock + max_blocks); - ext4_ext_store_pblock(ex3, newblock + max_blocks); - ex3->ee_len = cpu_to_le16(allocated - max_blocks); + ex3->ee_block = cpu_to_le32(map->m_lblk + map->m_len); + ext4_ext_store_pblock(ex3, newblock + map->m_len); + ex3->ee_len = cpu_to_le16(allocated - map->m_len); ext4_ext_mark_uninitialized(ex3); err = ext4_ext_insert_extent(handle, inode, path, ex3, 0); if (err == -ENOSPC && may_zeroout) { @@ -2787,7 +2786,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); ext4_ext_dirty(handle, inode, path + depth); /* zeroed the full extent */ - /* blocks available from iblock */ + /* blocks available from map->m_lblk */ return allocated; } else if (err) @@ -2807,7 +2806,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, depth = newdepth; ext4_ext_drop_refs(path); - path = ext4_ext_find_extent(inode, iblock, path); + path = ext4_ext_find_extent(inode, map->m_lblk, path); if (IS_ERR(path)) { err = PTR_ERR(path); goto out; @@ -2821,14 +2820,14 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, if (err) goto out; - allocated = max_blocks; + allocated = map->m_len; /* If extent has less than EXT4_EXT_ZERO_LEN and we are trying * to insert a extent in the middle zerout directly * otherwise give the extent a chance to merge to left */ if (le16_to_cpu(orig_ex.ee_len) <= EXT4_EXT_ZERO_LEN && - iblock != ee_block && may_zeroout) { + map->m_lblk != ee_block && may_zeroout) { err = ext4_ext_zeroout(inode, &orig_ex); if (err) goto fix_extent_len; @@ -2838,7 +2837,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); ext4_ext_dirty(handle, inode, path + depth); /* zero out the first half */ - /* blocks available from iblock */ + /* blocks available from map->m_lblk */ return allocated; } } @@ -2849,12 +2848,12 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, */ if (ex1 && ex1 != ex) { ex1 = ex; - ex1->ee_len = cpu_to_le16(iblock - ee_block); + ex1->ee_len = cpu_to_le16(map->m_lblk - ee_block); ext4_ext_mark_uninitialized(ex1); ex2 = &newex; } - /* ex2: iblock to iblock + maxblocks-1 : initialised */ - ex2->ee_block = cpu_to_le32(iblock); + /* ex2: map->m_lblk to map->m_lblk + maxblocks-1 : initialised */ + ex2->ee_block = cpu_to_le32(map->m_lblk); ext4_ext_store_pblock(ex2, 
newblock); ex2->ee_len = cpu_to_le16(allocated); if (ex2 != ex) @@ -2924,7 +2923,7 @@ fix_extent_len: } /* - * This function is called by ext4_ext_get_blocks() from + * This function is called by ext4_ext_map_blocks() from * ext4_get_blocks_dio_write() when DIO to write * to an uninitialized extent. * @@ -2947,9 +2946,8 @@ fix_extent_len: */ static int ext4_split_unwritten_extents(handle_t *handle, struct inode *inode, + struct ext4_map_blocks *map, struct ext4_ext_path *path, - ext4_lblk_t iblock, - unsigned int max_blocks, int flags) { struct ext4_extent *ex, newex, orig_ex; @@ -2965,20 +2963,20 @@ static int ext4_split_unwritten_extents(handle_t *handle, ext_debug("ext4_split_unwritten_extents: inode %lu, logical" "block %llu, max_blocks %u\n", inode->i_ino, - (unsigned long long)iblock, max_blocks); + (unsigned long long)map->m_lblk, map->m_len); eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >> inode->i_sb->s_blocksize_bits; - if (eof_block < iblock + max_blocks) - eof_block = iblock + max_blocks; + if (eof_block < map->m_lblk + map->m_len) + eof_block = map->m_lblk + map->m_len; depth = ext_depth(inode); eh = path[depth].p_hdr; ex = path[depth].p_ext; ee_block = le32_to_cpu(ex->ee_block); ee_len = ext4_ext_get_actual_len(ex); - allocated = ee_len - (iblock - ee_block); - newblock = iblock - ee_block + ext_pblock(ex); + allocated = ee_len - (map->m_lblk - ee_block); + newblock = map->m_lblk - ee_block + ext_pblock(ex); ex2 = ex; orig_ex.ee_block = ex->ee_block; @@ -2996,16 +2994,16 @@ static int ext4_split_unwritten_extents(handle_t *handle, * block where the write begins, and the write completely * covers the extent, then we don't need to split it. */ - if ((iblock == ee_block) && (allocated <= max_blocks)) + if ((map->m_lblk == ee_block) && (allocated <= map->m_len)) return allocated; err = ext4_ext_get_access(handle, inode, path + depth); if (err) goto out; - /* ex1: ee_block to iblock - 1 : uninitialized */ - if (iblock > ee_block) { + /* ex1: ee_block to map->m_lblk - 1 : uninitialized */ + if (map->m_lblk > ee_block) { ex1 = ex; - ex1->ee_len = cpu_to_le16(iblock - ee_block); + ex1->ee_len = cpu_to_le16(map->m_lblk - ee_block); ext4_ext_mark_uninitialized(ex1); ex2 = &newex; } @@ -3014,15 +3012,15 @@ static int ext4_split_unwritten_extents(handle_t *handle, * we insert ex3, if ex1 is NULL. This is to avoid temporary * overlap of blocks. 
*/ - if (!ex1 && allocated > max_blocks) - ex2->ee_len = cpu_to_le16(max_blocks); + if (!ex1 && allocated > map->m_len) + ex2->ee_len = cpu_to_le16(map->m_len); /* ex3: to ee_block + ee_len : uninitialised */ - if (allocated > max_blocks) { + if (allocated > map->m_len) { unsigned int newdepth; ex3 = &newex; - ex3->ee_block = cpu_to_le32(iblock + max_blocks); - ext4_ext_store_pblock(ex3, newblock + max_blocks); - ex3->ee_len = cpu_to_le16(allocated - max_blocks); + ex3->ee_block = cpu_to_le32(map->m_lblk + map->m_len); + ext4_ext_store_pblock(ex3, newblock + map->m_len); + ex3->ee_len = cpu_to_le16(allocated - map->m_len); ext4_ext_mark_uninitialized(ex3); err = ext4_ext_insert_extent(handle, inode, path, ex3, flags); if (err == -ENOSPC && may_zeroout) { @@ -3035,7 +3033,7 @@ static int ext4_split_unwritten_extents(handle_t *handle, ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); ext4_ext_dirty(handle, inode, path + depth); /* zeroed the full extent */ - /* blocks available from iblock */ + /* blocks available from map->m_lblk */ return allocated; } else if (err) @@ -3055,7 +3053,7 @@ static int ext4_split_unwritten_extents(handle_t *handle, depth = newdepth; ext4_ext_drop_refs(path); - path = ext4_ext_find_extent(inode, iblock, path); + path = ext4_ext_find_extent(inode, map->m_lblk, path); if (IS_ERR(path)) { err = PTR_ERR(path); goto out; @@ -3069,7 +3067,7 @@ static int ext4_split_unwritten_extents(handle_t *handle, if (err) goto out; - allocated = max_blocks; + allocated = map->m_len; } /* * If there was a change of depth as part of the @@ -3078,15 +3076,15 @@ static int ext4_split_unwritten_extents(handle_t *handle, */ if (ex1 && ex1 != ex) { ex1 = ex; - ex1->ee_len = cpu_to_le16(iblock - ee_block); + ex1->ee_len = cpu_to_le16(map->m_lblk - ee_block); ext4_ext_mark_uninitialized(ex1); ex2 = &newex; } /* - * ex2: iblock to iblock + maxblocks-1 : to be direct IO written, - * uninitialised still. + * ex2: map->m_lblk to map->m_lblk + map->m_len-1 : to be written + * using direct I/O, uninitialised still. 
*/ - ex2->ee_block = cpu_to_le32(iblock); + ex2->ee_block = cpu_to_le32(map->m_lblk); ext4_ext_store_pblock(ex2, newblock); ex2->ee_len = cpu_to_le16(allocated); ext4_ext_mark_uninitialized(ex2); @@ -3188,10 +3186,9 @@ static void unmap_underlying_metadata_blocks(struct block_device *bdev, static int ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, - ext4_lblk_t iblock, unsigned int max_blocks, + struct ext4_map_blocks *map, struct ext4_ext_path *path, int flags, - unsigned int allocated, struct buffer_head *bh_result, - ext4_fsblk_t newblock) + unsigned int allocated, ext4_fsblk_t newblock) { int ret = 0; int err = 0; @@ -3199,15 +3196,14 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, ext_debug("ext4_ext_handle_uninitialized_extents: inode %lu, logical" "block %llu, max_blocks %u, flags %d, allocated %u", - inode->i_ino, (unsigned long long)iblock, max_blocks, + inode->i_ino, (unsigned long long)map->m_lblk, map->m_len, flags, allocated); ext4_ext_show_leaf(inode, path); /* get_block() before submit the IO, split the extent */ if ((flags & EXT4_GET_BLOCKS_PRE_IO)) { - ret = ext4_split_unwritten_extents(handle, - inode, path, iblock, - max_blocks, flags); + ret = ext4_split_unwritten_extents(handle, inode, map, + path, flags); /* * Flag the inode(non aio case) or end_io struct (aio case) * that this IO needs to convertion to written when IO is @@ -3218,7 +3214,7 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, else ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN); if (ext4_should_dioread_nolock(inode)) - set_buffer_uninit(bh_result); + map->m_flags |= EXT4_MAP_UNINIT; goto out; } /* IO end_io complete, convert the filled extent to written */ @@ -3246,14 +3242,12 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, * the buffer head will be unmapped so that * a read from the block returns 0s. */ - set_buffer_unwritten(bh_result); + map->m_flags |= EXT4_MAP_UNWRITTEN; goto out1; } /* buffered write, writepage time, convert*/ - ret = ext4_ext_convert_to_initialized(handle, inode, - path, iblock, - max_blocks); + ret = ext4_ext_convert_to_initialized(handle, inode, map, path); if (ret >= 0) ext4_update_inode_fsync_trans(handle, inode, 1); out: @@ -3262,7 +3256,7 @@ out: goto out2; } else allocated = ret; - set_buffer_new(bh_result); + map->m_flags |= EXT4_MAP_NEW; /* * if we allocated more blocks than requested * we need to make sure we unmap the extra block @@ -3270,11 +3264,11 @@ out: * unmapped later when we find the buffer_head marked * new. */ - if (allocated > max_blocks) { + if (allocated > map->m_len) { unmap_underlying_metadata_blocks(inode->i_sb->s_bdev, - newblock + max_blocks, - allocated - max_blocks); - allocated = max_blocks; + newblock + map->m_len, + allocated - map->m_len); + allocated = map->m_len; } /* @@ -3288,13 +3282,13 @@ out: ext4_da_update_reserve_space(inode, allocated, 0); map_out: - set_buffer_mapped(bh_result); + map->m_flags |= EXT4_MAP_MAPPED; out1: - if (allocated > max_blocks) - allocated = max_blocks; + if (allocated > map->m_len) + allocated = map->m_len; ext4_ext_show_leaf(inode, path); - bh_result->b_bdev = inode->i_sb->s_bdev; - bh_result->b_blocknr = newblock; + map->m_pblk = newblock; + map->m_len = allocated; out2: if (path) { ext4_ext_drop_refs(path); @@ -3320,10 +3314,8 @@ out2: * * return < 0, error case. 
*/ -int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, - ext4_lblk_t iblock, - unsigned int max_blocks, struct buffer_head *bh_result, - int flags) +int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, + struct ext4_map_blocks *map, int flags) { struct ext4_ext_path *path = NULL; struct ext4_extent_header *eh; @@ -3334,12 +3326,11 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, struct ext4_allocation_request ar; ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio; - __clear_bit(BH_New, &bh_result->b_state); ext_debug("blocks %u/%u requested for inode %lu\n", - iblock, max_blocks, inode->i_ino); + map->m_lblk, map->m_len, inode->i_ino); /* check in cache */ - cache_type = ext4_ext_in_cache(inode, iblock, &newex); + cache_type = ext4_ext_in_cache(inode, map->m_lblk, &newex); if (cache_type) { if (cache_type == EXT4_EXT_CACHE_GAP) { if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) { @@ -3352,12 +3343,12 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, /* we should allocate requested block */ } else if (cache_type == EXT4_EXT_CACHE_EXTENT) { /* block is already allocated */ - newblock = iblock + newblock = map->m_lblk - le32_to_cpu(newex.ee_block) + ext_pblock(&newex); /* number of remaining blocks in the extent */ allocated = ext4_ext_get_actual_len(&newex) - - (iblock - le32_to_cpu(newex.ee_block)); + (map->m_lblk - le32_to_cpu(newex.ee_block)); goto out; } else { BUG(); @@ -3365,7 +3356,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, } /* find extent for this block */ - path = ext4_ext_find_extent(inode, iblock, NULL); + path = ext4_ext_find_extent(inode, map->m_lblk, NULL); if (IS_ERR(path)) { err = PTR_ERR(path); path = NULL; @@ -3382,7 +3373,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, if (unlikely(path[depth].p_ext == NULL && depth != 0)) { EXT4_ERROR_INODE(inode, "bad extent address " "iblock: %d, depth: %d pblock %lld", - iblock, depth, path[depth].p_block); + map->m_lblk, depth, path[depth].p_block); err = -EIO; goto out2; } @@ -3400,12 +3391,12 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, */ ee_len = ext4_ext_get_actual_len(ex); /* if found extent covers block, simply return it */ - if (in_range(iblock, ee_block, ee_len)) { - newblock = iblock - ee_block + ee_start; + if (in_range(map->m_lblk, ee_block, ee_len)) { + newblock = map->m_lblk - ee_block + ee_start; /* number of remaining blocks in the extent */ - allocated = ee_len - (iblock - ee_block); - ext_debug("%u fit into %u:%d -> %llu\n", iblock, - ee_block, ee_len, newblock); + allocated = ee_len - (map->m_lblk - ee_block); + ext_debug("%u fit into %u:%d -> %llu\n", map->m_lblk, + ee_block, ee_len, newblock); /* Do not put uninitialized extent in the cache */ if (!ext4_ext_is_uninitialized(ex)) { @@ -3415,8 +3406,8 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, goto out; } ret = ext4_ext_handle_uninitialized_extents(handle, - inode, iblock, max_blocks, path, - flags, allocated, bh_result, newblock); + inode, map, path, flags, allocated, + newblock); return ret; } } @@ -3430,7 +3421,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, * put just found gap into cache to speed up * subsequent requests */ - ext4_ext_put_gap_in_cache(inode, path, iblock); + ext4_ext_put_gap_in_cache(inode, path, map->m_lblk); goto out2; } /* @@ -3438,11 +3429,11 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, */ /* find neighbour allocated blocks */ - ar.lleft = iblock; + ar.lleft = 
map->m_lblk; err = ext4_ext_search_left(inode, path, &ar.lleft, &ar.pleft); if (err) goto out2; - ar.lright = iblock; + ar.lright = map->m_lblk; err = ext4_ext_search_right(inode, path, &ar.lright, &ar.pright); if (err) goto out2; @@ -3453,26 +3444,26 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, * EXT_INIT_MAX_LEN and for an uninitialized extent this limit is * EXT_UNINIT_MAX_LEN. */ - if (max_blocks > EXT_INIT_MAX_LEN && + if (map->m_len > EXT_INIT_MAX_LEN && !(flags & EXT4_GET_BLOCKS_UNINIT_EXT)) - max_blocks = EXT_INIT_MAX_LEN; - else if (max_blocks > EXT_UNINIT_MAX_LEN && + map->m_len = EXT_INIT_MAX_LEN; + else if (map->m_len > EXT_UNINIT_MAX_LEN && (flags & EXT4_GET_BLOCKS_UNINIT_EXT)) - max_blocks = EXT_UNINIT_MAX_LEN; + map->m_len = EXT_UNINIT_MAX_LEN; - /* Check if we can really insert (iblock)::(iblock+max_blocks) extent */ - newex.ee_block = cpu_to_le32(iblock); - newex.ee_len = cpu_to_le16(max_blocks); + /* Check if we can really insert (m_lblk)::(m_lblk + m_len) extent */ + newex.ee_block = cpu_to_le32(map->m_lblk); + newex.ee_len = cpu_to_le16(map->m_len); err = ext4_ext_check_overlap(inode, &newex, path); if (err) allocated = ext4_ext_get_actual_len(&newex); else - allocated = max_blocks; + allocated = map->m_len; /* allocate new block */ ar.inode = inode; - ar.goal = ext4_ext_find_goal(inode, path, iblock); - ar.logical = iblock; + ar.goal = ext4_ext_find_goal(inode, path, map->m_lblk); + ar.logical = map->m_lblk; ar.len = allocated; if (S_ISREG(inode->i_mode)) ar.flags = EXT4_MB_HINT_DATA; @@ -3506,7 +3497,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, EXT4_STATE_DIO_UNWRITTEN); } if (ext4_should_dioread_nolock(inode)) - set_buffer_uninit(bh_result); + map->m_flags |= EXT4_MAP_UNINIT; } if (unlikely(EXT4_I(inode)->i_flags & EXT4_EOFBLOCKS_FL)) { @@ -3518,7 +3509,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, goto out2; } last_ex = EXT_LAST_EXTENT(eh); - if (iblock + ar.len > le32_to_cpu(last_ex->ee_block) + if (map->m_lblk + ar.len > le32_to_cpu(last_ex->ee_block) + ext4_ext_get_actual_len(last_ex)) EXT4_I(inode)->i_flags &= ~EXT4_EOFBLOCKS_FL; } @@ -3536,9 +3527,9 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, /* previous routine could use block we allocated */ newblock = ext_pblock(&newex); allocated = ext4_ext_get_actual_len(&newex); - if (allocated > max_blocks) - allocated = max_blocks; - set_buffer_new(bh_result); + if (allocated > map->m_len) + allocated = map->m_len; + map->m_flags |= EXT4_MAP_NEW; /* * Update reserved blocks/metadata blocks after successful @@ -3552,18 +3543,18 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, * when it is _not_ an uninitialized extent. 
*/ if ((flags & EXT4_GET_BLOCKS_UNINIT_EXT) == 0) { - ext4_ext_put_in_cache(inode, iblock, allocated, newblock, + ext4_ext_put_in_cache(inode, map->m_lblk, allocated, newblock, EXT4_EXT_CACHE_EXTENT); ext4_update_inode_fsync_trans(handle, inode, 1); } else ext4_update_inode_fsync_trans(handle, inode, 0); out: - if (allocated > max_blocks) - allocated = max_blocks; + if (allocated > map->m_len) + allocated = map->m_len; ext4_ext_show_leaf(inode, path); - set_buffer_mapped(bh_result); - bh_result->b_bdev = inode->i_sb->s_bdev; - bh_result->b_blocknr = newblock; + map->m_flags |= EXT4_MAP_MAPPED; + map->m_pblk = newblock; + map->m_len = allocated; out2: if (path) { ext4_ext_drop_refs(path); @@ -3729,7 +3720,7 @@ retry: if (ret <= 0) { #ifdef EXT4FS_DEBUG WARN_ON(ret <= 0); - printk(KERN_ERR "%s: ext4_ext_get_blocks " + printk(KERN_ERR "%s: ext4_ext_map_blocks " "returned error inode#%lu, block=%u, " "max_blocks=%u", __func__, inode->i_ino, block, max_blocks); @@ -3806,7 +3797,7 @@ int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset, EXT4_GET_BLOCKS_IO_CONVERT_EXT); if (ret <= 0) { WARN_ON(ret <= 0); - printk(KERN_ERR "%s: ext4_ext_get_blocks " + printk(KERN_ERR "%s: ext4_ext_map_blocks " "returned error inode#%lu, block=%u, " "max_blocks=%u", __func__, inode->i_ino, block, max_blocks); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 830336d3911b..ff2f5fd681b5 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -149,7 +149,7 @@ int ext4_truncate_restart_trans(handle_t *handle, struct inode *inode, int ret; /* - * Drop i_data_sem to avoid deadlock with ext4_get_blocks At this + * Drop i_data_sem to avoid deadlock with ext4_map_blocks. At this * moment, get_block can be called only for blocks inside i_size since * page cache has been already dropped and writes are blocked by * i_mutex. So we can safely drop the i_data_sem here. @@ -890,9 +890,9 @@ err_out: } /* - * The ext4_ind_get_blocks() function handles non-extents inodes + * The ext4_ind_map_blocks() function handles non-extents inodes * (i.e., using the traditional indirect/double-indirect i_blocks - * scheme) for ext4_get_blocks(). + * scheme) for ext4_map_blocks(). * * Allocation strategy is simple: if we have to allocate something, we will * have to go the whole way to leaf. So let's do it before attaching anything @@ -917,9 +917,8 @@ err_out: * down_read(&EXT4_I(inode)->i_data_sem) if not allocating file system * blocks. 
*/ -static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode, - ext4_lblk_t iblock, unsigned int maxblocks, - struct buffer_head *bh_result, +static int ext4_ind_map_blocks(handle_t *handle, struct inode *inode, + struct ext4_map_blocks *map, int flags) { int err = -EIO; @@ -935,7 +934,7 @@ static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode, J_ASSERT(!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)); J_ASSERT(handle != NULL || (flags & EXT4_GET_BLOCKS_CREATE) == 0); - depth = ext4_block_to_path(inode, iblock, offsets, + depth = ext4_block_to_path(inode, map->m_lblk, offsets, &blocks_to_boundary); if (depth == 0) @@ -946,10 +945,9 @@ static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode, /* Simplest case - block found, no allocation needed */ if (!partial) { first_block = le32_to_cpu(chain[depth - 1].key); - clear_buffer_new(bh_result); count++; /*map more blocks*/ - while (count < maxblocks && count <= blocks_to_boundary) { + while (count < map->m_len && count <= blocks_to_boundary) { ext4_fsblk_t blk; blk = le32_to_cpu(*(chain[depth-1].p + count)); @@ -969,7 +967,7 @@ static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode, /* * Okay, we need to do block allocation. */ - goal = ext4_find_goal(inode, iblock, partial); + goal = ext4_find_goal(inode, map->m_lblk, partial); /* the number of blocks need to allocate for [d,t]indirect blocks */ indirect_blks = (chain + depth) - partial - 1; @@ -979,11 +977,11 @@ static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode, * direct blocks to allocate for this branch. */ count = ext4_blks_to_allocate(partial, indirect_blks, - maxblocks, blocks_to_boundary); + map->m_len, blocks_to_boundary); /* * Block out ext4_truncate while we alter the tree */ - err = ext4_alloc_branch(handle, inode, iblock, indirect_blks, + err = ext4_alloc_branch(handle, inode, map->m_lblk, indirect_blks, &count, goal, offsets + (partial - chain), partial); @@ -995,18 +993,20 @@ static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode, * may need to return -EAGAIN upwards in the worst case. --sct */ if (!err) - err = ext4_splice_branch(handle, inode, iblock, + err = ext4_splice_branch(handle, inode, map->m_lblk, partial, indirect_blks, count); if (err) goto cleanup; - set_buffer_new(bh_result); + map->m_flags |= EXT4_MAP_NEW; ext4_update_inode_fsync_trans(handle, inode, 1); got_it: - map_bh(bh_result, inode->i_sb, le32_to_cpu(chain[depth-1].key)); + map->m_flags |= EXT4_MAP_MAPPED; + map->m_pblk = le32_to_cpu(chain[depth-1].key); + map->m_len = count; if (count > blocks_to_boundary) - set_buffer_boundary(bh_result); + map->m_flags |= EXT4_MAP_BOUNDARY; err = count; /* Clean up and exit */ partial = chain + depth - 1; /* the whole chain */ @@ -1016,7 +1016,6 @@ cleanup: brelse(partial->bh); partial--; } - BUFFER_TRACE(bh_result, "returned"); out: return err; } @@ -1203,15 +1202,15 @@ static pgoff_t ext4_num_dirty_pages(struct inode *inode, pgoff_t idx, } /* - * The ext4_get_blocks() function tries to look up the requested blocks, + * The ext4_map_blocks() function tries to look up the requested blocks, * and returns if the blocks are already mapped. * * Otherwise it takes the write lock of the i_data_sem and allocate blocks * and store the allocated blocks in the result buffer head and mark it * mapped. 
* - * If file type is extents based, it will call ext4_ext_get_blocks(), - * Otherwise, call with ext4_ind_get_blocks() to handle indirect mapping + * If file type is extents based, it will call ext4_ext_map_blocks(), + * Otherwise, call with ext4_ind_map_blocks() to handle indirect mapping * based files * * On success, it returns the number of blocks being mapped or allocate. @@ -1224,35 +1223,30 @@ static pgoff_t ext4_num_dirty_pages(struct inode *inode, pgoff_t idx, * * It returns the error in case of allocation failure. */ -int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block, - unsigned int max_blocks, struct buffer_head *bh, - int flags) +int ext4_map_blocks(handle_t *handle, struct inode *inode, + struct ext4_map_blocks *map, int flags) { int retval; - clear_buffer_mapped(bh); - clear_buffer_unwritten(bh); - - ext_debug("ext4_get_blocks(): inode %lu, flag %d, max_blocks %u," - "logical block %lu\n", inode->i_ino, flags, max_blocks, - (unsigned long)block); + map->m_flags = 0; + ext_debug("ext4_map_blocks(): inode %lu, flag %d, max_blocks %u," + "logical block %lu\n", inode->i_ino, flags, map->m_len, + (unsigned long) map->m_lblk); /* * Try to see if we can get the block without requesting a new * file system block. */ down_read((&EXT4_I(inode)->i_data_sem)); if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { - retval = ext4_ext_get_blocks(handle, inode, block, max_blocks, - bh, 0); + retval = ext4_ext_map_blocks(handle, inode, map, 0); } else { - retval = ext4_ind_get_blocks(handle, inode, block, max_blocks, - bh, 0); + retval = ext4_ind_map_blocks(handle, inode, map, 0); } up_read((&EXT4_I(inode)->i_data_sem)); - if (retval > 0 && buffer_mapped(bh)) { + if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { int ret = check_block_validity(inode, "file system corruption", - block, bh->b_blocknr, retval); + map->m_lblk, map->m_pblk, retval); if (ret != 0) return ret; } @@ -1268,7 +1262,7 @@ int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block, * ext4_ext_get_block() returns th create = 0 * with buffer head unmapped. */ - if (retval > 0 && buffer_mapped(bh)) + if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) return retval; /* @@ -1281,7 +1275,7 @@ int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block, * of BH_Unwritten and BH_Mapped flags being simultaneously * set on the buffer_head. */ - clear_buffer_unwritten(bh); + map->m_flags &= ~EXT4_MAP_UNWRITTEN; /* * New blocks allocate and/or writing to uninitialized extent @@ -1304,13 +1298,11 @@ int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block, * could have changed the inode type in between */ if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { - retval = ext4_ext_get_blocks(handle, inode, block, max_blocks, - bh, flags); + retval = ext4_ext_map_blocks(handle, inode, map, flags); } else { - retval = ext4_ind_get_blocks(handle, inode, block, - max_blocks, bh, flags); + retval = ext4_ind_map_blocks(handle, inode, map, flags); - if (retval > 0 && buffer_new(bh)) { + if (retval > 0 && map->m_flags & EXT4_MAP_NEW) { /* * We allocated new blocks which will result in * i_data's format changing. 
Force the migrate @@ -1333,16 +1325,38 @@ int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block, EXT4_I(inode)->i_delalloc_reserved_flag = 0; up_write((&EXT4_I(inode)->i_data_sem)); - if (retval > 0 && buffer_mapped(bh)) { + if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { int ret = check_block_validity(inode, "file system " "corruption after allocation", - block, bh->b_blocknr, retval); + map->m_lblk, map->m_pblk, + retval); if (ret != 0) return ret; } return retval; } +int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block, + unsigned int max_blocks, struct buffer_head *bh, + int flags) +{ + struct ext4_map_blocks map; + int ret; + + map.m_lblk = block; + map.m_len = max_blocks; + + ret = ext4_map_blocks(handle, inode, &map, flags); + if (ret < 0) + return ret; + + bh->b_blocknr = map.m_pblk; + bh->b_size = inode->i_sb->s_blocksize * map.m_len; + bh->b_bdev = inode->i_sb->s_bdev; + bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags; + return ret; +} + /* Maximum number of blocks we map for direct IO at once. */ #define DIO_MAX_BLOCKS 4096 -- cgit v1.2.3 From 2ed886852adfcb070bf350e66a0da0d98b2f3ab5 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sun, 16 May 2010 20:00:00 -0400 Subject: ext4: Convert callers of ext4_get_blocks() to use ext4_map_blocks() This saves a huge amount of stack space by avoiding unnecesary struct buffer_head's from being allocated on the stack. In addition, to make the code easier to understand, collapse and refactor ext4_get_block(), ext4_get_block_write(), noalloc_get_block_write(), into a single function. Signed-off-by: "Theodore Ts'o" --- fs/ext4/dir.c | 12 +- fs/ext4/extents.c | 40 +++---- fs/ext4/inode.c | 327 +++++++++++++++++++++--------------------------------- 3 files changed, 150 insertions(+), 229 deletions(-) (limited to 'fs/ext4/extents.c') diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index 86cb6d86a048..e3f2700b8be7 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c @@ -128,14 +128,14 @@ static int ext4_readdir(struct file *filp, offset = filp->f_pos & (sb->s_blocksize - 1); while (!error && !stored && filp->f_pos < inode->i_size) { - ext4_lblk_t blk = filp->f_pos >> EXT4_BLOCK_SIZE_BITS(sb); - struct buffer_head map_bh; + struct ext4_map_blocks map; struct buffer_head *bh = NULL; - map_bh.b_state = 0; - err = ext4_get_blocks(NULL, inode, blk, 1, &map_bh, 0); + map.m_lblk = filp->f_pos >> EXT4_BLOCK_SIZE_BITS(sb); + map.m_len = 1; + err = ext4_map_blocks(NULL, inode, &map, 0); if (err > 0) { - pgoff_t index = map_bh.b_blocknr >> + pgoff_t index = map.m_pblk >> (PAGE_CACHE_SHIFT - inode->i_blkbits); if (!ra_has_index(&filp->f_ra, index)) page_cache_sync_readahead( @@ -143,7 +143,7 @@ static int ext4_readdir(struct file *filp, &filp->f_ra, filp, index, 1); filp->f_ra.prev_pos = (loff_t)index << PAGE_CACHE_SHIFT; - bh = ext4_bread(NULL, inode, blk, 0, &err); + bh = ext4_bread(NULL, inode, map.m_lblk, 0, &err); } /* diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 37f938789344..d6801eb7232b 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -3667,13 +3667,12 @@ static void ext4_falloc_update_inode(struct inode *inode, long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len) { handle_t *handle; - ext4_lblk_t block; loff_t new_size; unsigned int max_blocks; int ret = 0; int ret2 = 0; int retries = 0; - struct buffer_head map_bh; + struct ext4_map_blocks map; unsigned int credits, blkbits = inode->i_blkbits; /* @@ -3687,13 +3686,13 @@ long ext4_fallocate(struct inode 
*inode, int mode, loff_t offset, loff_t len) if (S_ISDIR(inode->i_mode)) return -ENODEV; - block = offset >> blkbits; + map.m_lblk = offset >> blkbits; /* * We can't just convert len to max_blocks because * If blocksize = 4096 offset = 3072 and len = 2048 */ max_blocks = (EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits) - - block; + - map.m_lblk; /* * credits to insert 1 extent into extent tree */ @@ -3706,16 +3705,14 @@ long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len) } retry: while (ret >= 0 && ret < max_blocks) { - block = block + ret; - max_blocks = max_blocks - ret; + map.m_lblk = map.m_lblk + ret; + map.m_len = max_blocks = max_blocks - ret; handle = ext4_journal_start(inode, credits); if (IS_ERR(handle)) { ret = PTR_ERR(handle); break; } - map_bh.b_state = 0; - ret = ext4_get_blocks(handle, inode, block, - max_blocks, &map_bh, + ret = ext4_map_blocks(handle, inode, &map, EXT4_GET_BLOCKS_CREATE_UNINIT_EXT); if (ret <= 0) { #ifdef EXT4FS_DEBUG @@ -3729,14 +3726,14 @@ retry: ret2 = ext4_journal_stop(handle); break; } - if ((block + ret) >= (EXT4_BLOCK_ALIGN(offset + len, + if ((map.m_lblk + ret) >= (EXT4_BLOCK_ALIGN(offset + len, blkbits) >> blkbits)) new_size = offset + len; else - new_size = (block + ret) << blkbits; + new_size = (map.m_lblk + ret) << blkbits; ext4_falloc_update_inode(inode, mode, new_size, - buffer_new(&map_bh)); + (map.m_flags & EXT4_MAP_NEW)); ext4_mark_inode_dirty(handle, inode); ret2 = ext4_journal_stop(handle); if (ret2) @@ -3765,42 +3762,39 @@ int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset, ssize_t len) { handle_t *handle; - ext4_lblk_t block; unsigned int max_blocks; int ret = 0; int ret2 = 0; - struct buffer_head map_bh; + struct ext4_map_blocks map; unsigned int credits, blkbits = inode->i_blkbits; - block = offset >> blkbits; + map.m_lblk = offset >> blkbits; /* * We can't just convert len to max_blocks because * If blocksize = 4096 offset = 3072 and len = 2048 */ - max_blocks = (EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits) - - block; + max_blocks = ((EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits) - + map.m_lblk); /* * credits to insert 1 extent into extent tree */ credits = ext4_chunk_trans_blocks(inode, max_blocks); while (ret >= 0 && ret < max_blocks) { - block = block + ret; - max_blocks = max_blocks - ret; + map.m_lblk += ret; + map.m_len = (max_blocks -= ret); handle = ext4_journal_start(inode, credits); if (IS_ERR(handle)) { ret = PTR_ERR(handle); break; } - map_bh.b_state = 0; - ret = ext4_get_blocks(handle, inode, block, - max_blocks, &map_bh, + ret = ext4_map_blocks(handle, inode, &map, EXT4_GET_BLOCKS_IO_CONVERT_EXT); if (ret <= 0) { WARN_ON(ret <= 0); printk(KERN_ERR "%s: ext4_ext_map_blocks " "returned error inode#%lu, block=%u, " "max_blocks=%u", __func__, - inode->i_ino, block, max_blocks); + inode->i_ino, map.m_lblk, map.m_len); } ext4_mark_inode_dirty(handle, inode); ret2 = ext4_journal_stop(handle); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index ff2f5fd681b5..0b1d7c89f93f 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1336,133 +1336,112 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, return retval; } -int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block, - unsigned int max_blocks, struct buffer_head *bh, - int flags) -{ - struct ext4_map_blocks map; - int ret; - - map.m_lblk = block; - map.m_len = max_blocks; - - ret = ext4_map_blocks(handle, inode, &map, flags); - if (ret < 0) - return ret; - - bh->b_blocknr = map.m_pblk; - 
bh->b_size = inode->i_sb->s_blocksize * map.m_len; - bh->b_bdev = inode->i_sb->s_bdev; - bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags; - return ret; -} - /* Maximum number of blocks we map for direct IO at once. */ #define DIO_MAX_BLOCKS 4096 -int ext4_get_block(struct inode *inode, sector_t iblock, - struct buffer_head *bh_result, int create) +static int _ext4_get_block(struct inode *inode, sector_t iblock, + struct buffer_head *bh, int flags) { handle_t *handle = ext4_journal_current_handle(); + struct ext4_map_blocks map; int ret = 0, started = 0; - unsigned max_blocks = bh_result->b_size >> inode->i_blkbits; int dio_credits; - if (create && !handle) { + map.m_lblk = iblock; + map.m_len = bh->b_size >> inode->i_blkbits; + + if (flags && !handle) { /* Direct IO write... */ - if (max_blocks > DIO_MAX_BLOCKS) - max_blocks = DIO_MAX_BLOCKS; - dio_credits = ext4_chunk_trans_blocks(inode, max_blocks); + if (map.m_len > DIO_MAX_BLOCKS) + map.m_len = DIO_MAX_BLOCKS; + dio_credits = ext4_chunk_trans_blocks(inode, map.m_len); handle = ext4_journal_start(inode, dio_credits); if (IS_ERR(handle)) { ret = PTR_ERR(handle); - goto out; + return ret; } started = 1; } - ret = ext4_get_blocks(handle, inode, iblock, max_blocks, bh_result, - create ? EXT4_GET_BLOCKS_CREATE : 0); + ret = ext4_map_blocks(handle, inode, &map, flags); if (ret > 0) { - bh_result->b_size = (ret << inode->i_blkbits); + map_bh(bh, inode->i_sb, map.m_pblk); + bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags; + bh->b_size = inode->i_sb->s_blocksize * map.m_len; ret = 0; } if (started) ext4_journal_stop(handle); -out: return ret; } +int ext4_get_block(struct inode *inode, sector_t iblock, + struct buffer_head *bh, int create) +{ + return _ext4_get_block(inode, iblock, bh, + create ? EXT4_GET_BLOCKS_CREATE : 0); +} + /* * `handle' can be NULL if create is zero */ struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode, ext4_lblk_t block, int create, int *errp) { - struct buffer_head dummy; + struct ext4_map_blocks map; + struct buffer_head *bh; int fatal = 0, err; - int flags = 0; J_ASSERT(handle != NULL || create == 0); - dummy.b_state = 0; - dummy.b_blocknr = -1000; - buffer_trace_init(&dummy.b_history); - if (create) - flags |= EXT4_GET_BLOCKS_CREATE; - err = ext4_get_blocks(handle, inode, block, 1, &dummy, flags); - /* - * ext4_get_blocks() returns number of blocks mapped. 0 in - * case of a HOLE. - */ - if (err > 0) { - if (err > 1) - WARN_ON(1); - err = 0; + map.m_lblk = block; + map.m_len = 1; + err = ext4_map_blocks(handle, inode, &map, + create ? EXT4_GET_BLOCKS_CREATE : 0); + + if (err < 0) + *errp = err; + if (err <= 0) + return NULL; + *errp = 0; + + bh = sb_getblk(inode->i_sb, map.m_pblk); + if (!bh) { + *errp = -EIO; + return NULL; } - *errp = err; - if (!err && buffer_mapped(&dummy)) { - struct buffer_head *bh; - bh = sb_getblk(inode->i_sb, dummy.b_blocknr); - if (!bh) { - *errp = -EIO; - goto err; - } - if (buffer_new(&dummy)) { - J_ASSERT(create != 0); - J_ASSERT(handle != NULL); + if (map.m_flags & EXT4_MAP_NEW) { + J_ASSERT(create != 0); + J_ASSERT(handle != NULL); - /* - * Now that we do not always journal data, we should - * keep in mind whether this should always journal the - * new buffer as metadata. For now, regular file - * writes use ext4_get_block instead, so it's not a - * problem. 
- */ - lock_buffer(bh); - BUFFER_TRACE(bh, "call get_create_access"); - fatal = ext4_journal_get_create_access(handle, bh); - if (!fatal && !buffer_uptodate(bh)) { - memset(bh->b_data, 0, inode->i_sb->s_blocksize); - set_buffer_uptodate(bh); - } - unlock_buffer(bh); - BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); - err = ext4_handle_dirty_metadata(handle, inode, bh); - if (!fatal) - fatal = err; - } else { - BUFFER_TRACE(bh, "not a new buffer"); - } - if (fatal) { - *errp = fatal; - brelse(bh); - bh = NULL; + /* + * Now that we do not always journal data, we should + * keep in mind whether this should always journal the + * new buffer as metadata. For now, regular file + * writes use ext4_get_block instead, so it's not a + * problem. + */ + lock_buffer(bh); + BUFFER_TRACE(bh, "call get_create_access"); + fatal = ext4_journal_get_create_access(handle, bh); + if (!fatal && !buffer_uptodate(bh)) { + memset(bh->b_data, 0, inode->i_sb->s_blocksize); + set_buffer_uptodate(bh); } - return bh; + unlock_buffer(bh); + BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); + err = ext4_handle_dirty_metadata(handle, inode, bh); + if (!fatal) + fatal = err; + } else { + BUFFER_TRACE(bh, "not a new buffer"); } -err: - return NULL; + if (fatal) { + *errp = fatal; + brelse(bh); + bh = NULL; + } + return bh; } struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode, @@ -2050,28 +2029,23 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd) /* * mpage_put_bnr_to_bhs - walk blocks and assign them actual numbers * - * @mpd->inode - inode to walk through - * @exbh->b_blocknr - first block on a disk - * @exbh->b_size - amount of space in bytes - * @logical - first logical block to start assignment with - * * the function goes through all passed space and put actual disk * block numbers into buffer heads, dropping BH_Delay and BH_Unwritten */ -static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical, - struct buffer_head *exbh) +static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, + struct ext4_map_blocks *map) { struct inode *inode = mpd->inode; struct address_space *mapping = inode->i_mapping; - int blocks = exbh->b_size >> inode->i_blkbits; - sector_t pblock = exbh->b_blocknr, cur_logical; + int blocks = map->m_len; + sector_t pblock = map->m_pblk, cur_logical; struct buffer_head *head, *bh; pgoff_t index, end; struct pagevec pvec; int nr_pages, i; - index = logical >> (PAGE_CACHE_SHIFT - inode->i_blkbits); - end = (logical + blocks - 1) >> (PAGE_CACHE_SHIFT - inode->i_blkbits); + index = map->m_lblk >> (PAGE_CACHE_SHIFT - inode->i_blkbits); + end = (map->m_lblk + blocks - 1) >> (PAGE_CACHE_SHIFT - inode->i_blkbits); cur_logical = index << (PAGE_CACHE_SHIFT - inode->i_blkbits); pagevec_init(&pvec, 0); @@ -2098,17 +2072,16 @@ static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical, /* skip blocks out of the range */ do { - if (cur_logical >= logical) + if (cur_logical >= map->m_lblk) break; cur_logical++; } while ((bh = bh->b_this_page) != head); do { - if (cur_logical >= logical + blocks) + if (cur_logical >= map->m_lblk + blocks) break; - if (buffer_delay(bh) || - buffer_unwritten(bh)) { + if (buffer_delay(bh) || buffer_unwritten(bh)) { BUG_ON(bh->b_bdev != inode->i_sb->s_bdev); @@ -2127,7 +2100,7 @@ static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical, } else if (buffer_mapped(bh)) BUG_ON(bh->b_blocknr != pblock); - if (buffer_uninit(exbh)) + if (map->m_flags & EXT4_MAP_UNINIT) set_buffer_uninit(bh); 
cur_logical++; pblock++; @@ -2138,21 +2111,6 @@ static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical, } -/* - * __unmap_underlying_blocks - just a helper function to unmap - * set of blocks described by @bh - */ -static inline void __unmap_underlying_blocks(struct inode *inode, - struct buffer_head *bh) -{ - struct block_device *bdev = inode->i_sb->s_bdev; - int blocks, i; - - blocks = bh->b_size >> inode->i_blkbits; - for (i = 0; i < blocks; i++) - unmap_underlying_metadata(bdev, bh->b_blocknr + i); -} - static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd, sector_t logical, long blk_cnt) { @@ -2214,7 +2172,7 @@ static void ext4_print_free_blocks(struct inode *inode) static int mpage_da_map_blocks(struct mpage_da_data *mpd) { int err, blks, get_blocks_flags; - struct buffer_head new; + struct ext4_map_blocks map; sector_t next = mpd->b_blocknr; unsigned max_blocks = mpd->b_size >> mpd->inode->i_blkbits; loff_t disksize = EXT4_I(mpd->inode)->i_disksize; @@ -2255,15 +2213,15 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd) * EXT4_GET_BLOCKS_DELALLOC_RESERVE so the delalloc accounting * variables are updated after the blocks have been allocated. */ - new.b_state = 0; + map.m_lblk = next; + map.m_len = max_blocks; get_blocks_flags = EXT4_GET_BLOCKS_CREATE; if (ext4_should_dioread_nolock(mpd->inode)) get_blocks_flags |= EXT4_GET_BLOCKS_IO_CREATE_EXT; if (mpd->b_state & (1 << BH_Delay)) get_blocks_flags |= EXT4_GET_BLOCKS_DELALLOC_RESERVE; - blks = ext4_get_blocks(handle, mpd->inode, next, max_blocks, - &new, get_blocks_flags); + blks = ext4_map_blocks(handle, mpd->inode, &map, get_blocks_flags); if (blks < 0) { err = blks; /* @@ -2305,10 +2263,13 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd) } BUG_ON(blks == 0); - new.b_size = (blks << mpd->inode->i_blkbits); + if (map.m_flags & EXT4_MAP_NEW) { + struct block_device *bdev = mpd->inode->i_sb->s_bdev; + int i; - if (buffer_new(&new)) - __unmap_underlying_blocks(mpd->inode, &new); + for (i = 0; i < map.m_len; i++) + unmap_underlying_metadata(bdev, map.m_pblk + i); + } /* * If blocks are delayed marked, we need to @@ -2316,7 +2277,7 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd) */ if ((mpd->b_state & (1 << BH_Delay)) || (mpd->b_state & (1 << BH_Unwritten))) - mpage_put_bnr_to_bhs(mpd, next, &new); + mpage_put_bnr_to_bhs(mpd, &map); if (ext4_should_order_data(mpd->inode)) { err = ext4_jbd2_file_inode(handle, mpd->inode); @@ -2534,8 +2495,9 @@ static int __mpage_da_writepage(struct page *page, * initialized properly. */ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, - struct buffer_head *bh_result, int create) + struct buffer_head *bh, int create) { + struct ext4_map_blocks map; int ret = 0; sector_t invalid_block = ~((sector_t) 0xffff); @@ -2543,16 +2505,22 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, invalid_block = ~0; BUG_ON(create == 0); - BUG_ON(bh_result->b_size != inode->i_sb->s_blocksize); + BUG_ON(bh->b_size != inode->i_sb->s_blocksize); + + map.m_lblk = iblock; + map.m_len = 1; /* * first, we need to know whether the block is allocated already * preallocated blocks are unmapped but should treated * the same as allocated blocks. 
*/ - ret = ext4_get_blocks(NULL, inode, iblock, 1, bh_result, 0); - if ((ret == 0) && !buffer_delay(bh_result)) { - /* the block isn't (pre)allocated yet, let's reserve space */ + ret = ext4_map_blocks(NULL, inode, &map, 0); + if (ret < 0) + return ret; + if (ret == 0) { + if (buffer_delay(bh)) + return 0; /* Not sure this could or should happen */ /* * XXX: __block_prepare_write() unmaps passed block, * is it OK? @@ -2562,26 +2530,26 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, /* not enough space to reserve */ return ret; - map_bh(bh_result, inode->i_sb, invalid_block); - set_buffer_new(bh_result); - set_buffer_delay(bh_result); - } else if (ret > 0) { - bh_result->b_size = (ret << inode->i_blkbits); - if (buffer_unwritten(bh_result)) { - /* A delayed write to unwritten bh should - * be marked new and mapped. Mapped ensures - * that we don't do get_block multiple times - * when we write to the same offset and new - * ensures that we do proper zero out for - * partial write. - */ - set_buffer_new(bh_result); - set_buffer_mapped(bh_result); - } - ret = 0; + map_bh(bh, inode->i_sb, invalid_block); + set_buffer_new(bh); + set_buffer_delay(bh); + return 0; } - return ret; + map_bh(bh, inode->i_sb, map.m_pblk); + bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags; + + if (buffer_unwritten(bh)) { + /* A delayed write to unwritten bh should be marked + * new and mapped. Mapped ensures that we don't do + * get_block multiple times when we write to the same + * offset and new ensures that we do proper zero out + * for partial write. + */ + set_buffer_new(bh); + set_buffer_mapped(bh); + } + return 0; } /* @@ -2603,21 +2571,8 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, static int noalloc_get_block_write(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) { - int ret = 0; - unsigned max_blocks = bh_result->b_size >> inode->i_blkbits; - BUG_ON(bh_result->b_size != inode->i_sb->s_blocksize); - - /* - * we don't want to do block allocation in writepage - * so call get_block_wrap with create = 0 - */ - ret = ext4_get_blocks(NULL, inode, iblock, max_blocks, bh_result, 0); - if (ret > 0) { - bh_result->b_size = (ret << inode->i_blkbits); - ret = 0; - } - return ret; + return _ext4_get_block(inode, iblock, bh_result, 0); } static int bget_one(handle_t *handle, struct buffer_head *bh) @@ -3644,46 +3599,18 @@ out: return ret; } +/* + * ext4_get_block used when preparing for a DIO write or buffer write. + * We allocate an uinitialized extent if blocks haven't been allocated. + * The extent will be converted to initialized after the IO is complete. + */ static int ext4_get_block_write(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) { - handle_t *handle = ext4_journal_current_handle(); - int ret = 0; - unsigned max_blocks = bh_result->b_size >> inode->i_blkbits; - int dio_credits; - int started = 0; - ext4_debug("ext4_get_block_write: inode %lu, create flag %d\n", inode->i_ino, create); - /* - * ext4_get_block in prepare for a DIO write or buffer write. - * We allocate an uinitialized extent if blocks haven't been allocated. - * The extent will be converted to initialized after IO complete. 
- */ - create = EXT4_GET_BLOCKS_IO_CREATE_EXT; - - if (!handle) { - if (max_blocks > DIO_MAX_BLOCKS) - max_blocks = DIO_MAX_BLOCKS; - dio_credits = ext4_chunk_trans_blocks(inode, max_blocks); - handle = ext4_journal_start(inode, dio_credits); - if (IS_ERR(handle)) { - ret = PTR_ERR(handle); - goto out; - } - started = 1; - } - - ret = ext4_get_blocks(handle, inode, iblock, max_blocks, bh_result, - create); - if (ret > 0) { - bh_result->b_size = (ret << inode->i_blkbits); - ret = 0; - } - if (started) - ext4_journal_stop(handle); -out: - return ret; + return _ext4_get_block(inode, iblock, bh_result, + EXT4_GET_BLOCKS_IO_CREATE_EXT); } static void dump_completed_IO(struct inode * inode) -- cgit v1.2.3 From 24676da469f50f433baa347845639662c561d1f6 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sun, 16 May 2010 21:00:00 -0400 Subject: ext4: Convert calls of ext4_error() to EXT4_ERROR_INODE() EXT4_ERROR_INODE() tends to provide better error information and in a more consistent format. Some errors were not even identifying the inode or directory which was corrupted, which made them not very useful. Addresses-Google-Bug: #2507977 Signed-off-by: "Theodore Ts'o" --- fs/ext4/dir.c | 10 +++---- fs/ext4/ext4.h | 4 +-- fs/ext4/extents.c | 10 +++---- fs/ext4/inode.c | 83 +++++++++++++++++++++++---------------------------- fs/ext4/move_extent.c | 8 ++--- fs/ext4/namei.c | 39 +++++++++++------------- fs/ext4/xattr.c | 35 ++++++++++------------ 7 files changed, 86 insertions(+), 103 deletions(-) (limited to 'fs/ext4/extents.c') diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index e3f2700b8be7..6a2b8bcff641 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c @@ -83,11 +83,10 @@ int ext4_check_dir_entry(const char *function, struct inode *dir, error_msg = "inode out of bounds"; if (error_msg != NULL) - __ext4_error(dir->i_sb, function, - "bad entry in directory #%lu: %s - block=%llu" + ext4_error_inode(function, dir, + "bad entry in directory: %s - block=%llu" "offset=%u(%u), inode=%u, rec_len=%d, name_len=%d", - dir->i_ino, error_msg, - (unsigned long long) bh->b_blocknr, + error_msg, (unsigned long long) bh->b_blocknr, (unsigned) (offset%bh->b_size), offset, le32_to_cpu(de->inode), rlen, de->name_len); @@ -152,9 +151,8 @@ static int ext4_readdir(struct file *filp, */ if (!bh) { if (!dir_has_error) { - ext4_error(sb, "directory #%lu " + EXT4_ERROR_INODE(inode, "directory " "contains a hole at offset %Lu", - inode->i_ino, (unsigned long long) filp->f_pos); dir_has_error = 1; } diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 57fc0e5c0918..413321ff1e20 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -54,10 +54,10 @@ #endif #define EXT4_ERROR_INODE(inode, fmt, a...) \ - ext4_error_inode(__func__, (inode), (fmt), ## a); + ext4_error_inode(__func__, (inode), (fmt), ## a) #define EXT4_ERROR_FILE(file, fmt, a...) 
\ - ext4_error_file(__func__, (file), (fmt), ## a); + ext4_error_file(__func__, (file), (fmt), ## a) /* data type for block offset of block group */ typedef int ext4_grpblk_t; diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index d6801eb7232b..91c3873970e4 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -439,10 +439,10 @@ static int __ext4_ext_check(const char *function, struct inode *inode, return 0; corrupted: - __ext4_error(inode->i_sb, function, - "bad header/extent in inode #%lu: %s - magic %x, " + ext4_error_inode(function, inode, + "bad header/extent: %s - magic %x, " "entries %u, max %u(%u), depth %u(%u)", - inode->i_ino, error_msg, le16_to_cpu(eh->eh_magic), + error_msg, le16_to_cpu(eh->eh_magic), le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max), max, le16_to_cpu(eh->eh_depth), depth); @@ -1622,9 +1622,7 @@ int ext4_ext_try_to_merge(struct inode *inode, merge_done = 1; WARN_ON(eh->eh_entries == 0); if (!eh->eh_entries) - ext4_error(inode->i_sb, - "inode#%lu, eh->eh_entries = 0!", - inode->i_ino); + EXT4_ERROR_INODE(inode, "eh->eh_entries = 0!"); } return merge_done; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 0b1d7c89f93f..3b6877257580 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -348,9 +348,8 @@ static int __ext4_check_blockref(const char *function, struct inode *inode, if (blk && unlikely(!ext4_data_block_valid(EXT4_SB(inode->i_sb), blk, 1))) { - __ext4_error(inode->i_sb, function, - "invalid block reference %u " - "in inode #%lu", blk, inode->i_ino); + ext4_error_inode(function, inode, + "invalid block reference %u", blk); return -EIO; } } @@ -1129,15 +1128,15 @@ void ext4_da_update_reserve_space(struct inode *inode, ext4_discard_preallocations(inode); } -static int check_block_validity(struct inode *inode, const char *msg, - sector_t logical, sector_t phys, int len) +static int check_block_validity(struct inode *inode, const char *func, + struct ext4_map_blocks *map) { - if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), phys, len)) { - __ext4_error(inode->i_sb, msg, - "inode #%lu logical block %llu mapped to %llu " - "(size %d)", inode->i_ino, - (unsigned long long) logical, - (unsigned long long) phys, len); + if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), map->m_pblk, + map->m_len)) { + ext4_error_inode(func, inode, + "lblock %lu mapped to illegal pblock %llu " + "(length %d)", (unsigned long) map->m_lblk, + map->m_pblk, map->m_len); return -EIO; } return 0; @@ -1245,8 +1244,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, up_read((&EXT4_I(inode)->i_data_sem)); if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { - int ret = check_block_validity(inode, "file system corruption", - map->m_lblk, map->m_pblk, retval); + int ret = check_block_validity(inode, __func__, map); if (ret != 0) return ret; } @@ -1326,10 +1324,9 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, up_write((&EXT4_I(inode)->i_data_sem)); if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { - int ret = check_block_validity(inode, "file system " - "corruption after allocation", - map->m_lblk, map->m_pblk, - retval); + int ret = check_block_validity(inode, + "ext4_map_blocks_after_alloc", + map); if (ret != 0) return ret; } @@ -4327,10 +4324,9 @@ static int ext4_clear_blocks(handle_t *handle, struct inode *inode, if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), block_to_free, count)) { - ext4_error(inode->i_sb, "inode #%lu: " - "attempt to clear blocks %llu len %lu, invalid", - inode->i_ino, (unsigned long long) block_to_free, - count); + 
EXT4_ERROR_INODE(inode, "attempt to clear invalid " + "blocks %llu len %lu", + (unsigned long long) block_to_free, count); return 1; } @@ -4435,11 +4431,10 @@ static void ext4_free_data(handle_t *handle, struct inode *inode, if ((EXT4_JOURNAL(inode) == NULL) || bh2jh(this_bh)) ext4_handle_dirty_metadata(handle, inode, this_bh); else - ext4_error(inode->i_sb, - "circular indirect block detected, " - "inode=%lu, block=%llu", - inode->i_ino, - (unsigned long long) this_bh->b_blocknr); + EXT4_ERROR_INODE(inode, + "circular indirect block detected at " + "block %llu", + (unsigned long long) this_bh->b_blocknr); } } @@ -4477,11 +4472,10 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode, if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), nr, 1)) { - ext4_error(inode->i_sb, - "indirect mapped block in inode " - "#%lu invalid (level %d, blk #%lu)", - inode->i_ino, depth, - (unsigned long) nr); + EXT4_ERROR_INODE(inode, + "invalid indirect mapped " + "block %lu (level %d)", + (unsigned long) nr, depth); break; } @@ -4493,9 +4487,9 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode, * (should be rare). */ if (!bh) { - ext4_error(inode->i_sb, - "Read failure, inode=%lu, block=%llu", - inode->i_ino, nr); + EXT4_ERROR_INODE(inode, + "Read failure block=%llu", + (unsigned long long) nr); continue; } @@ -4810,8 +4804,8 @@ static int __ext4_get_inode_loc(struct inode *inode, bh = sb_getblk(sb, block); if (!bh) { - ext4_error(sb, "unable to read inode block - " - "inode=%lu, block=%llu", inode->i_ino, block); + EXT4_ERROR_INODE(inode, "unable to read inode block - " + "block %llu", block); return -EIO; } if (!buffer_uptodate(bh)) { @@ -4909,8 +4903,8 @@ make_io: submit_bh(READ_META, bh); wait_on_buffer(bh); if (!buffer_uptodate(bh)) { - ext4_error(sb, "unable to read inode block - inode=%lu," - " block=%llu", inode->i_ino, block); + EXT4_ERROR_INODE(inode, "unable to read inode " + "block %llu", block); brelse(bh); return -EIO; } @@ -5121,8 +5115,8 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) ret = 0; if (ei->i_file_acl && !ext4_data_block_valid(EXT4_SB(sb), ei->i_file_acl, 1)) { - ext4_error(sb, "bad extended attribute block %llu inode #%lu", - ei->i_file_acl, inode->i_ino); + EXT4_ERROR_INODE(inode, "bad extended attribute block %llu", + ei->i_file_acl); ret = -EIO; goto bad_inode; } else if (ei->i_flags & EXT4_EXTENTS_FL) { @@ -5167,8 +5161,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) new_decode_dev(le32_to_cpu(raw_inode->i_block[1]))); } else { ret = -EIO; - ext4_error(inode->i_sb, "bogus i_mode (%o) for inode=%lu", - inode->i_mode, inode->i_ino); + EXT4_ERROR_INODE(inode, "bogus i_mode (%o)", inode->i_mode); goto bad_inode; } brelse(iloc.bh); @@ -5406,9 +5399,9 @@ int ext4_write_inode(struct inode *inode, struct writeback_control *wbc) if (wbc->sync_mode == WB_SYNC_ALL) sync_dirty_buffer(iloc.bh); if (buffer_req(iloc.bh) && !buffer_uptodate(iloc.bh)) { - ext4_error(inode->i_sb, "IO error syncing inode, " - "inode=%lu, block=%llu", inode->i_ino, - (unsigned long long)iloc.bh->b_blocknr); + EXT4_ERROR_INODE(inode, + "IO error syncing inode (block=%llu)", + (unsigned long long) iloc.bh->b_blocknr); err = -EIO; } brelse(iloc.bh); diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index 82621a360932..b8b8ea1ceda8 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c @@ -530,7 +530,7 @@ mext_leaf_block(handle_t *handle, struct inode *orig_inode, * new_ext |-------| */ if (le32_to_cpu(oext->ee_block) 
+ oext_alen - 1 < new_ext_end) { - ext4_error(orig_inode->i_sb, + EXT4_ERROR_INODE(orig_inode, "new_ext_end(%u) should be less than or equal to " "oext->ee_block(%u) + oext_alen(%d) - 1", new_ext_end, le32_to_cpu(oext->ee_block), @@ -693,12 +693,12 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode, while (1) { /* The extent for donor must be found. */ if (!dext) { - ext4_error(donor_inode->i_sb, + EXT4_ERROR_INODE(donor_inode, "The extent for donor must be found"); *err = -EIO; goto out; } else if (donor_off != le32_to_cpu(tmp_dext.ee_block)) { - ext4_error(donor_inode->i_sb, + EXT4_ERROR_INODE(donor_inode, "Donor offset(%u) and the first block of donor " "extent(%u) should be equal", donor_off, @@ -1355,7 +1355,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, if (ret1 < 0) break; if (*moved_len > len) { - ext4_error(orig_inode->i_sb, + EXT4_ERROR_INODE(orig_inode, "We replaced blocks too much! " "sum of replaced: %llu requested: %llu", *moved_len, len); diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 0c070fabd108..bff77b04f0d1 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -943,8 +943,8 @@ restart: wait_on_buffer(bh); if (!buffer_uptodate(bh)) { /* read error, skip block & hope for the best */ - ext4_error(sb, "reading directory #%lu offset %lu", - dir->i_ino, (unsigned long)block); + EXT4_ERROR_INODE(dir, "reading directory lblock %lu", + (unsigned long) block); brelse(bh); goto next; } @@ -1066,15 +1066,15 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, stru __u32 ino = le32_to_cpu(de->inode); brelse(bh); if (!ext4_valid_inum(dir->i_sb, ino)) { - ext4_error(dir->i_sb, "bad inode number: %u", ino); + EXT4_ERROR_INODE(dir, "bad inode number: %u", ino); return ERR_PTR(-EIO); } inode = ext4_iget(dir->i_sb, ino); if (unlikely(IS_ERR(inode))) { if (PTR_ERR(inode) == -ESTALE) { - ext4_error(dir->i_sb, - "deleted inode referenced: %u", - ino); + EXT4_ERROR_INODE(dir, + "deleted inode referenced: %u", + ino); return ERR_PTR(-EIO); } else { return ERR_CAST(inode); @@ -1104,8 +1104,8 @@ struct dentry *ext4_get_parent(struct dentry *child) brelse(bh); if (!ext4_valid_inum(child->d_inode->i_sb, ino)) { - ext4_error(child->d_inode->i_sb, - "bad inode number: %u", ino); + EXT4_ERROR_INODE(child->d_inode, + "bad parent inode number: %u", ino); return ERR_PTR(-EIO); } @@ -1404,9 +1404,7 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, de = (struct ext4_dir_entry_2 *)((char *)fde + ext4_rec_len_from_disk(fde->rec_len, blocksize)); if ((char *) de >= (((char *) root) + blocksize)) { - ext4_error(dir->i_sb, - "invalid rec_len for '..' 
in inode %lu", - dir->i_ino); + EXT4_ERROR_INODE(dir, "invalid rec_len for '..'"); brelse(bh); return -EIO; } @@ -1915,9 +1913,8 @@ static int empty_dir(struct inode *inode) if (inode->i_size < EXT4_DIR_REC_LEN(1) + EXT4_DIR_REC_LEN(2) || !(bh = ext4_bread(NULL, inode, 0, 0, &err))) { if (err) - ext4_error(inode->i_sb, - "error %d reading directory #%lu offset 0", - err, inode->i_ino); + EXT4_ERROR_INODE(inode, + "error %d reading directory lblock 0", err); else ext4_warning(inode->i_sb, "bad directory (dir #%lu) - no data block", @@ -1941,17 +1938,17 @@ static int empty_dir(struct inode *inode) de = ext4_next_entry(de1, sb->s_blocksize); while (offset < inode->i_size) { if (!bh || - (void *) de >= (void *) (bh->b_data+sb->s_blocksize)) { + (void *) de >= (void *) (bh->b_data+sb->s_blocksize)) { + unsigned int lblock; err = 0; brelse(bh); - bh = ext4_bread(NULL, inode, - offset >> EXT4_BLOCK_SIZE_BITS(sb), 0, &err); + lblock = offset >> EXT4_BLOCK_SIZE_BITS(sb); + bh = ext4_bread(NULL, inode, lblock, 0, &err); if (!bh) { if (err) - ext4_error(sb, - "error %d reading directory" - " #%lu offset %u", - err, inode->i_ino, offset); + EXT4_ERROR_INODE(inode, + "error %d reading directory " + "lblock %u", err, lblock); offset += sb->s_blocksize; continue; } diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index b4c5aa8489d8..0a09a2e96f0a 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -228,9 +228,8 @@ ext4_xattr_block_get(struct inode *inode, int name_index, const char *name, atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount)); if (ext4_xattr_check_block(bh)) { bad_block: - ext4_error(inode->i_sb, - "inode %lu: bad block %llu", inode->i_ino, - EXT4_I(inode)->i_file_acl); + EXT4_ERROR_INODE(inode, "bad block %llu", + EXT4_I(inode)->i_file_acl); error = -EIO; goto cleanup; } @@ -372,9 +371,8 @@ ext4_xattr_block_list(struct dentry *dentry, char *buffer, size_t buffer_size) ea_bdebug(bh, "b_count=%d, refcount=%d", atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount)); if (ext4_xattr_check_block(bh)) { - ext4_error(inode->i_sb, - "inode %lu: bad block %llu", inode->i_ino, - EXT4_I(inode)->i_file_acl); + EXT4_ERROR_INODE(inode, "bad block %llu", + EXT4_I(inode)->i_file_acl); error = -EIO; goto cleanup; } @@ -666,8 +664,8 @@ ext4_xattr_block_find(struct inode *inode, struct ext4_xattr_info *i, atomic_read(&(bs->bh->b_count)), le32_to_cpu(BHDR(bs->bh)->h_refcount)); if (ext4_xattr_check_block(bs->bh)) { - ext4_error(sb, "inode %lu: bad block %llu", - inode->i_ino, EXT4_I(inode)->i_file_acl); + EXT4_ERROR_INODE(inode, "bad block %llu", + EXT4_I(inode)->i_file_acl); error = -EIO; goto cleanup; } @@ -880,8 +878,8 @@ cleanup_dquot: goto cleanup; bad_block: - ext4_error(inode->i_sb, "inode %lu: bad block %llu", - inode->i_ino, EXT4_I(inode)->i_file_acl); + EXT4_ERROR_INODE(inode, "bad block %llu", + EXT4_I(inode)->i_file_acl); goto cleanup; #undef header @@ -1194,8 +1192,8 @@ retry: if (!bh) goto cleanup; if (ext4_xattr_check_block(bh)) { - ext4_error(inode->i_sb, "inode %lu: bad block %llu", - inode->i_ino, EXT4_I(inode)->i_file_acl); + EXT4_ERROR_INODE(inode, "bad block %llu", + EXT4_I(inode)->i_file_acl); error = -EIO; goto cleanup; } @@ -1372,14 +1370,14 @@ ext4_xattr_delete_inode(handle_t *handle, struct inode *inode) goto cleanup; bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl); if (!bh) { - ext4_error(inode->i_sb, "inode %lu: block %llu read error", - inode->i_ino, EXT4_I(inode)->i_file_acl); + EXT4_ERROR_INODE(inode, "block %llu read error", + 
EXT4_I(inode)->i_file_acl); goto cleanup; } if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) || BHDR(bh)->h_blocks != cpu_to_le32(1)) { - ext4_error(inode->i_sb, "inode %lu: bad block %llu", - inode->i_ino, EXT4_I(inode)->i_file_acl); + EXT4_ERROR_INODE(inode, "bad block %llu", + EXT4_I(inode)->i_file_acl); goto cleanup; } ext4_xattr_release_block(handle, inode, bh); @@ -1504,9 +1502,8 @@ again: } bh = sb_bread(inode->i_sb, ce->e_block); if (!bh) { - ext4_error(inode->i_sb, - "inode %lu: block %lu read error", - inode->i_ino, (unsigned long) ce->e_block); + EXT4_ERROR_INODE(inode, "block %lu read error", + (unsigned long) ce->e_block); } else if (le32_to_cpu(BHDR(bh)->h_refcount) >= EXT4_XATTR_REFCOUNT_MAX) { ea_idebug(inode, "block %lu refcount %d>=%d", -- cgit v1.2.3 From 12e9b892002d9af057655d35b44db8ee9243b0dc Mon Sep 17 00:00:00 2001 From: Dmitry Monakhov Date: Sun, 16 May 2010 22:00:00 -0400 Subject: ext4: Use bitops to read/modify i_flags in struct ext4_inode_info At several places we modify EXT4_I(inode)->i_flags without holding i_mutex (ext4_do_update_inode, ...). These modifications are racy and we can lose updates to i_flags. So convert handling of i_flags to use bitops which are atomic. https://bugzilla.kernel.org/show_bug.cgi?id=15792 Signed-off-by: Dmitry Monakhov Signed-off-by: "Theodore Ts'o" --- fs/ext4/dir.c | 4 +- fs/ext4/ext4.h | 109 +++++++++++++++++++++++++++++++++++++++++++------- fs/ext4/ext4_jbd2.h | 8 ++-- fs/ext4/extents.c | 10 ++--- fs/ext4/file.c | 2 +- fs/ext4/ialloc.c | 4 +- fs/ext4/inode.c | 30 +++++++------- fs/ext4/mballoc.c | 4 +- fs/ext4/migrate.c | 2 +- fs/ext4/move_extent.c | 4 +- fs/ext4/namei.c | 10 ++--- fs/ext4/super.c | 1 + fs/ext4/xattr.c | 4 +- 13 files changed, 136 insertions(+), 56 deletions(-) (limited to 'fs/ext4/extents.c') diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index 6a2b8bcff641..ea5e6cb7e2a5 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c @@ -110,7 +110,7 @@ static int ext4_readdir(struct file *filp, if (EXT4_HAS_COMPAT_FEATURE(inode->i_sb, EXT4_FEATURE_COMPAT_DIR_INDEX) && - ((EXT4_I(inode)->i_flags & EXT4_INDEX_FL) || + ((ext4_test_inode_flag(inode, EXT4_INODE_INDEX)) || ((inode->i_size >> sb->s_blocksize_bits) == 1))) { err = ext4_dx_readdir(filp, dirent, filldir); if (err != ERR_BAD_DX_DIR) { @@ -121,7 +121,7 @@ static int ext4_readdir(struct file *filp, * We don't set the inode dirty flag since it's not * critical that it get flushed back to the disk. */ - EXT4_I(filp->f_path.dentry->d_inode)->i_flags &= ~EXT4_INDEX_FL; + ext4_clear_inode_flag(filp->f_path.dentry->d_inode, EXT4_INODE_INDEX); } stored = 0; offset = filp->f_pos & (sb->s_blocksize - 1); diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 413321ff1e20..74414884580c 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -344,6 +344,83 @@ static inline __u32 ext4_mask_flags(umode_t mode, __u32 flags) return flags & EXT4_OTHER_FLMASK; } +/* + * Inode flags used for atomic set/get + */ +enum { + EXT4_INODE_SECRM = 0, /* Secure deletion */ + EXT4_INODE_UNRM = 1, /* Undelete */ + EXT4_INODE_COMPR = 2, /* Compress file */ + EXT4_INODE_SYNC = 3, /* Synchronous updates */ + EXT4_INODE_IMMUTABLE = 4, /* Immutable file */ + EXT4_INODE_APPEND = 5, /* writes to file may only append */ + EXT4_INODE_NODUMP = 6, /* do not dump file */ + EXT4_INODE_NOATIME = 7, /* do not update atime */ +/* Reserved for compression usage... 
*/ + EXT4_INODE_DIRTY = 8, + EXT4_INODE_COMPRBLK = 9, /* One or more compressed clusters */ + EXT4_INODE_NOCOMPR = 10, /* Don't compress */ + EXT4_INODE_ECOMPR = 11, /* Compression error */ +/* End compression flags --- maybe not all used */ + EXT4_INODE_INDEX = 12, /* hash-indexed directory */ + EXT4_INODE_IMAGIC = 13, /* AFS directory */ + EXT4_INODE_JOURNAL_DATA = 14, /* file data should be journaled */ + EXT4_INODE_NOTAIL = 15, /* file tail should not be merged */ + EXT4_INODE_DIRSYNC = 16, /* dirsync behaviour (directories only) */ + EXT4_INODE_TOPDIR = 17, /* Top of directory hierarchies*/ + EXT4_INODE_HUGE_FILE = 18, /* Set to each huge file */ + EXT4_INODE_EXTENTS = 19, /* Inode uses extents */ + EXT4_INODE_EA_INODE = 21, /* Inode used for large EA */ + EXT4_INODE_EOFBLOCKS = 22, /* Blocks allocated beyond EOF */ + EXT4_INODE_RESERVED = 31, /* reserved for ext4 lib */ +}; + +#define TEST_FLAG_VALUE(FLAG) (EXT4_##FLAG##_FL == (1 << EXT4_INODE_##FLAG)) +#define CHECK_FLAG_VALUE(FLAG) if (!TEST_FLAG_VALUE(FLAG)) { \ + printk(KERN_EMERG "EXT4 flag fail: " #FLAG ": %d %d\n", \ + EXT4_##FLAG##_FL, EXT4_INODE_##FLAG); BUG_ON(1); } + +/* + * Since it's pretty easy to mix up bit numbers and hex values, and we + * can't do a compile-time test for ENUM values, we use a run-time + * test to make sure that EXT4_XXX_FL is consistent with respect to + * EXT4_INODE_XXX. If all is well the printk and BUG_ON will all drop + * out so it won't cost any extra space in the compiled kernel image. + * But it's important that these values are the same, since we are + * using EXT4_INODE_XXX to test for the flag values, but EXT4_XX_FL + * must be consistent with the values of FS_XXX_FL defined in + * include/linux/fs.h and the on-disk values found in ext2, ext3, and + * ext4 filesystems, and of course the values defined in e2fsprogs. + * + * It's not paranoia if the Murphy's Law really *is* out to get you. 
:-) + */ +static inline void ext4_check_flag_values(void) +{ + CHECK_FLAG_VALUE(SECRM); + CHECK_FLAG_VALUE(UNRM); + CHECK_FLAG_VALUE(COMPR); + CHECK_FLAG_VALUE(SYNC); + CHECK_FLAG_VALUE(IMMUTABLE); + CHECK_FLAG_VALUE(APPEND); + CHECK_FLAG_VALUE(NODUMP); + CHECK_FLAG_VALUE(NOATIME); + CHECK_FLAG_VALUE(DIRTY); + CHECK_FLAG_VALUE(COMPRBLK); + CHECK_FLAG_VALUE(NOCOMPR); + CHECK_FLAG_VALUE(ECOMPR); + CHECK_FLAG_VALUE(INDEX); + CHECK_FLAG_VALUE(IMAGIC); + CHECK_FLAG_VALUE(JOURNAL_DATA); + CHECK_FLAG_VALUE(NOTAIL); + CHECK_FLAG_VALUE(DIRSYNC); + CHECK_FLAG_VALUE(TOPDIR); + CHECK_FLAG_VALUE(HUGE_FILE); + CHECK_FLAG_VALUE(EXTENTS); + CHECK_FLAG_VALUE(EA_INODE); + CHECK_FLAG_VALUE(EOFBLOCKS); + CHECK_FLAG_VALUE(RESERVED); +} + /* Used to pass group descriptor data when online resize is done */ struct ext4_new_group_input { __u32 group; /* Group number for this data */ @@ -639,9 +716,8 @@ struct ext4_ext_cache { */ struct ext4_inode_info { __le32 i_data[15]; /* unconverted */ - __u32 i_flags; - ext4_fsblk_t i_file_acl; __u32 i_dtime; + ext4_fsblk_t i_file_acl; /* * i_block_group is the number of the block group which contains @@ -652,6 +728,7 @@ struct ext4_inode_info { */ ext4_group_t i_block_group; unsigned long i_state_flags; /* Dynamic state flags */ + unsigned long i_flags; ext4_lblk_t i_dir_start_lookup; #ifdef CONFIG_EXT4_FS_XATTR @@ -1087,20 +1164,22 @@ enum { EXT4_STATE_DIO_UNWRITTEN, /* need convert on dio done*/ }; -static inline int ext4_test_inode_state(struct inode *inode, int bit) -{ - return test_bit(bit, &EXT4_I(inode)->i_state_flags); +#define EXT4_INODE_BIT_FNS(name, field) \ +static inline int ext4_test_inode_##name(struct inode *inode, int bit) \ +{ \ + return test_bit(bit, &EXT4_I(inode)->i_##field); \ +} \ +static inline void ext4_set_inode_##name(struct inode *inode, int bit) \ +{ \ + set_bit(bit, &EXT4_I(inode)->i_##field); \ +} \ +static inline void ext4_clear_inode_##name(struct inode *inode, int bit) \ +{ \ + clear_bit(bit, &EXT4_I(inode)->i_##field); \ } -static inline void ext4_set_inode_state(struct inode *inode, int bit) -{ - set_bit(bit, &EXT4_I(inode)->i_state_flags); -} - -static inline void ext4_clear_inode_state(struct inode *inode, int bit) -{ - clear_bit(bit, &EXT4_I(inode)->i_state_flags); -} +EXT4_INODE_BIT_FNS(flag, flags) +EXT4_INODE_BIT_FNS(state, state_flags) #else /* Assume that user mode programs are passing in an ext4fs superblock, not * a kernel struct super_block. 
This will allow us to call the feature-test @@ -1287,7 +1366,7 @@ struct ext4_dir_entry_2 { #define is_dx(dir) (EXT4_HAS_COMPAT_FEATURE(dir->i_sb, \ EXT4_FEATURE_COMPAT_DIR_INDEX) && \ - (EXT4_I(dir)->i_flags & EXT4_INDEX_FL)) + ext4_test_inode_flag((dir), EXT4_INODE_INDEX)) #define EXT4_DIR_LINK_MAX(dir) (!is_dx(dir) && (dir)->i_nlink >= EXT4_LINK_MAX) #define EXT4_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2 || (dir)->i_nlink == 1) diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h index b79ad5126468..dade0c024797 100644 --- a/fs/ext4/ext4_jbd2.h +++ b/fs/ext4/ext4_jbd2.h @@ -273,7 +273,7 @@ static inline int ext4_should_journal_data(struct inode *inode) return 1; if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) return 1; - if (EXT4_I(inode)->i_flags & EXT4_JOURNAL_DATA_FL) + if (ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA)) return 1; return 0; } @@ -284,7 +284,7 @@ static inline int ext4_should_order_data(struct inode *inode) return 0; if (!S_ISREG(inode->i_mode)) return 0; - if (EXT4_I(inode)->i_flags & EXT4_JOURNAL_DATA_FL) + if (ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA)) return 0; if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) return 1; @@ -297,7 +297,7 @@ static inline int ext4_should_writeback_data(struct inode *inode) return 0; if (EXT4_JOURNAL(inode) == NULL) return 1; - if (EXT4_I(inode)->i_flags & EXT4_JOURNAL_DATA_FL) + if (ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA)) return 0; if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA) return 1; @@ -321,7 +321,7 @@ static inline int ext4_should_dioread_nolock(struct inode *inode) return 0; if (!S_ISREG(inode->i_mode)) return 0; - if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)) + if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) return 0; if (ext4_should_journal_data(inode)) return 0; diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 91c3873970e4..42d8ce91adbd 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -3498,7 +3498,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, map->m_flags |= EXT4_MAP_UNINIT; } - if (unlikely(EXT4_I(inode)->i_flags & EXT4_EOFBLOCKS_FL)) { + if (unlikely(ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS))) { if (unlikely(!eh->eh_entries)) { EXT4_ERROR_INODE(inode, "eh->eh_entries == 0 ee_block %d", @@ -3509,7 +3509,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, last_ex = EXT_LAST_EXTENT(eh); if (map->m_lblk + ar.len > le32_to_cpu(last_ex->ee_block) + ext4_ext_get_actual_len(last_ex)) - EXT4_I(inode)->i_flags &= ~EXT4_EOFBLOCKS_FL; + ext4_clear_inode_flag(inode, EXT4_INODE_EOFBLOCKS); } err = ext4_ext_insert_extent(handle, inode, path, &newex, flags); if (err) { @@ -3650,7 +3650,7 @@ static void ext4_falloc_update_inode(struct inode *inode, * can proceed even if the new size is the same as i_size. 
*/ if (new_size > i_size_read(inode)) - EXT4_I(inode)->i_flags |= EXT4_EOFBLOCKS_FL; + ext4_set_inode_flag(inode, EXT4_INODE_EOFBLOCKS); } } @@ -3677,7 +3677,7 @@ long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len) * currently supporting (pre)allocate mode for extent-based * files _only_ */ - if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)) + if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) return -EOPNOTSUPP; /* preallocation to directories is currently not supported */ @@ -3922,7 +3922,7 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, int error = 0; /* fallback to generic here if not in extents fmt */ - if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)) + if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) return generic_block_fiemap(inode, fieinfo, start, len, ext4_get_block); diff --git a/fs/ext4/file.c b/fs/ext4/file.c index d0776e410f34..5313ae4cda2d 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -66,7 +66,7 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov, * is smaller than s_maxbytes, which is for extent-mapped files. */ - if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)) { + if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) { struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); size_t length = iov_length(iov, nr_segs); diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 52618d5a1773..7f6b5826d5a6 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -492,7 +492,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent, if (S_ISDIR(mode) && ((parent == sb->s_root->d_inode) || - (EXT4_I(parent)->i_flags & EXT4_TOPDIR_FL))) { + (ext4_test_inode_flag(parent, EXT4_INODE_TOPDIR)))) { int best_ndir = inodes_per_group; int ret = -1; @@ -1038,7 +1038,7 @@ got: if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) { /* set extent flag only for directory, file and normal symlink*/ if (S_ISDIR(mode) || S_ISREG(mode) || S_ISLNK(mode)) { - EXT4_I(inode)->i_flags |= EXT4_EXTENTS_FL; + ext4_set_inode_flag(inode, EXT4_INODE_EXTENTS); ext4_ext_tree_init(handle, inode); } } diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 3b6877257580..c2b0724e8ad6 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -931,7 +931,7 @@ static int ext4_ind_map_blocks(handle_t *handle, struct inode *inode, int count = 0; ext4_fsblk_t first_block = 0; - J_ASSERT(!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)); + J_ASSERT(!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))); J_ASSERT(handle != NULL || (flags & EXT4_GET_BLOCKS_CREATE) == 0); depth = ext4_block_to_path(inode, map->m_lblk, offsets, &blocks_to_boundary); @@ -1059,7 +1059,7 @@ static int ext4_indirect_calc_metadata_amount(struct inode *inode, */ static int ext4_calc_metadata_amount(struct inode *inode, sector_t lblock) { - if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) + if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) return ext4_ext_calc_metadata_amount(inode, lblock); return ext4_indirect_calc_metadata_amount(inode, lblock); @@ -1236,7 +1236,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, * file system block. 
*/ down_read((&EXT4_I(inode)->i_data_sem)); - if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { + if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { retval = ext4_ext_map_blocks(handle, inode, map, 0); } else { retval = ext4_ind_map_blocks(handle, inode, map, 0); @@ -1295,7 +1295,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, * We need to check for EXT4 here because migrate * could have changed the inode type in between */ - if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { + if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { retval = ext4_ext_map_blocks(handle, inode, map, flags); } else { retval = ext4_ind_map_blocks(handle, inode, map, flags); @@ -2325,7 +2325,7 @@ static void mpage_add_bh_to_extent(struct mpage_da_data *mpd, goto flush_it; /* check if thereserved journal credits might overflow */ - if (!(EXT4_I(mpd->inode)->i_flags & EXT4_EXTENTS_FL)) { + if (!(ext4_test_inode_flag(mpd->inode, EXT4_INODE_EXTENTS))) { if (nrblocks >= EXT4_MAX_TRANS_DATA) { /* * With non-extent format we are limited by the journal @@ -2779,7 +2779,7 @@ static int ext4_da_writepages_trans_blocks(struct inode *inode) * number of contiguous block. So we will limit * number of contiguous block to a sane value */ - if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) && + if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) && (max_blocks > EXT4_MAX_TRANS_DATA)) max_blocks = EXT4_MAX_TRANS_DATA; @@ -3995,7 +3995,7 @@ static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb, struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; - if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) + if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) return ext4_ext_direct_IO(rw, iocb, iov, offset, nr_segs); return ext4_ind_direct_IO(rw, iocb, iov, offset, nr_segs); @@ -4631,12 +4631,12 @@ void ext4_truncate(struct inode *inode) if (!ext4_can_truncate(inode)) return; - EXT4_I(inode)->i_flags &= ~EXT4_EOFBLOCKS_FL; + ext4_clear_inode_flag(inode, EXT4_INODE_EOFBLOCKS); if (inode->i_size == 0 && !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC)) ext4_set_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE); - if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { + if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { ext4_ext_truncate(inode); return; } @@ -5473,7 +5473,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) } if (attr->ia_valid & ATTR_SIZE) { - if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)) { + if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) { struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); if (attr->ia_size > sbi->s_bitmap_maxbytes) { @@ -5486,7 +5486,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) if (S_ISREG(inode->i_mode) && attr->ia_valid & ATTR_SIZE && (attr->ia_size < inode->i_size || - (EXT4_I(inode)->i_flags & EXT4_EOFBLOCKS_FL))) { + (ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS)))) { handle_t *handle; handle = ext4_journal_start(inode, 3); @@ -5518,7 +5518,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) } } /* ext4_truncate will clear the flag */ - if ((EXT4_I(inode)->i_flags & EXT4_EOFBLOCKS_FL)) + if ((ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS))) ext4_truncate(inode); } @@ -5594,7 +5594,7 @@ static int ext4_indirect_trans_blocks(struct inode *inode, int nrblocks, static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk) { - if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)) + if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) return ext4_indirect_trans_blocks(inode, nrblocks, chunk); return 
ext4_ext_index_trans_blocks(inode, nrblocks, chunk); } @@ -5929,9 +5929,9 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val) */ if (val) - EXT4_I(inode)->i_flags |= EXT4_JOURNAL_DATA_FL; + ext4_set_inode_flag(inode, EXT4_INODE_JOURNAL_DATA); else - EXT4_I(inode)->i_flags &= ~EXT4_JOURNAL_DATA_FL; + ext4_clear_inode_flag(inode, EXT4_INODE_JOURNAL_DATA); ext4_set_aops(inode); jbd2_journal_unlock_updates(journal); diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index e5dcdc9bd56b..0bdc0188e5e2 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2006,7 +2006,7 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac) sbi = EXT4_SB(sb); ngroups = ext4_get_groups_count(sb); /* non-extent files are limited to low blocks/groups */ - if (!(EXT4_I(ac->ac_inode)->i_flags & EXT4_EXTENTS_FL)) + if (!(ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS))) ngroups = sbi->s_blockfile_groups; BUG_ON(ac->ac_status == AC_STATUS_FOUND); @@ -3171,7 +3171,7 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac) continue; /* non-extent files can't have physical blocks past 2^32 */ - if (!(EXT4_I(ac->ac_inode)->i_flags & EXT4_EXTENTS_FL) && + if (!(ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)) && pa->pa_pstart + pa->pa_len > EXT4_MAX_BLOCK_FILE_PHYS) continue; diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index 34dcfc52ef44..6f3a27ec30bf 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c @@ -475,7 +475,7 @@ int ext4_ext_migrate(struct inode *inode) */ if (!EXT4_HAS_INCOMPAT_FEATURE(inode->i_sb, EXT4_FEATURE_INCOMPAT_EXTENTS) || - (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)) + (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) return -EINVAL; if (S_ISLNK(inode->i_mode) && inode->i_blocks == 0) diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index b8b8ea1ceda8..3a6c92ac131c 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c @@ -977,11 +977,11 @@ mext_check_arguments(struct inode *orig_inode, } /* Ext4 move extent supports only extent based file */ - if (!(EXT4_I(orig_inode)->i_flags & EXT4_EXTENTS_FL)) { + if (!(ext4_test_inode_flag(orig_inode, EXT4_INODE_EXTENTS))) { ext4_debug("ext4 move extent: orig file is not extents " "based file [ino:orig %lu]\n", orig_inode->i_ino); return -EOPNOTSUPP; - } else if (!(EXT4_I(donor_inode)->i_flags & EXT4_EXTENTS_FL)) { + } else if (!(ext4_test_inode_flag(donor_inode, EXT4_INODE_EXTENTS))) { ext4_debug("ext4 move extent: donor file is not extents " "based file [ino:donor %lu]\n", donor_inode->i_ino); return -EOPNOTSUPP; diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index bff77b04f0d1..7b4bf8feae4e 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -656,7 +656,7 @@ int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash, dxtrace(printk(KERN_DEBUG "In htree_fill_tree, start hash: %x:%x\n", start_hash, start_minor_hash)); dir = dir_file->f_path.dentry->d_inode; - if (!(EXT4_I(dir)->i_flags & EXT4_INDEX_FL)) { + if (!(ext4_test_inode_flag(dir, EXT4_INODE_INDEX))) { hinfo.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version; if (hinfo.hash_version <= DX_HASH_TEA) hinfo.hash_version += @@ -801,7 +801,7 @@ static void ext4_update_dx_flag(struct inode *inode) { if (!EXT4_HAS_COMPAT_FEATURE(inode->i_sb, EXT4_FEATURE_COMPAT_DIR_INDEX)) - EXT4_I(inode)->i_flags &= ~EXT4_INDEX_FL; + ext4_clear_inode_flag(inode, EXT4_INODE_INDEX); } /* @@ -1416,7 +1416,7 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, brelse(bh); return retval; } - EXT4_I(dir)->i_flags |= 
EXT4_INDEX_FL; + ext4_set_inode_flag(dir, EXT4_INODE_INDEX); data1 = bh2->b_data; memcpy (data1, de, len); @@ -1489,7 +1489,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, retval = ext4_dx_add_entry(handle, dentry, inode); if (!retval || (retval != ERR_BAD_DX_DIR)) return retval; - EXT4_I(dir)->i_flags &= ~EXT4_INDEX_FL; + ext4_clear_inode_flag(dir, EXT4_INODE_INDEX); dx_fallback++; ext4_mark_inode_dirty(handle, dir); } @@ -2294,7 +2294,7 @@ retry: } } else { /* clear the extent format for fast symlink */ - EXT4_I(inode)->i_flags &= ~EXT4_EXTENTS_FL; + ext4_clear_inode_flag(inode, EXT4_INODE_EXTENTS); inode->i_op = &ext4_fast_symlink_inode_operations; memcpy((char *)&EXT4_I(inode)->i_data, symname, l); inode->i_size = l-1; diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 2e8790e801f4..9766b2ae6cdc 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -4148,6 +4148,7 @@ static int __init init_ext4_fs(void) { int err; + ext4_check_flag_values(); err = init_ext4_system_zone(); if (err) return err; diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 0a09a2e96f0a..47431bc662ab 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -818,7 +818,7 @@ inserted: EXT4_I(inode)->i_block_group); /* non-extent files can't have physical blocks past 2^32 */ - if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)) + if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) goal = goal & EXT4_MAX_BLOCK_FILE_PHYS; block = ext4_new_meta_blocks(handle, inode, @@ -826,7 +826,7 @@ inserted: if (error) goto cleanup; - if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)) + if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) BUG_ON(block > EXT4_MAX_BLOCK_FILE_PHYS); ea_idebug(inode, "creating block %d", block); -- cgit v1.2.3 From f70f362b4a6fe47c239dbfb3efc0cc2c10e4f09c Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sun, 16 May 2010 23:00:00 -0400 Subject: ext4: Avoid crashing on NULL ptr dereference on a filesystem error If the EOFBLOCK_FL flag is set when it should not be and the inode is zero length, then eh_entries is zero, and ex is NULL, so dereferencing ex to print ex->ee_block causes a kernel OOPS in ext4_ext_map_blocks(). On top of that, the error message which is printed isn't very helpful. So we fix this by printing something more explanatory which doesn't involve trying to print ex->ee_block. 
Addresses-Google-Bug: #2655740 Signed-off-by: "Theodore Ts'o" --- fs/ext4/extents.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'fs/ext4/extents.c') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 42d8ce91adbd..e40d2b793f2d 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -3370,8 +3370,9 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, */ if (unlikely(path[depth].p_ext == NULL && depth != 0)) { EXT4_ERROR_INODE(inode, "bad extent address " - "iblock: %d, depth: %d pblock %lld", - map->m_lblk, depth, path[depth].p_block); + "lblock: %lu, depth: %d pblock %lld", + (unsigned long) map->m_lblk, depth, + path[depth].p_block); err = -EIO; goto out2; } @@ -3501,8 +3502,8 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, if (unlikely(ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS))) { if (unlikely(!eh->eh_entries)) { EXT4_ERROR_INODE(inode, - "eh->eh_entries == 0 ee_block %d", - ex->ee_block); + "eh->eh_entries == 0 and " + "EOFBLOCKS_FL set"); err = -EIO; goto out2; } -- cgit v1.2.3 From 786ec7915e530936b9eb2e3d12274145cab7aa7d Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 17 May 2010 00:00:00 -0400 Subject: ext4: Clear the EXT4_EOFBLOCKS_FL flag only when warranted Dmitry Monakhov discovered an edge case where it was possible for the EXT4_EOFBLOCKS_FL flag to get cleared unnecessarily. This is true; I have a test case that can be exercised by downloading and decompressing the file: wget ftp://ftp.kernel.org/pub/linux/kernel/people/tytso/ext4-testcases/eofblocks-fl-test-case.img.bz2 bunzip2 eofblocks-fl-test-case.img.bz2 dd if=/dev/zero of=eofblocks-fl-test-case.img bs=1k seek=17925 count=1 conv=notrunc However, triggering it in real life is highly unlikely since it requires an extremely fragmented sparse file with a hole in exactly the right place in the extent tree. (It actually took quite a bit of work to generate this test case.) Still, it's nice to get even extreme corner cases to be correct, so this patch makes sure that we don't clear the EXT4_EOFBLOCKS_FL flag incorrectly even in this corner case. Signed-off-by: "Theodore Ts'o" --- fs/ext4/extents.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) (limited to 'fs/ext4/extents.c') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index e40d2b793f2d..ffcaa1137def 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -3319,7 +3319,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, struct ext4_extent_header *eh; struct ext4_extent newex, *ex, *last_ex; ext4_fsblk_t newblock; - int err = 0, depth, ret, cache_type; + int i, err = 0, depth, ret, cache_type; unsigned int allocated = 0; struct ext4_allocation_request ar; ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio; @@ -3508,8 +3508,20 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, goto out2; } last_ex = EXT_LAST_EXTENT(eh); - if (map->m_lblk + ar.len > le32_to_cpu(last_ex->ee_block) - + ext4_ext_get_actual_len(last_ex)) + /* + * If the current leaf block was reached by looking at + * the last index block all the way down the tree, and + * we are extending the inode beyond the last extent + * in the current leaf block, then clear the + * EOFBLOCKS_FL flag.
+ */ + for (i = depth-1; i >= 0; i--) { + if (path[i].p_idx != EXT_LAST_INDEX(path[i].p_hdr)) + break; + } + if ((i < 0) && + (map->m_lblk + ar.len > le32_to_cpu(last_ex->ee_block) + + ext4_ext_get_actual_len(last_ex))) ext4_clear_inode_flag(inode, EXT4_INODE_EOFBLOCKS); } err = ext4_ext_insert_extent(handle, inode, path, &newex, flags); -- cgit v1.2.3 From 0617b83fa239db9743a18ce6cc0e556f4d0fd567 Mon Sep 17 00:00:00 2001 From: Dmitry Monakhov Date: Mon, 17 May 2010 01:00:00 -0400 Subject: ext4: restart ext4_ext_remove_space() after transaction restart If i_data_sem was internally dropped due to a transaction restart, it is necessary to restart the path look-up because the extent tree may have been modified by ext4_get_block(). https://bugzilla.kernel.org/show_bug.cgi?id=15827 Signed-off-by: Dmitry Monakhov Signed-off-by: "Theodore Ts'o" Acked-by: Jan Kara --- fs/ext4/extents.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'fs/ext4/extents.c') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index ffcaa1137def..0a47becf668a 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -107,11 +107,8 @@ static int ext4_ext_truncate_extend_restart(handle_t *handle, if (err <= 0) return err; err = ext4_truncate_restart_trans(handle, inode, needed); - /* - * We have dropped i_data_sem so someone might have cached again - * an extent we are going to truncate. - */ - ext4_ext_invalidate_cache(inode); + if (err == 0) + err = -EAGAIN; return err; } @@ -2359,7 +2356,7 @@ static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start) int depth = ext_depth(inode); struct ext4_ext_path *path; handle_t *handle; - int i = 0, err = 0; + int i, err; ext_debug("truncate since %u\n", start); @@ -2368,23 +2365,26 @@ static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start) if (IS_ERR(handle)) return PTR_ERR(handle); +again: ext4_ext_invalidate_cache(inode); /* * We start scanning from right side, freeing all the blocks * after i_size and walking into the tree depth-wise.
*/ + depth = ext_depth(inode); path = kzalloc(sizeof(struct ext4_ext_path) * (depth + 1), GFP_NOFS); if (path == NULL) { ext4_journal_stop(handle); return -ENOMEM; } + path[0].p_depth = depth; path[0].p_hdr = ext_inode_hdr(inode); if (ext4_ext_check(inode, path[0].p_hdr, depth)) { err = -EIO; goto out; } - path[0].p_depth = depth; + i = err = 0; while (i >= 0 && err == 0) { if (i == depth) { @@ -2478,6 +2478,8 @@ static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start) out: ext4_ext_drop_refs(path); kfree(path); + if (err == -EAGAIN) + goto again; ext4_journal_stop(handle); return err; -- cgit v1.2.3 From 60e6679e28518ccd67169c4a539d8cc7490eb8a6 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 17 May 2010 07:00:00 -0400 Subject: ext4: Drop whitespace at end of lines This patch was generated using: #!/usr/bin/perl -i while (<>) { s/[ ]+$//; print; } Signed-off-by: "Theodore Ts'o" --- fs/ext4/block_validity.c | 4 ++-- fs/ext4/ext4.h | 4 ++-- fs/ext4/extents.c | 8 ++++---- fs/ext4/fsync.c | 2 +- fs/ext4/inode.c | 4 ++-- fs/ext4/ioctl.c | 2 +- fs/ext4/mballoc.c | 8 ++++---- fs/ext4/namei.c | 10 +++++----- fs/ext4/super.c | 4 ++-- 9 files changed, 23 insertions(+), 23 deletions(-) (limited to 'fs/ext4/extents.c') diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c index 538c48655084..5b6973fbf1bd 100644 --- a/fs/ext4/block_validity.c +++ b/fs/ext4/block_validity.c @@ -72,9 +72,9 @@ static int add_system_zone(struct ext4_sb_info *sbi, else if (start_blk >= (entry->start_blk + entry->count)) n = &(*n)->rb_right; else { - if (start_blk + count > (entry->start_blk + + if (start_blk + count > (entry->start_blk + entry->count)) - entry->count = (start_blk + count - + entry->count = (start_blk + count - entry->start_blk); new_node = *n; new_entry = rb_entry(new_node, struct ext4_system_zone, diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 2c1165f731ac..124ef771a618 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -75,7 +75,7 @@ typedef __u32 ext4_lblk_t; typedef unsigned int ext4_group_t; /* - * Flags used in mballoc's allocation_context flags field. + * Flags used in mballoc's allocation_context flags field. * * Also used to show what's going on for debugging purposes when the * flag field is exported via the traceport interface @@ -470,7 +470,7 @@ struct ext4_new_group_data { #define EXT4_GET_BLOCKS_CREATE_UNINIT_EXT (EXT4_GET_BLOCKS_UNINIT_EXT|\ EXT4_GET_BLOCKS_CREATE) /* Caller is from the delayed allocation writeout path, - so set the magic i_delalloc_reserve_flag after taking the + so set the magic i_delalloc_reserve_flag after taking the inode allocation semaphore for */ #define EXT4_GET_BLOCKS_DELALLOC_RESERVE 0x0004 /* caller is from the direct IO path, request to creation of an diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 0a47becf668a..377309c1af65 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -182,10 +182,10 @@ static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode, if (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) { /* * If there are at least EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME - * block groups per flexgroup, reserve the first block - * group for directories and special files. Regular + * block groups per flexgroup, reserve the first block + * group for directories and special files. Regular * files will start at the second block group. This - * tends to speed up directory access and improves + * tends to speed up directory access and improves * fsck times. 
*/ block_group &= ~(flex_size-1); @@ -2034,7 +2034,7 @@ ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block, struct ext4_ext_cache *cex; int ret = EXT4_EXT_CACHE_NO; - /* + /* * We borrow i_block_reservation_lock to protect i_cached_extent */ spin_lock(&EXT4_I(inode)->i_block_reservation_lock); diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index 42bd94a942c9..6ca7b6e59d83 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c @@ -66,7 +66,7 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync) ret = flush_completed_IO(inode); if (ret < 0) return ret; - + if (!journal) return simple_fsync(file, dentry, datasync); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 8e45331b6560..502b07dc70d0 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -784,7 +784,7 @@ failed: /* Allocation failed, free what we already allocated */ ext4_free_blocks(handle, inode, 0, new_blocks[0], 1, 0); for (i = 1; i <= n ; i++) { - /* + /* * branch[i].bh is newly allocated, so there is no * need to revoke the block, which is why we don't * need to set EXT4_FREE_BLOCKS_METADATA. @@ -874,7 +874,7 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode, err_out: for (i = 1; i <= num; i++) { - /* + /* * branch[i].bh is newly allocated, so there is no * need to revoke the block, which is why we don't * need to set EXT4_FREE_BLOCKS_METADATA. diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 6ddec84846cd..bf5ae883b1bd 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -258,7 +258,7 @@ setversion_out: if (me.moved_len > 0) file_remove_suid(donor_filp); - if (copy_to_user((struct move_extent __user *)arg, + if (copy_to_user((struct move_extent __user *)arg, &me, sizeof(me))) err = -EFAULT; mext_out: diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index a93f554b6803..12b3bc026a68 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2691,7 +2691,7 @@ int __init init_ext4_mballoc(void) void exit_ext4_mballoc(void) { - /* + /* * Wait for completion of call_rcu()'s on ext4_pspace_cachep * before destroying the slab cache. */ @@ -3330,7 +3330,7 @@ static void ext4_mb_put_pa(struct ext4_allocation_context *ac, spin_unlock(&pa->pa_lock); grp_blk = pa->pa_pstart; - /* + /* * If doing group-based preallocation, pa_pstart may be in the * next group when pa is used up */ @@ -4534,12 +4534,12 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, if (!bh) tbh = sb_find_get_block(inode->i_sb, block + i); - ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA, + ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA, inode, tbh, block + i); } } - /* + /* * We need to make sure we don't reuse the freed block until * after the transaction is committed, which we can do by * treating the block as metadata, below. 
We make an diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 7b4bf8feae4e..83be743c8d1f 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -187,7 +187,7 @@ unsigned int ext4_rec_len_from_disk(__le16 dlen, unsigned blocksize) return blocksize; return (len & 65532) | ((len & 3) << 16); } - + __le16 ext4_rec_len_to_disk(unsigned len, unsigned blocksize) { if ((len > blocksize) || (blocksize > (1 << 18)) || (len & 3)) @@ -197,7 +197,7 @@ __le16 ext4_rec_len_to_disk(unsigned len, unsigned blocksize) if (len == blocksize) { if (blocksize == 65536) return cpu_to_le16(EXT4_MAX_REC_LEN); - else + else return cpu_to_le16(0); } return cpu_to_le16((len & 65532) | ((len >> 16) & 3)); @@ -349,7 +349,7 @@ struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir, brelse(bh); } if (bcount) - printk(KERN_DEBUG "%snames %u, fullness %u (%u%%)\n", + printk(KERN_DEBUG "%snames %u, fullness %u (%u%%)\n", levels ? "" : " ", names, space/bcount, (space/bcount)*100/blocksize); return (struct stats) { names, space, bcount}; @@ -653,7 +653,7 @@ int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash, int ret, err; __u32 hashval; - dxtrace(printk(KERN_DEBUG "In htree_fill_tree, start hash: %x:%x\n", + dxtrace(printk(KERN_DEBUG "In htree_fill_tree, start hash: %x:%x\n", start_hash, start_minor_hash)); dir = dir_file->f_path.dentry->d_inode; if (!(ext4_test_inode_flag(dir, EXT4_INODE_INDEX))) { @@ -1141,7 +1141,7 @@ dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count, unsigned rec_len = 0; while (count--) { - struct ext4_dir_entry_2 *de = (struct ext4_dir_entry_2 *) + struct ext4_dir_entry_2 *de = (struct ext4_dir_entry_2 *) (from + (map->offs<<2)); rec_len = EXT4_DIR_REC_LEN(de->name_len); memcpy (to, de, rec_len); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 9766b2ae6cdc..49d88c0597c4 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -2216,7 +2216,7 @@ static unsigned long ext4_get_stripe_size(struct ext4_sb_info *sbi) struct ext4_attr { struct attribute attr; ssize_t (*show)(struct ext4_attr *, struct ext4_sb_info *, char *); - ssize_t (*store)(struct ext4_attr *, struct ext4_sb_info *, + ssize_t (*store)(struct ext4_attr *, struct ext4_sb_info *, const char *, size_t); int offset; }; @@ -3383,7 +3383,7 @@ static int ext4_commit_super(struct super_block *sb, int sync) if (!(sb->s_flags & MS_RDONLY)) es->s_wtime = cpu_to_le32(get_seconds()); es->s_kbytes_written = - cpu_to_le64(EXT4_SB(sb)->s_kbytes_written + + cpu_to_le64(EXT4_SB(sb)->s_kbytes_written + ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) - EXT4_SB(sb)->s_sectors_written_start) >> 1)); ext4_free_blocks_count_set(es, percpu_counter_sum_positive( -- cgit v1.2.3
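The i_flags conversion above ("ext4: Use bitops to read/modify i_flags in struct ext4_inode_info") replaces plain read-modify-write updates of a shared flags word with atomic bitops. The following stand-alone user-space C sketch illustrates the lost-update race that those bitops close; it intentionally uses C11 atomics and POSIX threads rather than the kernel's set_bit()/clear_bit(), so every name in it is illustrative and none of it is ext4 code.

/*
 * flags_race.c - show why unsynchronized "flags |= FL" updates can lose
 * bits, while atomic read-modify-write operations cannot.
 *
 * The "plain" word is deliberately updated with racy (formally undefined)
 * non-atomic operations; volatile only keeps the compiler from optimizing
 * the racy accesses away, it does not make them atomic.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

#define LOOPS	5000000UL
#define A_BIT	(1UL << 0)
#define B_BIT	(1UL << 1)

static volatile unsigned long plain_flags;	/* like the old EXT4_I(inode)->i_flags |= ... */
static atomic_ulong atomic_flags;		/* like ext4_set_inode_flag() built on set_bit() */
static unsigned long plain_lost, atomic_lost;	/* written only by thread B */

static void *thread_a(void *unused)
{
	(void)unused;
	for (unsigned long i = 0; i < LOOPS; i++) {
		plain_flags |= A_BIT;			/* non-atomic read-modify-write */
		plain_flags &= ~A_BIT;			/* may store a stale value that lacks B's bit */
		atomic_fetch_or(&atomic_flags, A_BIT);	/* atomic: never clobbers other bits */
		atomic_fetch_and(&atomic_flags, ~A_BIT);
	}
	return NULL;
}

static void *thread_b(void *unused)
{
	(void)unused;
	for (unsigned long i = 0; i < LOOPS; i++) {
		plain_flags |= B_BIT;
		if (!(plain_flags & B_BIT))		/* our bit vanished: thread A's RMW lost the update */
			plain_lost++;
		atomic_fetch_or(&atomic_flags, B_BIT);
		if (!(atomic_load(&atomic_flags) & B_BIT))
			atomic_lost++;			/* never taken: A only touches A_BIT */
	}
	return NULL;
}

int main(void)
{
	pthread_t a, b;

	pthread_create(&a, NULL, thread_a, NULL);
	pthread_create(&b, NULL, thread_b, NULL);
	pthread_join(a, NULL);
	pthread_join(b, NULL);
	printf("lost updates: plain=%lu atomic=%lu\n", plain_lost, atomic_lost);
	return 0;
}

Built with something like "cc -O2 -pthread flags_race.c", the plain counter typically reports lost updates on a multi-core machine while the atomic counter stays at zero, which is the same failure mode the bitops conversion removes from ext4_do_update_inode() and the other unlocked i_flags updaters.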