diff options
Diffstat (limited to 'fs/ext4')
-rw-r--r-- | fs/ext4/balloc.c | 2 | ||||
-rw-r--r-- | fs/ext4/ext4_jbd2.h | 4 | ||||
-rw-r--r-- | fs/ext4/extents.c | 10 | ||||
-rw-r--r-- | fs/ext4/fsync.c | 19 | ||||
-rw-r--r-- | fs/ext4/inode.c | 55 | ||||
-rw-r--r-- | fs/ext4/mballoc.c | 2 | ||||
-rw-r--r-- | fs/ext4/migrate.c | 2 | ||||
-rw-r--r-- | fs/ext4/super.c | 78 |
8 files changed, 118 insertions, 54 deletions
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 97b970e7dd13..1c67139ad4b4 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -547,7 +547,7 @@ int ext4_claim_free_blocks(struct ext4_sb_info *sbi, * * ext4_should_retry_alloc() is called when ENOSPC is returned, and if * it is profitable to retry the operation, this function will wait - * for the current or commiting transaction to complete, and then + * for the current or committing transaction to complete, and then * return TRUE. * * if the total number of retries exceed three times, return FALSE. diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h index e25e99bf7ee1..d0f53538a57f 100644 --- a/fs/ext4/ext4_jbd2.h +++ b/fs/ext4/ext4_jbd2.h @@ -86,8 +86,8 @@ #ifdef CONFIG_QUOTA /* Amount of blocks needed for quota update - we know that the structure was - * allocated so we need to update only inode+data */ -#define EXT4_QUOTA_TRANS_BLOCKS(sb) (test_opt(sb, QUOTA) ? 2 : 0) + * allocated so we need to update only data block */ +#define EXT4_QUOTA_TRANS_BLOCKS(sb) (test_opt(sb, QUOTA) ? 1 : 0) /* Amount of blocks needed for quota insert/delete - we do some block writes * but inode, sb and group updates are done only once */ #define EXT4_QUOTA_INIT_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_INIT_ALLOC*\ diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index dd2cb5076ff9..4890d6f3ad15 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -1729,7 +1729,7 @@ repeat: BUG_ON(npath->p_depth != path->p_depth); eh = npath[depth].p_hdr; if (le16_to_cpu(eh->eh_entries) < le16_to_cpu(eh->eh_max)) { - ext_debug("next leaf isnt full(%d)\n", + ext_debug("next leaf isn't full(%d)\n", le16_to_cpu(eh->eh_entries)); path = npath; goto repeat; @@ -2533,7 +2533,7 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex) /* * This function is called by ext4_ext_map_blocks() if someone tries to write * to an uninitialized extent. It may result in splitting the uninitialized - * extent into multiple extents (upto three - one initialized and two + * extent into multiple extents (up to three - one initialized and two * uninitialized). * There are three possibilities: * a> There is no split required: Entire extent should be initialized @@ -3174,7 +3174,7 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, path, flags); /* * Flag the inode(non aio case) or end_io struct (aio case) - * that this IO needs to convertion to written when IO is + * that this IO needs to conversion to written when IO is * completed */ if (io && !(io->flag & EXT4_IO_END_UNWRITTEN)) { @@ -3460,10 +3460,10 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, ext4_ext_mark_uninitialized(&newex); /* * io_end structure was created for every IO write to an - * uninitialized extent. To avoid unecessary conversion, + * uninitialized extent. To avoid unnecessary conversion, * here we flag the IO that really needs the conversion. * For non asycn direct IO case, flag the inode state - * that we need to perform convertion when IO is done. + * that we need to perform conversion when IO is done. */ if ((flags & EXT4_GET_BLOCKS_PRE_IO)) { if (io && !(io->flag & EXT4_IO_END_UNWRITTEN)) { diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index 7f74019d6d77..e9473cbe80df 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c @@ -101,7 +101,7 @@ extern int ext4_flush_completed_IO(struct inode *inode) * to the work-to-be schedule is freed. * * Thus we need to keep the io structure still valid here after - * convertion finished. The io structure has a flag to + * conversion finished. The io structure has a flag to * avoid double converting from both fsync and background work * queue work. */ @@ -125,9 +125,11 @@ extern int ext4_flush_completed_IO(struct inode *inode) * the parent directory's parent as well, and so on recursively, if * they are also freshly created. */ -static void ext4_sync_parent(struct inode *inode) +static int ext4_sync_parent(struct inode *inode) { + struct writeback_control wbc; struct dentry *dentry = NULL; + int ret = 0; while (inode && ext4_test_inode_state(inode, EXT4_STATE_NEWENTRY)) { ext4_clear_inode_state(inode, EXT4_STATE_NEWENTRY); @@ -136,8 +138,17 @@ static void ext4_sync_parent(struct inode *inode) if (!dentry || !dentry->d_parent || !dentry->d_parent->d_inode) break; inode = dentry->d_parent->d_inode; - sync_mapping_buffers(inode->i_mapping); + ret = sync_mapping_buffers(inode->i_mapping); + if (ret) + break; + memset(&wbc, 0, sizeof(wbc)); + wbc.sync_mode = WB_SYNC_ALL; + wbc.nr_to_write = 0; /* only write out the inode */ + ret = sync_inode(inode, &wbc); + if (ret) + break; } + return ret; } /* @@ -176,7 +187,7 @@ int ext4_sync_file(struct file *file, int datasync) if (!journal) { ret = generic_file_fsync(file, datasync); if (!ret && !list_empty(&inode->i_dentry)) - ext4_sync_parent(inode); + ret = ext4_sync_parent(inode); goto out; } diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 1a86282b9024..f2fa5e8a582c 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2502,6 +2502,7 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, * for partial write. */ set_buffer_new(bh); + set_buffer_mapped(bh); } return 0; } @@ -2588,7 +2589,7 @@ static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate); * because we should have holes filled from ext4_page_mkwrite(). We even don't * need to file the inode to the transaction's list in ordered mode because if * we are writing back data added by write(), the inode is already there and if - * we are writing back data modified via mmap(), noone guarantees in which + * we are writing back data modified via mmap(), no one guarantees in which * transaction the data will hit the disk. In case we are journaling data, we * cannot start transaction directly because transaction start ranks above page * lock so we have to do some magic. @@ -2690,7 +2691,7 @@ static int ext4_writepage(struct page *page, /* * This is called via ext4_da_writepages() to - * calulate the total number of credits to reserve to fit + * calculate the total number of credits to reserve to fit * a single extent allocation into a single transaction, * ext4_da_writpeages() will loop calling this before * the block allocation. @@ -3304,7 +3305,7 @@ int ext4_alloc_da_blocks(struct inode *inode) * the pages by calling redirty_page_for_writepage() but that * would be ugly in the extreme. So instead we would need to * replicate parts of the code in the above functions, - * simplifying them becuase we wouldn't actually intend to + * simplifying them because we wouldn't actually intend to * write out the pages, but rather only collect contiguous * logical block extents, call the multi-block allocator, and * then update the buffer heads with the block allocations. @@ -3694,7 +3695,7 @@ retry: * * The unwrritten extents will be converted to written when DIO is completed. * For async direct IO, since the IO may still pending when return, we - * set up an end_io call back function, which will do the convertion + * set up an end_io call back function, which will do the conversion * when async direct IO completed. * * If the O_DIRECT write will extend the file then add this inode to the @@ -3717,7 +3718,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, * We could direct write to holes and fallocate. * * Allocated blocks to fill the hole are marked as uninitialized - * to prevent paralel buffered read to expose the stale data + * to prevent parallel buffered read to expose the stale data * before DIO complete the data IO. * * As to previously fallocated extents, ext4 get_block @@ -3778,7 +3779,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, int err; /* * for non AIO case, since the IO is already - * completed, we could do the convertion right here + * completed, we could do the conversion right here */ err = ext4_convert_unwritten_extents(inode, offset, ret); @@ -4025,7 +4026,7 @@ static inline int all_zeroes(__le32 *p, __le32 *q) * * When we do truncate() we may have to clean the ends of several * indirect blocks but leave the blocks themselves alive. Block is - * partially truncated if some data below the new i_size is refered + * partially truncated if some data below the new i_size is referred * from it (and it is on the path to the first completely truncated * data block, indeed). We have to free the top of that path along * with everything to the right of the path. Since no allocation @@ -4169,7 +4170,7 @@ out_err: * @first: array of block numbers * @last: points immediately past the end of array * - * We are freeing all blocks refered from that array (numbers are stored as + * We are freeing all blocks referred from that array (numbers are stored as * little-endian 32-bit) and updating @inode->i_blocks appropriately. * * We accumulate contiguous runs of blocks to free. Conveniently, if these @@ -4261,7 +4262,7 @@ static void ext4_free_data(handle_t *handle, struct inode *inode, * @last: pointer immediately past the end of array * @depth: depth of the branches to free * - * We are freeing all blocks refered from these branches (numbers are + * We are freeing all blocks referred from these branches (numbers are * stored as little-endian 32-bit) and updating @inode->i_blocks * appropriately. */ @@ -4429,8 +4430,8 @@ void ext4_truncate(struct inode *inode) Indirect chain[4]; Indirect *partial; __le32 nr = 0; - int n; - ext4_lblk_t last_block; + int n = 0; + ext4_lblk_t last_block, max_block; unsigned blocksize = inode->i_sb->s_blocksize; trace_ext4_truncate_enter(inode); @@ -4455,14 +4456,18 @@ void ext4_truncate(struct inode *inode) last_block = (inode->i_size + blocksize-1) >> EXT4_BLOCK_SIZE_BITS(inode->i_sb); + max_block = (EXT4_SB(inode->i_sb)->s_bitmap_maxbytes + blocksize-1) + >> EXT4_BLOCK_SIZE_BITS(inode->i_sb); if (inode->i_size & (blocksize - 1)) if (ext4_block_truncate_page(handle, mapping, inode->i_size)) goto out_stop; - n = ext4_block_to_path(inode, last_block, offsets, NULL); - if (n == 0) - goto out_stop; /* error */ + if (last_block != max_block) { + n = ext4_block_to_path(inode, last_block, offsets, NULL); + if (n == 0) + goto out_stop; /* error */ + } /* * OK. This truncate is going to happen. We add the inode to the @@ -4493,7 +4498,13 @@ void ext4_truncate(struct inode *inode) */ ei->i_disksize = inode->i_size; - if (n == 1) { /* direct blocks */ + if (last_block == max_block) { + /* + * It is unnecessary to free any data blocks if last_block is + * equal to the indirect block limit. + */ + goto out_unlock; + } else if (n == 1) { /* direct blocks */ ext4_free_data(handle, inode, NULL, i_data+offsets[0], i_data + EXT4_NDIR_BLOCKS); goto do_indirects; @@ -4553,6 +4564,7 @@ do_indirects: ; } +out_unlock: up_write(&ei->i_data_sem); inode->i_mtime = inode->i_ctime = ext4_current_time(inode); ext4_mark_inode_dirty(handle, inode); @@ -5398,13 +5410,12 @@ static int ext4_indirect_trans_blocks(struct inode *inode, int nrblocks, /* if nrblocks are contiguous */ if (chunk) { /* - * With N contiguous data blocks, it need at most - * N/EXT4_ADDR_PER_BLOCK(inode->i_sb) indirect blocks - * 2 dindirect blocks - * 1 tindirect block + * With N contiguous data blocks, we need at most + * N/EXT4_ADDR_PER_BLOCK(inode->i_sb) + 1 indirect blocks, + * 2 dindirect blocks, and 1 tindirect block */ - indirects = nrblocks / EXT4_ADDR_PER_BLOCK(inode->i_sb); - return indirects + 3; + return DIV_ROUND_UP(nrblocks, + EXT4_ADDR_PER_BLOCK(inode->i_sb)) + 4; } /* * if nrblocks are not contiguous, worse case, each block touch @@ -5478,7 +5489,7 @@ static int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk) } /* - * Calulate the total number of credits to reserve to fit + * Calculate the total number of credits to reserve to fit * the modification of a single pages into a single transaction, * which may include multiple chunks of block allocations. * diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index a5837a837a8b..d8a16eecf1d5 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -92,7 +92,7 @@ * between CPUs. It is possible to get scheduled at this point. * * The locality group prealloc space is used looking at whether we have - * enough free space (pa_free) withing the prealloc space. + * enough free space (pa_free) within the prealloc space. * * If we can't allocate blocks via inode prealloc or/and locality group * prealloc then we look at the buddy cache. The buddy cache is represented diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index d1bafa57f483..92816b4e0f16 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c @@ -517,7 +517,7 @@ int ext4_ext_migrate(struct inode *inode) * start with one credit accounted for * superblock modification. * - * For the tmp_inode we already have commited the + * For the tmp_inode we already have committed the * trascation that created the inode. Later as and * when we add extents we extent the journal */ diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 22546ad7f0ae..8553dfb310af 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -242,27 +242,44 @@ static void ext4_put_nojournal(handle_t *handle) * journal_end calls result in the superblock being marked dirty, so * that sync() will call the filesystem's write_super callback if * appropriate. + * + * To avoid j_barrier hold in userspace when a user calls freeze(), + * ext4 prevents a new handle from being started by s_frozen, which + * is in an upper layer. */ handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks) { journal_t *journal; + handle_t *handle; if (sb->s_flags & MS_RDONLY) return ERR_PTR(-EROFS); - vfs_check_frozen(sb, SB_FREEZE_TRANS); - /* Special case here: if the journal has aborted behind our - * backs (eg. EIO in the commit thread), then we still need to - * take the FS itself readonly cleanly. */ journal = EXT4_SB(sb)->s_journal; - if (journal) { - if (is_journal_aborted(journal)) { - ext4_abort(sb, "Detected aborted journal"); - return ERR_PTR(-EROFS); - } - return jbd2_journal_start(journal, nblocks); + handle = ext4_journal_current_handle(); + + /* + * If a handle has been started, it should be allowed to + * finish, otherwise deadlock could happen between freeze + * and others(e.g. truncate) due to the restart of the + * journal handle if the filesystem is forzen and active + * handles are not stopped. + */ + if (!handle) + vfs_check_frozen(sb, SB_FREEZE_TRANS); + + if (!journal) + return ext4_get_nojournal(); + /* + * Special case here: if the journal has aborted behind our + * backs (eg. EIO in the commit thread), then we still need to + * take the FS itself readonly cleanly. + */ + if (is_journal_aborted(journal)) { + ext4_abort(sb, "Detected aborted journal"); + return ERR_PTR(-EROFS); } - return ext4_get_nojournal(); + return jbd2_journal_start(journal, nblocks); } /* @@ -617,7 +634,7 @@ __acquires(bitlock) * filesystem will have already been marked read/only and the * journal has been aborted. We return 1 as a hint to callers * who might what to use the return value from - * ext4_grp_locked_error() to distinguish beween the + * ext4_grp_locked_error() to distinguish between the * ERRORS_CONT and ERRORS_RO case, and perhaps return more * aggressively from the ext4 function in question, with a * more appropriate error code. @@ -2975,6 +2992,12 @@ static int ext4_register_li_request(struct super_block *sb, mutex_unlock(&ext4_li_info->li_list_mtx); sbi->s_li_request = elr; + /* + * set elr to NULL here since it has been inserted to + * the request_list and the removal and free of it is + * handled by ext4_clear_request_list from now on. + */ + elr = NULL; if (!(ext4_li_info->li_state & EXT4_LAZYINIT_RUNNING)) { ret = ext4_run_lazyinit_thread(); @@ -3385,6 +3408,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) get_random_bytes(&sbi->s_next_generation, sizeof(u32)); spin_lock_init(&sbi->s_next_gen_lock); + init_timer(&sbi->s_err_report); + sbi->s_err_report.function = print_daily_error_info; + sbi->s_err_report.data = (unsigned long) sb; + err = percpu_counter_init(&sbi->s_freeblocks_counter, ext4_count_free_blocks(sb)); if (!err) { @@ -3646,9 +3673,6 @@ no_journal: "Opts: %s%s%s", descr, sbi->s_es->s_mount_opts, *sbi->s_es->s_mount_opts ? "; " : "", orig_data); - init_timer(&sbi->s_err_report); - sbi->s_err_report.function = print_daily_error_info; - sbi->s_err_report.data = (unsigned long) sb; if (es->s_error_count) mod_timer(&sbi->s_err_report, jiffies + 300*HZ); /* 5 minutes */ @@ -3672,6 +3696,7 @@ failed_mount_wq: sbi->s_journal = NULL; } failed_mount3: + del_timer(&sbi->s_err_report); if (sbi->s_flex_groups) { if (is_vmalloc_addr(sbi->s_flex_groups)) vfree(sbi->s_flex_groups); @@ -4138,6 +4163,11 @@ static int ext4_sync_fs(struct super_block *sb, int wait) /* * LVM calls this function before a (read-only) snapshot is created. This * gives us a chance to flush the journal completely and mark the fs clean. + * + * Note that only this function cannot bring a filesystem to be in a clean + * state independently, because ext4 prevents a new handle from being started + * by @sb->s_frozen, which stays in an upper layer. It thus needs help from + * the upper layer. */ static int ext4_freeze(struct super_block *sb) { @@ -4614,17 +4644,30 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id, static int ext4_quota_off(struct super_block *sb, int type) { + struct inode *inode = sb_dqopt(sb)->files[type]; + handle_t *handle; + /* Force all delayed allocation blocks to be allocated. * Caller already holds s_umount sem */ if (test_opt(sb, DELALLOC)) sync_filesystem(sb); + /* Update modification times of quota files when userspace can + * start looking at them */ + handle = ext4_journal_start(inode, 1); + if (IS_ERR(handle)) + goto out; + inode->i_mtime = inode->i_ctime = CURRENT_TIME; + ext4_mark_inode_dirty(handle, inode); + ext4_journal_stop(handle); + +out: return dquot_quota_off(sb, type); } /* Read data from quotafile - avoid pagecache and such because we cannot afford * acquiring the locks... As quota files are never truncated and quota code - * itself serializes the operations (and noone else should touch the files) + * itself serializes the operations (and no one else should touch the files) * we don't have to be afraid of races */ static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data, size_t len, loff_t off) @@ -4714,9 +4757,8 @@ out: if (inode->i_size < off + len) { i_size_write(inode, off + len); EXT4_I(inode)->i_disksize = inode->i_size; + ext4_mark_inode_dirty(handle, inode); } - inode->i_mtime = inode->i_ctime = CURRENT_TIME; - ext4_mark_inode_dirty(handle, inode); mutex_unlock(&inode->i_mutex); return len; } |