summaryrefslogtreecommitdiffstats
path: root/fs/ext4/inode.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ext4/inode.c')
-rw-r--r--fs/ext4/inode.c254
1 files changed, 177 insertions, 77 deletions
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 2b5ef1b64249..9d9f414f99fe 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -222,13 +222,13 @@ void ext4_evict_inode(struct inode *inode)
/*
* For inodes with journalled data, transaction commit could have
- * dirtied the inode. Flush worker is ignoring it because of I_FREEING
- * flag but we still need to remove the inode from the writeback lists.
+ * dirtied the inode. And for inodes with dioread_nolock, unwritten
+ * extents converting worker could merge extents and also have dirtied
+ * the inode. Flush worker is ignoring it because of I_FREEING flag but
+ * we still need to remove the inode from the writeback lists.
*/
- if (!list_empty_careful(&inode->i_io_list)) {
- WARN_ON_ONCE(!ext4_should_journal_data(inode));
+ if (!list_empty_careful(&inode->i_io_list))
inode_io_list_del(inode);
- }
/*
* Protect us against freezing - iput() caller didn't have to have any
@@ -335,6 +335,12 @@ stop_handle:
ext4_xattr_inode_array_free(ea_inode_array);
return;
no_delete:
+ /*
+ * Check out some where else accidentally dirty the evicting inode,
+ * which may probably cause inode use-after-free issues later.
+ */
+ WARN_ON_ONCE(!list_empty_careful(&inode->i_io_list));
+
if (!list_empty(&EXT4_I(inode)->i_fc_list))
ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM, NULL);
ext4_clear_inode(inode); /* We must guarantee clearing of inode... */
@@ -1309,7 +1315,8 @@ static int ext4_write_end(struct file *file,
trace_ext4_write_end(inode, pos, len, copied);
- if (ext4_has_inline_data(inode))
+ if (ext4_has_inline_data(inode) &&
+ ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA))
return ext4_write_inline_data_end(inode, pos, len, copied, page);
copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
@@ -1543,9 +1550,12 @@ void ext4_da_release_space(struct inode *inode, int to_free)
*/
struct mpage_da_data {
+ /* These are input fields for ext4_do_writepages() */
struct inode *inode;
struct writeback_control *wbc;
+ unsigned int can_map:1; /* Can writepages call map blocks? */
+ /* These are internal state of ext4_do_writepages() */
pgoff_t first_page; /* The first page to write */
pgoff_t next_page; /* Current page to examine */
pgoff_t last_page; /* Last page to examine */
@@ -2009,7 +2019,6 @@ static int ext4_writepage(struct page *page,
struct buffer_head *page_bufs = NULL;
struct inode *inode = page->mapping->host;
struct ext4_io_submit io_submit;
- bool keep_towrite = false;
if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) {
folio_invalidate(folio, 0, folio_size(folio));
@@ -2067,7 +2076,6 @@ static int ext4_writepage(struct page *page,
unlock_page(page);
return 0;
}
- keep_towrite = true;
}
if (PageChecked(page) && ext4_should_journal_data(inode))
@@ -2084,7 +2092,7 @@ static int ext4_writepage(struct page *page,
unlock_page(page);
return -ENOMEM;
}
- ret = ext4_bio_write_page(&io_submit, page, len, keep_towrite);
+ ret = ext4_bio_write_page(&io_submit, page, len);
ext4_io_submit(&io_submit);
/* Drop io_end reference we got from init */
ext4_put_io_end_defer(io_submit.io_end);
@@ -2118,7 +2126,7 @@ static int mpage_submit_page(struct mpage_da_data *mpd, struct page *page)
len = size & ~PAGE_MASK;
else
len = PAGE_SIZE;
- err = ext4_bio_write_page(&mpd->io_submit, page, len, false);
+ err = ext4_bio_write_page(&mpd->io_submit, page, len);
if (!err)
mpd->wbc->nr_to_write--;
mpd->first_page++;
@@ -2551,18 +2559,33 @@ static int ext4_da_writepages_trans_blocks(struct inode *inode)
MAX_WRITEPAGES_EXTENT_LEN + bpp - 1, bpp);
}
+/* Return true if the page needs to be written as part of transaction commit */
+static bool ext4_page_nomap_can_writeout(struct page *page)
+{
+ struct buffer_head *bh, *head;
+
+ bh = head = page_buffers(page);
+ do {
+ if (buffer_dirty(bh) && buffer_mapped(bh) && !buffer_delay(bh))
+ return true;
+ } while ((bh = bh->b_this_page) != head);
+ return false;
+}
+
/*
* mpage_prepare_extent_to_map - find & lock contiguous range of dirty pages
- * and underlying extent to map
+ * needing mapping, submit mapped pages
*
* @mpd - where to look for pages
*
* Walk dirty pages in the mapping. If they are fully mapped, submit them for
- * IO immediately. When we find a page which isn't mapped we start accumulating
- * extent of buffers underlying these pages that needs mapping (formed by
- * either delayed or unwritten buffers). We also lock the pages containing
- * these buffers. The extent found is returned in @mpd structure (starting at
- * mpd->lblk with length mpd->len blocks).
+ * IO immediately. If we cannot map blocks, we submit just already mapped
+ * buffers in the page for IO and keep page dirty. When we can map blocks and
+ * we find a page which isn't mapped we start accumulating extent of buffers
+ * underlying these pages that needs mapping (formed by either delayed or
+ * unwritten buffers). We also lock the pages containing these buffers. The
+ * extent found is returned in @mpd structure (starting at mpd->lblk with
+ * length mpd->len blocks).
*
* Note that this function can attach bios to one io_end structure which are
* neither logically nor physically contiguous. Although it may seem as an
@@ -2653,14 +2676,30 @@ static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd)
if (mpd->map.m_len == 0)
mpd->first_page = page->index;
mpd->next_page = page->index + 1;
- /* Add all dirty buffers to mpd */
- lblk = ((ext4_lblk_t)page->index) <<
- (PAGE_SHIFT - blkbits);
- head = page_buffers(page);
- err = mpage_process_page_bufs(mpd, head, head, lblk);
- if (err <= 0)
- goto out;
- err = 0;
+ /*
+ * Writeout for transaction commit where we cannot
+ * modify metadata is simple. Just submit the page.
+ */
+ if (!mpd->can_map) {
+ if (ext4_page_nomap_can_writeout(page)) {
+ err = mpage_submit_page(mpd, page);
+ if (err < 0)
+ goto out;
+ } else {
+ unlock_page(page);
+ mpd->first_page++;
+ }
+ } else {
+ /* Add all dirty buffers to mpd */
+ lblk = ((ext4_lblk_t)page->index) <<
+ (PAGE_SHIFT - blkbits);
+ head = page_buffers(page);
+ err = mpage_process_page_bufs(mpd, head, head,
+ lblk);
+ if (err <= 0)
+ goto out;
+ err = 0;
+ }
left--;
}
pagevec_release(&pvec);
@@ -2673,25 +2712,27 @@ out:
return err;
}
-static int ext4_writepages(struct address_space *mapping,
- struct writeback_control *wbc)
+static int ext4_writepage_cb(struct page *page, struct writeback_control *wbc,
+ void *data)
{
+ return ext4_writepage(page, wbc);
+}
+
+static int ext4_do_writepages(struct mpage_da_data *mpd)
+{
+ struct writeback_control *wbc = mpd->wbc;
pgoff_t writeback_index = 0;
long nr_to_write = wbc->nr_to_write;
int range_whole = 0;
int cycled = 1;
handle_t *handle = NULL;
- struct mpage_da_data mpd;
- struct inode *inode = mapping->host;
+ struct inode *inode = mpd->inode;
+ struct address_space *mapping = inode->i_mapping;
int needed_blocks, rsv_blocks = 0, ret = 0;
struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
struct blk_plug plug;
bool give_up_on_write = false;
- if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
- return -EIO;
-
- percpu_down_read(&sbi->s_writepages_rwsem);
trace_ext4_writepages(inode, wbc);
/*
@@ -2703,7 +2744,9 @@ static int ext4_writepages(struct address_space *mapping,
goto out_writepages;
if (ext4_should_journal_data(inode)) {
- ret = generic_writepages(mapping, wbc);
+ blk_start_plug(&plug);
+ ret = write_cache_pages(mapping, wbc, ext4_writepage_cb, NULL);
+ blk_finish_plug(&plug);
goto out_writepages;
}
@@ -2757,19 +2800,18 @@ static int ext4_writepages(struct address_space *mapping,
writeback_index = mapping->writeback_index;
if (writeback_index)
cycled = 0;
- mpd.first_page = writeback_index;
- mpd.last_page = -1;
+ mpd->first_page = writeback_index;
+ mpd->last_page = -1;
} else {
- mpd.first_page = wbc->range_start >> PAGE_SHIFT;
- mpd.last_page = wbc->range_end >> PAGE_SHIFT;
+ mpd->first_page = wbc->range_start >> PAGE_SHIFT;
+ mpd->last_page = wbc->range_end >> PAGE_SHIFT;
}
- mpd.inode = inode;
- mpd.wbc = wbc;
- ext4_io_submit_init(&mpd.io_submit, wbc);
+ ext4_io_submit_init(&mpd->io_submit, wbc);
retry:
if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
- tag_pages_for_writeback(mapping, mpd.first_page, mpd.last_page);
+ tag_pages_for_writeback(mapping, mpd->first_page,
+ mpd->last_page);
blk_start_plug(&plug);
/*
@@ -2778,31 +2820,32 @@ retry:
* in the block layer on device congestion while having transaction
* started.
*/
- mpd.do_map = 0;
- mpd.scanned_until_end = 0;
- mpd.io_submit.io_end = ext4_init_io_end(inode, GFP_KERNEL);
- if (!mpd.io_submit.io_end) {
+ mpd->do_map = 0;
+ mpd->scanned_until_end = 0;
+ mpd->io_submit.io_end = ext4_init_io_end(inode, GFP_KERNEL);
+ if (!mpd->io_submit.io_end) {
ret = -ENOMEM;
goto unplug;
}
- ret = mpage_prepare_extent_to_map(&mpd);
+ ret = mpage_prepare_extent_to_map(mpd);
/* Unlock pages we didn't use */
- mpage_release_unused_pages(&mpd, false);
+ mpage_release_unused_pages(mpd, false);
/* Submit prepared bio */
- ext4_io_submit(&mpd.io_submit);
- ext4_put_io_end_defer(mpd.io_submit.io_end);
- mpd.io_submit.io_end = NULL;
+ ext4_io_submit(&mpd->io_submit);
+ ext4_put_io_end_defer(mpd->io_submit.io_end);
+ mpd->io_submit.io_end = NULL;
if (ret < 0)
goto unplug;
- while (!mpd.scanned_until_end && wbc->nr_to_write > 0) {
+ while (!mpd->scanned_until_end && wbc->nr_to_write > 0) {
/* For each extent of pages we use new io_end */
- mpd.io_submit.io_end = ext4_init_io_end(inode, GFP_KERNEL);
- if (!mpd.io_submit.io_end) {
+ mpd->io_submit.io_end = ext4_init_io_end(inode, GFP_KERNEL);
+ if (!mpd->io_submit.io_end) {
ret = -ENOMEM;
break;
}
+ WARN_ON_ONCE(!mpd->can_map);
/*
* We have two constraints: We find one extent to map and we
* must always write out whole page (makes a difference when
@@ -2822,16 +2865,16 @@ retry:
"%ld pages, ino %lu; err %d", __func__,
wbc->nr_to_write, inode->i_ino, ret);
/* Release allocated io_end */
- ext4_put_io_end(mpd.io_submit.io_end);
- mpd.io_submit.io_end = NULL;
+ ext4_put_io_end(mpd->io_submit.io_end);
+ mpd->io_submit.io_end = NULL;
break;
}
- mpd.do_map = 1;
+ mpd->do_map = 1;
- trace_ext4_da_write_pages(inode, mpd.first_page, mpd.wbc);
- ret = mpage_prepare_extent_to_map(&mpd);
- if (!ret && mpd.map.m_len)
- ret = mpage_map_and_submit_extent(handle, &mpd,
+ trace_ext4_da_write_pages(inode, mpd->first_page, wbc);
+ ret = mpage_prepare_extent_to_map(mpd);
+ if (!ret && mpd->map.m_len)
+ ret = mpage_map_and_submit_extent(handle, mpd,
&give_up_on_write);
/*
* Caution: If the handle is synchronous,
@@ -2846,12 +2889,12 @@ retry:
if (!ext4_handle_valid(handle) || handle->h_sync == 0) {
ext4_journal_stop(handle);
handle = NULL;
- mpd.do_map = 0;
+ mpd->do_map = 0;
}
/* Unlock pages we didn't use */
- mpage_release_unused_pages(&mpd, give_up_on_write);
+ mpage_release_unused_pages(mpd, give_up_on_write);
/* Submit prepared bio */
- ext4_io_submit(&mpd.io_submit);
+ ext4_io_submit(&mpd->io_submit);
/*
* Drop our io_end reference we got from init. We have
@@ -2861,11 +2904,11 @@ retry:
* up doing unwritten extent conversion.
*/
if (handle) {
- ext4_put_io_end_defer(mpd.io_submit.io_end);
+ ext4_put_io_end_defer(mpd->io_submit.io_end);
ext4_journal_stop(handle);
} else
- ext4_put_io_end(mpd.io_submit.io_end);
- mpd.io_submit.io_end = NULL;
+ ext4_put_io_end(mpd->io_submit.io_end);
+ mpd->io_submit.io_end = NULL;
if (ret == -ENOSPC && sbi->s_journal) {
/*
@@ -2885,8 +2928,8 @@ unplug:
blk_finish_plug(&plug);
if (!ret && !cycled && wbc->nr_to_write > 0) {
cycled = 1;
- mpd.last_page = writeback_index - 1;
- mpd.first_page = 0;
+ mpd->last_page = writeback_index - 1;
+ mpd->first_page = 0;
goto retry;
}
@@ -2896,15 +2939,51 @@ unplug:
* Set the writeback_index so that range_cyclic
* mode will write it back later
*/
- mapping->writeback_index = mpd.first_page;
+ mapping->writeback_index = mpd->first_page;
out_writepages:
trace_ext4_writepages_result(inode, wbc, ret,
nr_to_write - wbc->nr_to_write);
- percpu_up_read(&sbi->s_writepages_rwsem);
return ret;
}
+static int ext4_writepages(struct address_space *mapping,
+ struct writeback_control *wbc)
+{
+ struct super_block *sb = mapping->host->i_sb;
+ struct mpage_da_data mpd = {
+ .inode = mapping->host,
+ .wbc = wbc,
+ .can_map = 1,
+ };
+ int ret;
+
+ if (unlikely(ext4_forced_shutdown(EXT4_SB(sb))))
+ return -EIO;
+
+ percpu_down_read(&EXT4_SB(sb)->s_writepages_rwsem);
+ ret = ext4_do_writepages(&mpd);
+ percpu_up_read(&EXT4_SB(sb)->s_writepages_rwsem);
+
+ return ret;
+}
+
+int ext4_normal_submit_inode_data_buffers(struct jbd2_inode *jinode)
+{
+ struct writeback_control wbc = {
+ .sync_mode = WB_SYNC_ALL,
+ .nr_to_write = LONG_MAX,
+ .range_start = jinode->i_dirty_start,
+ .range_end = jinode->i_dirty_end,
+ };
+ struct mpage_da_data mpd = {
+ .inode = jinode->i_vfs_inode,
+ .wbc = &wbc,
+ .can_map = 0,
+ };
+ return ext4_do_writepages(&mpd);
+}
+
static int ext4_dax_writepages(struct address_space *mapping,
struct writeback_control *wbc)
{
@@ -3646,7 +3725,6 @@ static int ext4_iomap_swap_activate(struct swap_info_struct *sis,
static const struct address_space_operations ext4_aops = {
.read_folio = ext4_read_folio,
.readahead = ext4_readahead,
- .writepage = ext4_writepage,
.writepages = ext4_writepages,
.write_begin = ext4_write_begin,
.write_end = ext4_write_end,
@@ -3664,7 +3742,6 @@ static const struct address_space_operations ext4_aops = {
static const struct address_space_operations ext4_journalled_aops = {
.read_folio = ext4_read_folio,
.readahead = ext4_readahead,
- .writepage = ext4_writepage,
.writepages = ext4_writepages,
.write_begin = ext4_write_begin,
.write_end = ext4_journalled_write_end,
@@ -3673,6 +3750,7 @@ static const struct address_space_operations ext4_journalled_aops = {
.invalidate_folio = ext4_journalled_invalidate_folio,
.release_folio = ext4_release_folio,
.direct_IO = noop_direct_IO,
+ .migrate_folio = buffer_migrate_folio_norefs,
.is_partially_uptodate = block_is_partially_uptodate,
.error_remove_page = generic_error_remove_page,
.swap_activate = ext4_iomap_swap_activate,
@@ -3681,7 +3759,6 @@ static const struct address_space_operations ext4_journalled_aops = {
static const struct address_space_operations ext4_da_aops = {
.read_folio = ext4_read_folio,
.readahead = ext4_readahead,
- .writepage = ext4_writepage,
.writepages = ext4_writepages,
.write_begin = ext4_da_write_begin,
.write_end = ext4_da_write_end,
@@ -4225,7 +4302,8 @@ int ext4_truncate(struct inode *inode)
/* If we zero-out tail of the page, we have to create jinode for jbd2 */
if (inode->i_size & (inode->i_sb->s_blocksize - 1)) {
- if (ext4_inode_attach_jinode(inode) < 0)
+ err = ext4_inode_attach_jinode(inode);
+ if (err)
goto out_trace;
}
@@ -4473,9 +4551,17 @@ static int __ext4_get_inode_loc(struct super_block *sb, unsigned long ino,
inodes_per_block = EXT4_SB(sb)->s_inodes_per_block;
inode_offset = ((ino - 1) %
EXT4_INODES_PER_GROUP(sb));
- block = ext4_inode_table(sb, gdp) + (inode_offset / inodes_per_block);
iloc->offset = (inode_offset % inodes_per_block) * EXT4_INODE_SIZE(sb);
+ block = ext4_inode_table(sb, gdp);
+ if ((block <= le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block)) ||
+ (block >= ext4_blocks_count(EXT4_SB(sb)->s_es))) {
+ ext4_error(sb, "Invalid inode table block %llu in "
+ "block_group %u", block, iloc->block_group);
+ return -EFSCORRUPTED;
+ }
+ block += (inode_offset / inodes_per_block);
+
bh = sb_getblk(sb, block);
if (unlikely(!bh))
return -ENOMEM;
@@ -5044,8 +5130,14 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
if (IS_CASEFOLDED(inode) && !ext4_has_feature_casefold(inode->i_sb))
ext4_error_inode(inode, function, line, 0,
"casefold flag without casefold feature");
- brelse(iloc.bh);
+ if (is_bad_inode(inode) && !(flags & EXT4_IGET_BAD)) {
+ ext4_error_inode(inode, function, line, 0,
+ "bad inode without EXT4_IGET_BAD flag");
+ ret = -EUCLEAN;
+ goto bad_inode;
+ }
+ brelse(iloc.bh);
unlock_new_inode(inode);
return inode;
@@ -5550,7 +5642,7 @@ out_mmap_sem:
ext4_orphan_del(NULL, inode);
if (!error && (ia_valid & ATTR_MODE))
- rc = posix_acl_chmod(mnt_userns, inode, inode->i_mode);
+ rc = posix_acl_chmod(mnt_userns, dentry, inode->i_mode);
err_out:
if (error)
@@ -5853,6 +5945,14 @@ static int __ext4_expand_extra_isize(struct inode *inode,
return 0;
}
+ /*
+ * We may need to allocate external xattr block so we need quotas
+ * initialized. Here we can be called with various locks held so we
+ * cannot affort to initialize quotas ourselves. So just bail.
+ */
+ if (dquot_initialize_needed(inode))
+ return -EAGAIN;
+
/* try to expand with EAs present */
error = ext4_expand_extra_isize_ea(inode, new_extra_isize,
raw_inode, handle);