Diffstat (limited to 'fs')
-rw-r--r--  fs/afs/cell.c              3
-rw-r--r--  fs/afs/dir.c              12
-rw-r--r--  fs/afs/dir_edit.c          6
-rw-r--r--  fs/afs/file.c             78
-rw-r--r--  fs/afs/internal.h         57
-rw-r--r--  fs/afs/write.c           105
-rw-r--r--  fs/afs/xattr.c             2
-rw-r--r--  fs/binfmt_elf.c            2
-rw-r--r--  fs/btrfs/backref.c        13
-rw-r--r--  fs/btrfs/block-group.c     1
-rw-r--r--  fs/btrfs/ctree.h           2
-rw-r--r--  fs/btrfs/dev-replace.c     5
-rw-r--r--  fs/btrfs/disk-io.c       139
-rw-r--r--  fs/btrfs/disk-io.h         3
-rw-r--r--  fs/btrfs/extent-tree.c     2
-rw-r--r--  fs/btrfs/file.c            3
-rw-r--r--  fs/btrfs/inode.c           8
-rw-r--r--  fs/btrfs/qgroup.c         18
-rw-r--r--  fs/btrfs/reada.c          47
-rw-r--r--  fs/btrfs/tree-checker.c   18
-rw-r--r--  fs/btrfs/volumes.c         5
-rw-r--r--  fs/btrfs/volumes.h        12
-rw-r--r--  fs/cachefiles/rdwr.c       3
-rw-r--r--  fs/debugfs/file.c         15
-rw-r--r--  fs/ext4/dir.c             64
-rw-r--r--  fs/ext4/ext4.h            20
-rw-r--r--  fs/ext4/extents.c         30
-rw-r--r--  fs/ext4/fast_commit.c     37
-rw-r--r--  fs/ext4/hash.c             2
-rw-r--r--  fs/ext4/inode.c           15
-rw-r--r--  fs/ext4/namei.c           20
-rw-r--r--  fs/ext4/super.c           16
-rw-r--r--  fs/ext4/sysfs.c            2
-rw-r--r--  fs/hfs/btree.h             2
-rw-r--r--  fs/hfsplus/hfsplus_fs.h    2
-rw-r--r--  fs/io_uring.c            108
-rw-r--r--  fs/isofs/rock.h            8
-rw-r--r--  fs/select.c                4
38 files changed, 538 insertions, 351 deletions
diff --git a/fs/afs/cell.c b/fs/afs/cell.c
index 52233fa6195f..887b673f6223 100644
--- a/fs/afs/cell.c
+++ b/fs/afs/cell.c
@@ -589,7 +589,7 @@ struct afs_cell *afs_use_cell(struct afs_cell *cell, enum afs_cell_trace reason)
*/
void afs_unuse_cell(struct afs_net *net, struct afs_cell *cell, enum afs_cell_trace reason)
{
- unsigned int debug_id = cell->debug_id;
+ unsigned int debug_id;
time64_t now, expire_delay;
int u, a;
@@ -604,6 +604,7 @@ void afs_unuse_cell(struct afs_net *net, struct afs_cell *cell, enum afs_cell_tr
if (cell->vl_servers->nr_servers)
expire_delay = afs_cell_gc_delay;
+ debug_id = cell->debug_id;
u = atomic_read(&cell->ref);
a = atomic_dec_return(&cell->active);
trace_afs_cell(debug_id, u, a, reason);
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 1d2e61e0ab04..1bb5b9d7f0a2 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -281,8 +281,7 @@ retry:
if (ret < 0)
goto error;
- set_page_private(req->pages[i], 1);
- SetPagePrivate(req->pages[i]);
+ attach_page_private(req->pages[i], (void *)1);
unlock_page(req->pages[i]);
i++;
} else {
@@ -1975,8 +1974,7 @@ static int afs_dir_releasepage(struct page *page, gfp_t gfp_flags)
_enter("{{%llx:%llu}[%lu]}", dvnode->fid.vid, dvnode->fid.vnode, page->index);
- set_page_private(page, 0);
- ClearPagePrivate(page);
+ detach_page_private(page);
/* The directory will need reloading. */
if (test_and_clear_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
@@ -2003,8 +2001,6 @@ static void afs_dir_invalidatepage(struct page *page, unsigned int offset,
afs_stat_v(dvnode, n_inval);
/* we clean up only if the entire page is being invalidated */
- if (offset == 0 && length == PAGE_SIZE) {
- set_page_private(page, 0);
- ClearPagePrivate(page);
- }
+ if (offset == 0 && length == PAGE_SIZE)
+ detach_page_private(page);
}
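
For context on the conversion above: attach_page_private() and detach_page_private() bundle the old open-coded set_page_private()/SetPagePrivate() pattern with page reference management, so the switch is more than cosmetic. A minimal userspace model of their semantics (an illustrative sketch with stand-in fields, not the mm/ implementation):

#include <assert.h>

/* Toy stand-in for struct page: only the fields the helpers touch. */
struct page {
	int refcount;          /* models get_page()/put_page() */
	int pg_private;        /* models the PG_private flag */
	unsigned long private; /* models page->private */
};

/* Models attach_page_private(): take a ref, store data, set PG_private. */
static void attach_page_private(struct page *page, void *data)
{
	page->refcount++;                    /* get_page(page) */
	page->private = (unsigned long)data; /* set_page_private(page, data) */
	page->pg_private = 1;                /* SetPagePrivate(page) */
}

/* Models detach_page_private(): undo the above and return the data. */
static void *detach_page_private(struct page *page)
{
	void *data;

	if (!page->pg_private)               /* !PagePrivate(page) */
		return NULL;
	data = (void *)page->private;
	page->private = 0;
	page->pg_private = 0;                /* ClearPagePrivate(page) */
	page->refcount--;                    /* put_page(page) */
	return data;
}

int main(void)
{
	struct page page = { .refcount = 1 };

	attach_page_private(&page, (void *)1); /* as afs/dir.c now does */
	assert(page.pg_private && page.refcount == 2);
	assert(detach_page_private(&page) == (void *)1);
	assert(!page.pg_private && page.refcount == 1);
	return 0;
}

The extra page reference taken by attach_page_private() is the behavioral difference from the removed open-coded sequence.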
diff --git a/fs/afs/dir_edit.c b/fs/afs/dir_edit.c
index b108528bf010..2ffe09abae7f 100644
--- a/fs/afs/dir_edit.c
+++ b/fs/afs/dir_edit.c
@@ -243,10 +243,8 @@ void afs_edit_dir_add(struct afs_vnode *vnode,
index, gfp);
if (!page)
goto error;
- if (!PagePrivate(page)) {
- set_page_private(page, 1);
- SetPagePrivate(page);
- }
+ if (!PagePrivate(page))
+ attach_page_private(page, (void *)1);
dir_page = kmap(page);
}
diff --git a/fs/afs/file.c b/fs/afs/file.c
index 371d1488cc54..85f5adf21aa0 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -33,6 +33,7 @@ const struct file_operations afs_file_operations = {
.write_iter = afs_file_write,
.mmap = afs_file_mmap,
.splice_read = generic_file_splice_read,
+ .splice_write = iter_file_splice_write,
.fsync = afs_fsync,
.lock = afs_lock,
.flock = afs_flock,
@@ -601,6 +602,63 @@ static int afs_readpages(struct file *file, struct address_space *mapping,
}
/*
+ * Adjust the dirty region of the page on truncation or full invalidation,
+ * getting rid of the markers altogether if the region is entirely invalidated.
+ */
+static void afs_invalidate_dirty(struct page *page, unsigned int offset,
+ unsigned int length)
+{
+ struct afs_vnode *vnode = AFS_FS_I(page->mapping->host);
+ unsigned long priv;
+ unsigned int f, t, end = offset + length;
+
+ priv = page_private(page);
+
+ /* we clean up only if the entire page is being invalidated */
+ if (offset == 0 && length == thp_size(page))
+ goto full_invalidate;
+
+ /* If the page was dirtied by page_mkwrite(), the PTE stays writable
+ * and we don't get another notification to tell us to expand it
+ * again.
+ */
+ if (afs_is_page_dirty_mmapped(priv))
+ return;
+
+ /* We may need to shorten the dirty region */
+ f = afs_page_dirty_from(priv);
+ t = afs_page_dirty_to(priv);
+
+ if (t <= offset || f >= end)
+ return; /* Doesn't overlap */
+
+ if (f < offset && t > end)
+ return; /* Splits the dirty region - just absorb it */
+
+ if (f >= offset && t <= end)
+ goto undirty;
+
+ if (f < offset)
+ t = offset;
+ else
+ f = end;
+ if (f == t)
+ goto undirty;
+
+ priv = afs_page_dirty(f, t);
+ set_page_private(page, priv);
+ trace_afs_page_dirty(vnode, tracepoint_string("trunc"), page->index, priv);
+ return;
+
+undirty:
+ trace_afs_page_dirty(vnode, tracepoint_string("undirty"), page->index, priv);
+ clear_page_dirty_for_io(page);
+full_invalidate:
+ priv = (unsigned long)detach_page_private(page);
+ trace_afs_page_dirty(vnode, tracepoint_string("inval"), page->index, priv);
+}
+
+/*
* invalidate part or all of a page
* - release a page and clean up its private data if offset is 0 (indicating
* the entire page)
@@ -608,31 +666,23 @@ static int afs_readpages(struct file *file, struct address_space *mapping,
static void afs_invalidatepage(struct page *page, unsigned int offset,
unsigned int length)
{
- struct afs_vnode *vnode = AFS_FS_I(page->mapping->host);
- unsigned long priv;
-
_enter("{%lu},%u,%u", page->index, offset, length);
BUG_ON(!PageLocked(page));
+#ifdef CONFIG_AFS_FSCACHE
/* we clean up only if the entire page is being invalidated */
if (offset == 0 && length == PAGE_SIZE) {
-#ifdef CONFIG_AFS_FSCACHE
if (PageFsCache(page)) {
struct afs_vnode *vnode = AFS_FS_I(page->mapping->host);
fscache_wait_on_page_write(vnode->cache, page);
fscache_uncache_page(vnode->cache, page);
}
+ }
#endif
- if (PagePrivate(page)) {
- priv = page_private(page);
- trace_afs_page_dirty(vnode, tracepoint_string("inval"),
- page->index, priv);
- set_page_private(page, 0);
- ClearPagePrivate(page);
- }
- }
+ if (PagePrivate(page))
+ afs_invalidate_dirty(page, offset, length);
_leave("");
}
@@ -660,11 +710,9 @@ static int afs_releasepage(struct page *page, gfp_t gfp_flags)
#endif
if (PagePrivate(page)) {
- priv = page_private(page);
+ priv = (unsigned long)detach_page_private(page);
trace_afs_page_dirty(vnode, tracepoint_string("rel"),
page->index, priv);
- set_page_private(page, 0);
- ClearPagePrivate(page);
}
/* indicate that the page can be released */
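
The interval arithmetic in afs_invalidate_dirty() above reduces to four cases. A self-contained model of just that decision tree (illustrative only; it leaves out the mmapped short-circuit, tracing and the actual page flag updates):

#include <assert.h>

/* Outcome of clipping dirty range [f, t) against invalidated [off, end),
 * mirroring the checks in afs_invalidate_dirty(). */
enum outcome { KEEP, UNDIRTY, CLIP };

static enum outcome clip_dirty(unsigned *f, unsigned *t,
			       unsigned off, unsigned end)
{
	if (*t <= off || *f >= end)
		return KEEP;             /* doesn't overlap */
	if (*f < off && *t > end)
		return KEEP;             /* splits the region - just absorb it */
	if (*f >= off && *t <= end)
		return UNDIRTY;          /* entirely invalidated */
	if (*f < off)
		*t = off;                /* invalidated the tail */
	else
		*f = end;                /* invalidated the head */
	return (*f == *t) ? UNDIRTY : CLIP;
}

int main(void)
{
	unsigned f = 100, t = 300;

	assert(clip_dirty(&f, &t, 300, 400) == KEEP);   /* disjoint */
	assert(clip_dirty(&f, &t, 200, 400) == CLIP);   /* tail clipped */
	assert(t == 200);
	assert(clip_dirty(&f, &t, 50, 250) == UNDIRTY); /* fully covered */
	return 0;
}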
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 81b0485fd22a..14d5d75f4b6e 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -812,6 +812,7 @@ struct afs_operation {
pgoff_t last; /* last page in mapping to deal with */
unsigned first_offset; /* offset into mapping[first] */
unsigned last_to; /* amount of mapping[last] */
+ bool laundering; /* Laundering page, PG_writeback not set */
} store;
struct {
struct iattr *attr;
@@ -857,6 +858,62 @@ struct afs_vnode_cache_aux {
u64 data_version;
} __packed;
+/*
+ * We use page->private to hold the amount of the page that we've written to,
+ * splitting the field into two parts. However, we need to represent a range
+ * 0...PAGE_SIZE, so we reduce the resolution if the size of the page
+ * exceeds what we can encode.
+ */
+#ifdef CONFIG_64BIT
+#define __AFS_PAGE_PRIV_MASK 0x7fffffffUL
+#define __AFS_PAGE_PRIV_SHIFT 32
+#define __AFS_PAGE_PRIV_MMAPPED 0x80000000UL
+#else
+#define __AFS_PAGE_PRIV_MASK 0x7fffUL
+#define __AFS_PAGE_PRIV_SHIFT 16
+#define __AFS_PAGE_PRIV_MMAPPED 0x8000UL
+#endif
+
+static inline unsigned int afs_page_dirty_resolution(void)
+{
+ int shift = PAGE_SHIFT - (__AFS_PAGE_PRIV_SHIFT - 1);
+ return (shift > 0) ? shift : 0;
+}
+
+static inline size_t afs_page_dirty_from(unsigned long priv)
+{
+ unsigned long x = priv & __AFS_PAGE_PRIV_MASK;
+
+ /* The lower bound is inclusive */
+ return x << afs_page_dirty_resolution();
+}
+
+static inline size_t afs_page_dirty_to(unsigned long priv)
+{
+ unsigned long x = (priv >> __AFS_PAGE_PRIV_SHIFT) & __AFS_PAGE_PRIV_MASK;
+
+ /* The upper bound is immediately beyond the region */
+ return (x + 1) << afs_page_dirty_resolution();
+}
+
+static inline unsigned long afs_page_dirty(size_t from, size_t to)
+{
+ unsigned int res = afs_page_dirty_resolution();
+ from >>= res;
+ to = (to - 1) >> res;
+ return (to << __AFS_PAGE_PRIV_SHIFT) | from;
+}
+
+static inline unsigned long afs_page_dirty_mmapped(unsigned long priv)
+{
+ return priv | __AFS_PAGE_PRIV_MMAPPED;
+}
+
+static inline bool afs_is_page_dirty_mmapped(unsigned long priv)
+{
+ return priv & __AFS_PAGE_PRIV_MMAPPED;
+}
+
#include <trace/events/afs.h>
/*****************************************************************************/
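
To make the new page->private encoding concrete, here is a standalone round-trip of the 64-bit variant above, assuming an LP64 host with 4 KiB pages (so afs_page_dirty_resolution() collapses to 0):

#include <assert.h>
#include <stddef.h>

#define PAGE_SHIFT 12 /* assumed 4 KiB pages */

/* The CONFIG_64BIT constants from fs/afs/internal.h. */
#define __AFS_PAGE_PRIV_MASK   0x7fffffffUL
#define __AFS_PAGE_PRIV_SHIFT  32

static unsigned int afs_page_dirty_resolution(void)
{
	int shift = PAGE_SHIFT - (__AFS_PAGE_PRIV_SHIFT - 1);
	return (shift > 0) ? shift : 0;
}

static size_t afs_page_dirty_from(unsigned long priv)
{
	/* The lower bound is inclusive */
	return (priv & __AFS_PAGE_PRIV_MASK) << afs_page_dirty_resolution();
}

static size_t afs_page_dirty_to(unsigned long priv)
{
	unsigned long x = (priv >> __AFS_PAGE_PRIV_SHIFT) & __AFS_PAGE_PRIV_MASK;

	/* The upper bound is immediately beyond the region */
	return (x + 1) << afs_page_dirty_resolution();
}

static unsigned long afs_page_dirty(size_t from, size_t to)
{
	unsigned int res = afs_page_dirty_resolution();

	from >>= res;
	to = (to - 1) >> res;
	return (to << __AFS_PAGE_PRIV_SHIFT) | from;
}

int main(void)
{
	/* Bytes 100..199 of the page are dirty. */
	unsigned long priv = afs_page_dirty(100, 200);

	assert(afs_page_dirty_from(priv) == 100); /* inclusive lower bound */
	assert(afs_page_dirty_to(priv) == 200);   /* exclusive upper bound */

	/* A whole page round-trips as [0, 4096). */
	priv = afs_page_dirty(0, 4096);
	assert(afs_page_dirty_from(priv) == 0 && afs_page_dirty_to(priv) == 4096);
	return 0;
}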
diff --git a/fs/afs/write.c b/fs/afs/write.c
index da12abd6db21..50371207f327 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -76,7 +76,7 @@ static int afs_fill_page(struct afs_vnode *vnode, struct key *key,
*/
int afs_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
- struct page **pagep, void **fsdata)
+ struct page **_page, void **fsdata)
{
struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
struct page *page;
@@ -90,11 +90,6 @@ int afs_write_begin(struct file *file, struct address_space *mapping,
_enter("{%llx:%llu},{%lx},%u,%u",
vnode->fid.vid, vnode->fid.vnode, index, from, to);
- /* We want to store information about how much of a page is altered in
- * page->private.
- */
- BUILD_BUG_ON(PAGE_SIZE > 32768 && sizeof(page->private) < 8);
-
page = grab_cache_page_write_begin(mapping, index, flags);
if (!page)
return -ENOMEM;
@@ -110,9 +105,6 @@ int afs_write_begin(struct file *file, struct address_space *mapping,
SetPageUptodate(page);
}
- /* page won't leak in error case: it eventually gets cleaned off LRU */
- *pagep = page;
-
try_again:
/* See if this page is already partially written in a way that we can
* merge the new write with.
@@ -120,8 +112,8 @@ try_again:
t = f = 0;
if (PagePrivate(page)) {
priv = page_private(page);
- f = priv & AFS_PRIV_MAX;
- t = priv >> AFS_PRIV_SHIFT;
+ f = afs_page_dirty_from(priv);
+ t = afs_page_dirty_to(priv);
ASSERTCMP(f, <=, t);
}
@@ -138,21 +130,9 @@ try_again:
if (!test_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags) &&
(to < f || from > t))
goto flush_conflicting_write;
- if (from < f)
- f = from;
- if (to > t)
- t = to;
- } else {
- f = from;
- t = to;
}
- priv = (unsigned long)t << AFS_PRIV_SHIFT;
- priv |= f;
- trace_afs_page_dirty(vnode, tracepoint_string("begin"),
- page->index, priv);
- SetPagePrivate(page);
- set_page_private(page, priv);
+ *_page = page;
_leave(" = 0");
return 0;
@@ -162,17 +142,18 @@ try_again:
flush_conflicting_write:
_debug("flush conflict");
ret = write_one_page(page);
- if (ret < 0) {
- _leave(" = %d", ret);
- return ret;
- }
+ if (ret < 0)
+ goto error;
ret = lock_page_killable(page);
- if (ret < 0) {
- _leave(" = %d", ret);
- return ret;
- }
+ if (ret < 0)
+ goto error;
goto try_again;
+
+error:
+ put_page(page);
+ _leave(" = %d", ret);
+ return ret;
}
/*
@@ -184,6 +165,9 @@ int afs_write_end(struct file *file, struct address_space *mapping,
{
struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
struct key *key = afs_file_key(file);
+ unsigned long priv;
+ unsigned int f, from = pos & (PAGE_SIZE - 1);
+ unsigned int t, to = from + copied;
loff_t i_size, maybe_i_size;
int ret;
@@ -215,6 +199,25 @@ int afs_write_end(struct file *file, struct address_space *mapping,
SetPageUptodate(page);
}
+ if (PagePrivate(page)) {
+ priv = page_private(page);
+ f = afs_page_dirty_from(priv);
+ t = afs_page_dirty_to(priv);
+ if (from < f)
+ f = from;
+ if (to > t)
+ t = to;
+ priv = afs_page_dirty(f, t);
+ set_page_private(page, priv);
+ trace_afs_page_dirty(vnode, tracepoint_string("dirty+"),
+ page->index, priv);
+ } else {
+ priv = afs_page_dirty(from, to);
+ attach_page_private(page, (void *)priv);
+ trace_afs_page_dirty(vnode, tracepoint_string("dirty"),
+ page->index, priv);
+ }
+
set_page_dirty(page);
if (PageDirty(page))
_debug("dirtied");
@@ -334,10 +337,9 @@ static void afs_pages_written_back(struct afs_vnode *vnode,
ASSERTCMP(pv.nr, ==, count);
for (loop = 0; loop < count; loop++) {
- priv = page_private(pv.pages[loop]);
+ priv = (unsigned long)detach_page_private(pv.pages[loop]);
trace_afs_page_dirty(vnode, tracepoint_string("clear"),
pv.pages[loop]->index, priv);
- set_page_private(pv.pages[loop], 0);
end_page_writeback(pv.pages[loop]);
}
first += count;
@@ -396,7 +398,8 @@ static void afs_store_data_success(struct afs_operation *op)
op->ctime = op->file[0].scb.status.mtime_client;
afs_vnode_commit_status(op, &op->file[0]);
if (op->error == 0) {
- afs_pages_written_back(vnode, op->store.first, op->store.last);
+ if (!op->store.laundering)
+ afs_pages_written_back(vnode, op->store.first, op->store.last);
afs_stat_v(vnode, n_stores);
atomic_long_add((op->store.last * PAGE_SIZE + op->store.last_to) -
(op->store.first * PAGE_SIZE + op->store.first_offset),
@@ -415,7 +418,7 @@ static const struct afs_operation_ops afs_store_data_operation = {
*/
static int afs_store_data(struct address_space *mapping,
pgoff_t first, pgoff_t last,
- unsigned offset, unsigned to)
+ unsigned offset, unsigned to, bool laundering)
{
struct afs_vnode *vnode = AFS_FS_I(mapping->host);
struct afs_operation *op;
@@ -448,6 +451,7 @@ static int afs_store_data(struct address_space *mapping,
op->store.last = last;
op->store.first_offset = offset;
op->store.last_to = to;
+ op->store.laundering = laundering;
op->mtime = vnode->vfs_inode.i_mtime;
op->flags |= AFS_OPERATION_UNINTR;
op->ops = &afs_store_data_operation;
@@ -509,8 +513,8 @@ static int afs_write_back_from_locked_page(struct address_space *mapping,
*/
start = primary_page->index;
priv = page_private(primary_page);
- offset = priv & AFS_PRIV_MAX;
- to = priv >> AFS_PRIV_SHIFT;
+ offset = afs_page_dirty_from(priv);
+ to = afs_page_dirty_to(priv);
trace_afs_page_dirty(vnode, tracepoint_string("store"),
primary_page->index, priv);
@@ -555,8 +559,8 @@ static int afs_write_back_from_locked_page(struct address_space *mapping,
}
priv = page_private(page);
- f = priv & AFS_PRIV_MAX;
- t = priv >> AFS_PRIV_SHIFT;
+ f = afs_page_dirty_from(priv);
+ t = afs_page_dirty_to(priv);
if (f != 0 &&
!test_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags)) {
unlock_page(page);
@@ -601,7 +605,7 @@ no_more:
if (end > i_size)
to = i_size & ~PAGE_MASK;
- ret = afs_store_data(mapping, first, last, offset, to);
+ ret = afs_store_data(mapping, first, last, offset, to, false);
switch (ret) {
case 0:
ret = count;
@@ -857,12 +861,14 @@ vm_fault_t afs_page_mkwrite(struct vm_fault *vmf)
*/
wait_on_page_writeback(vmf->page);
- priv = (unsigned long)PAGE_SIZE << AFS_PRIV_SHIFT; /* To */
- priv |= 0; /* From */
+ priv = afs_page_dirty(0, PAGE_SIZE);
+ priv = afs_page_dirty_mmapped(priv);
trace_afs_page_dirty(vnode, tracepoint_string("mkwrite"),
vmf->page->index, priv);
- SetPagePrivate(vmf->page);
- set_page_private(vmf->page, priv);
+ if (PagePrivate(vmf->page))
+ set_page_private(vmf->page, priv);
+ else
+ attach_page_private(vmf->page, (void *)priv);
file_update_time(file);
sb_end_pagefault(inode->i_sb);
@@ -915,19 +921,18 @@ int afs_launder_page(struct page *page)
f = 0;
t = PAGE_SIZE;
if (PagePrivate(page)) {
- f = priv & AFS_PRIV_MAX;
- t = priv >> AFS_PRIV_SHIFT;
+ f = afs_page_dirty_from(priv);
+ t = afs_page_dirty_to(priv);
}
trace_afs_page_dirty(vnode, tracepoint_string("launder"),
page->index, priv);
- ret = afs_store_data(mapping, page->index, page->index, t, f);
+ ret = afs_store_data(mapping, page->index, page->index, t, f, true);
}
+ priv = (unsigned long)detach_page_private(page);
trace_afs_page_dirty(vnode, tracepoint_string("laundered"),
page->index, priv);
- set_page_private(page, 0);
- ClearPagePrivate(page);
#ifdef CONFIG_AFS_FSCACHE
if (PageFsCache(page)) {
diff --git a/fs/afs/xattr.c b/fs/afs/xattr.c
index 84f3c4f57531..38884d6c57cd 100644
--- a/fs/afs/xattr.c
+++ b/fs/afs/xattr.c
@@ -85,7 +85,7 @@ static int afs_xattr_get_acl(const struct xattr_handler *handler,
if (acl->size <= size)
memcpy(buffer, acl->data, acl->size);
else
- op->error = -ERANGE;
+ ret = -ERANGE;
}
}
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index aabc11f099cf..ac0b5fc30ea6 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1690,7 +1690,7 @@ struct elf_thread_core_info {
struct elf_thread_core_info *next;
struct task_struct *task;
struct elf_prstatus prstatus;
- struct memelfnote notes[0];
+ struct memelfnote notes[];
};
struct elf_note_info {
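
The [0] to [] change here, and the identical conversions in fs/hfs, fs/hfsplus, fs/isofs and fs/select further down, replace the GNU zero-length-array extension with a C99 flexible array member, which bounds checkers and fortified builds can reason about. The allocation pattern is unchanged; a minimal userspace sketch (the kernel itself would size this with struct_size() and kmalloc()):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct note_table {
	size_t count;
	int notes[];  /* C99 flexible array member, formerly "int notes[0]" */
};

int main(void)
{
	size_t n = 4;
	/* The trailing storage is sized at allocation time. */
	struct note_table *t = malloc(sizeof(*t) + n * sizeof(t->notes[0]));

	if (!t)
		return 1;
	t->count = n;
	memset(t->notes, 0, n * sizeof(t->notes[0]));
	t->notes[3] = 42;
	printf("%d\n", t->notes[3]);
	free(t);
	return 0;
}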
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index b3268f4ea5f3..771a036867dc 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -544,7 +544,18 @@ static int resolve_indirect_ref(struct btrfs_fs_info *fs_info,
int level = ref->level;
struct btrfs_key search_key = ref->key_for_search;
- root = btrfs_get_fs_root(fs_info, ref->root_id, false);
+ /*
+ * If we're search_commit_root we could possibly be holding locks on
+ * other tree nodes. This happens when qgroups does backref walks when
+ * adding new delayed refs. To deal with this we need to look in cache
+ * for the root, and if we don't find it then we need to search the
+ * tree_root's commit root, thus the btrfs_get_fs_root_commit_root usage
+ * here.
+ */
+ if (path->search_commit_root)
+ root = btrfs_get_fs_root_commit_root(fs_info, path, ref->root_id);
+ else
+ root = btrfs_get_fs_root(fs_info, ref->root_id, false);
if (IS_ERR(root)) {
ret = PTR_ERR(root);
goto out_free;
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index c0f1d6818df7..3ba6f3839d39 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -2024,6 +2024,7 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info)
key.offset = 0;
btrfs_release_path(path);
}
+ btrfs_release_path(path);
list_for_each_entry(space_info, &info->space_info, list) {
int i;
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index aac3d6f4e35b..0378933d163c 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3564,6 +3564,8 @@ struct reada_control *btrfs_reada_add(struct btrfs_root *root,
int btrfs_reada_wait(void *handle);
void btrfs_reada_detach(void *handle);
int btree_readahead_hook(struct extent_buffer *eb, int err);
+void btrfs_reada_remove_dev(struct btrfs_device *dev);
+void btrfs_reada_undo_remove_dev(struct btrfs_device *dev);
static inline int is_fstree(u64 rootid)
{
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index 4a0243cb9d97..5b9e3f3ace22 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -688,6 +688,9 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
}
btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, (u64)-1);
+ if (!scrub_ret)
+ btrfs_reada_remove_dev(src_device);
+
/*
* We have to use this loop approach because at this point src_device
* has to be available for transaction commit to complete, yet new
@@ -696,6 +699,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
while (1) {
trans = btrfs_start_transaction(root, 0);
if (IS_ERR(trans)) {
+ btrfs_reada_undo_remove_dev(src_device);
mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
return PTR_ERR(trans);
}
@@ -746,6 +750,7 @@ error:
up_write(&dev_replace->rwsem);
mutex_unlock(&fs_info->chunk_mutex);
mutex_unlock(&fs_info->fs_devices->device_list_mutex);
+ btrfs_reada_undo_remove_dev(src_device);
btrfs_rm_dev_replace_blocked(fs_info);
if (tgt_device)
btrfs_destroy_dev_replace_tgtdev(tgt_device);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 8e3438672a82..af97ddcc6b3e 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1281,32 +1281,26 @@ int btrfs_add_log_tree(struct btrfs_trans_handle *trans,
return 0;
}
-struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root,
- struct btrfs_key *key)
+static struct btrfs_root *read_tree_root_path(struct btrfs_root *tree_root,
+ struct btrfs_path *path,
+ struct btrfs_key *key)
{
struct btrfs_root *root;
struct btrfs_fs_info *fs_info = tree_root->fs_info;
- struct btrfs_path *path;
u64 generation;
int ret;
int level;
- path = btrfs_alloc_path();
- if (!path)
- return ERR_PTR(-ENOMEM);
-
root = btrfs_alloc_root(fs_info, key->objectid, GFP_NOFS);
- if (!root) {
- ret = -ENOMEM;
- goto alloc_fail;
- }
+ if (!root)
+ return ERR_PTR(-ENOMEM);
ret = btrfs_find_root(tree_root, key, path,
&root->root_item, &root->root_key);
if (ret) {
if (ret > 0)
ret = -ENOENT;
- goto find_fail;
+ goto fail;
}
generation = btrfs_root_generation(&root->root_item);
@@ -1317,21 +1311,31 @@ struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root,
if (IS_ERR(root->node)) {
ret = PTR_ERR(root->node);
root->node = NULL;
- goto find_fail;
+ goto fail;
} else if (!btrfs_buffer_uptodate(root->node, generation, 0)) {
ret = -EIO;
- goto find_fail;
+ goto fail;
}
root->commit_root = btrfs_root_node(root);
-out:
- btrfs_free_path(path);
return root;
-
-find_fail:
+fail:
btrfs_put_root(root);
-alloc_fail:
- root = ERR_PTR(ret);
- goto out;
+ return ERR_PTR(ret);
+}
+
+struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root,
+ struct btrfs_key *key)
+{
+ struct btrfs_root *root;
+ struct btrfs_path *path;
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return ERR_PTR(-ENOMEM);
+ root = read_tree_root_path(tree_root, path, key);
+ btrfs_free_path(path);
+
+ return root;
}
/*
@@ -1419,6 +1423,31 @@ static struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info,
return root;
}
+static struct btrfs_root *btrfs_get_global_root(struct btrfs_fs_info *fs_info,
+ u64 objectid)
+{
+ if (objectid == BTRFS_ROOT_TREE_OBJECTID)
+ return btrfs_grab_root(fs_info->tree_root);
+ if (objectid == BTRFS_EXTENT_TREE_OBJECTID)
+ return btrfs_grab_root(fs_info->extent_root);
+ if (objectid == BTRFS_CHUNK_TREE_OBJECTID)
+ return btrfs_grab_root(fs_info->chunk_root);
+ if (objectid == BTRFS_DEV_TREE_OBJECTID)
+ return btrfs_grab_root(fs_info->dev_root);
+ if (objectid == BTRFS_CSUM_TREE_OBJECTID)
+ return btrfs_grab_root(fs_info->csum_root);
+ if (objectid == BTRFS_QUOTA_TREE_OBJECTID)
+ return btrfs_grab_root(fs_info->quota_root) ?
+ fs_info->quota_root : ERR_PTR(-ENOENT);
+ if (objectid == BTRFS_UUID_TREE_OBJECTID)
+ return btrfs_grab_root(fs_info->uuid_root) ?
+ fs_info->uuid_root : ERR_PTR(-ENOENT);
+ if (objectid == BTRFS_FREE_SPACE_TREE_OBJECTID)
+ return btrfs_grab_root(fs_info->free_space_root) ?
+ fs_info->free_space_root : ERR_PTR(-ENOENT);
+ return NULL;
+}
+
int btrfs_insert_fs_root(struct btrfs_fs_info *fs_info,
struct btrfs_root *root)
{
@@ -1518,25 +1547,9 @@ static struct btrfs_root *btrfs_get_root_ref(struct btrfs_fs_info *fs_info,
struct btrfs_key key;
int ret;
- if (objectid == BTRFS_ROOT_TREE_OBJECTID)
- return btrfs_grab_root(fs_info->tree_root);
- if (objectid == BTRFS_EXTENT_TREE_OBJECTID)
- return btrfs_grab_root(fs_info->extent_root);
- if (objectid == BTRFS_CHUNK_TREE_OBJECTID)
- return btrfs_grab_root(fs_info->chunk_root);
- if (objectid == BTRFS_DEV_TREE_OBJECTID)
- return btrfs_grab_root(fs_info->dev_root);
- if (objectid == BTRFS_CSUM_TREE_OBJECTID)
- return btrfs_grab_root(fs_info->csum_root);
- if (objectid == BTRFS_QUOTA_TREE_OBJECTID)
- return btrfs_grab_root(fs_info->quota_root) ?
- fs_info->quota_root : ERR_PTR(-ENOENT);
- if (objectid == BTRFS_UUID_TREE_OBJECTID)
- return btrfs_grab_root(fs_info->uuid_root) ?
- fs_info->uuid_root : ERR_PTR(-ENOENT);
- if (objectid == BTRFS_FREE_SPACE_TREE_OBJECTID)
- return btrfs_grab_root(fs_info->free_space_root) ?
- fs_info->free_space_root : ERR_PTR(-ENOENT);
+ root = btrfs_get_global_root(fs_info, objectid);
+ if (root)
+ return root;
again:
root = btrfs_lookup_fs_root(fs_info, objectid);
if (root) {
@@ -1622,6 +1635,52 @@ struct btrfs_root *btrfs_get_new_fs_root(struct btrfs_fs_info *fs_info,
}
/*
+ * btrfs_get_fs_root_commit_root - return a root for the given objectid
+ * @fs_info: the fs_info
+ * @objectid: the objectid we need to lookup
+ *
+ * This is exclusively used for backref walking, and exists specifically because
+ * of how qgroups does lookups. Qgroups will do a backref lookup at delayed ref
+ * creation time, which means we may have to read the tree_root in order to look
+ * up a fs root that is not in memory. If the root is not in memory we will
+ * read the tree root commit root and look up the fs root from there. This is a
+ * temporary root, it will not be inserted into the radix tree as it doesn't
+ * have the most uptodate information, it'll simply be discarded once the
+ * backref code is finished using the root.
+ */
+struct btrfs_root *btrfs_get_fs_root_commit_root(struct btrfs_fs_info *fs_info,
+ struct btrfs_path *path,
+ u64 objectid)
+{
+ struct btrfs_root *root;
+ struct btrfs_key key;
+
+ ASSERT(path->search_commit_root && path->skip_locking);
+
+ /*
+ * This can return -ENOENT if we ask for a root that doesn't exist, but
+ * since this is called via the backref walking code we won't be looking
+ * up a root that doesn't exist, unless there's corruption. So if root
+ * != NULL just return it.
+ */
+ root = btrfs_get_global_root(fs_info, objectid);
+ if (root)
+ return root;
+
+ root = btrfs_lookup_fs_root(fs_info, objectid);
+ if (root)
+ return root;
+
+ key.objectid = objectid;
+ key.type = BTRFS_ROOT_ITEM_KEY;
+ key.offset = (u64)-1;
+ root = read_tree_root_path(fs_info->tree_root, path, &key);
+ btrfs_release_path(path);
+
+ return root;
+}
+
+/*
* called by the kthread helper functions to finally call the bio end_io
* functions. This is where read checksum verification actually happens
*/
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index fee69ced58b4..182540bdcea0 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -69,6 +69,9 @@ struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info,
u64 objectid, bool check_ref);
struct btrfs_root *btrfs_get_new_fs_root(struct btrfs_fs_info *fs_info,
u64 objectid, dev_t anon_dev);
+struct btrfs_root *btrfs_get_fs_root_commit_root(struct btrfs_fs_info *fs_info,
+ struct btrfs_path *path,
+ u64 objectid);
void btrfs_free_fs_info(struct btrfs_fs_info *fs_info);
int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 3b21fee13e77..5fd60b13f4f8 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3185,7 +3185,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
struct btrfs_tree_block_info *bi;
if (item_size < sizeof(*ei) + sizeof(*bi)) {
btrfs_crit(info,
-"invalid extent item size for key (%llu, %u, %llu) owner %llu, has %u expect >= %lu",
+"invalid extent item size for key (%llu, %u, %llu) owner %llu, has %u expect >= %zu",
key.objectid, key.type, key.offset,
owner_objectid, item_size,
sizeof(*ei) + sizeof(*bi));
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 0ff659455b1e..87355a38a654 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -3628,7 +3628,8 @@ static ssize_t btrfs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
inode_lock_shared(inode);
ret = btrfs_direct_IO(iocb, to);
inode_unlock_shared(inode);
- if (ret < 0)
+ if (ret < 0 || !iov_iter_count(to) ||
+ iocb->ki_pos >= i_size_read(file_inode(iocb->ki_filp)))
return ret;
}
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 936c3137c646..da58c58ef9aa 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -9672,10 +9672,16 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
* clear_offset by our extent size.
*/
clear_offset += ins.offset;
- btrfs_dec_block_group_reservations(fs_info, ins.objectid);
last_alloc = ins.offset;
trans = insert_prealloc_file_extent(trans, inode, &ins, cur_offset);
+ /*
+ * Now that we inserted the prealloc extent we can finally
+ * decrement the number of reservations in the block group.
+ * If we did it before, we could race with relocation and have
+ * relocation miss the reserved extent, making it fail later.
+ */
+ btrfs_dec_block_group_reservations(fs_info, ins.objectid);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
btrfs_free_reserved_extent(fs_info, ins.objectid,
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 580899bdb991..c54ea6586632 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1026,6 +1026,10 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info)
btrfs_item_key_to_cpu(leaf, &found_key, slot);
if (found_key.type == BTRFS_ROOT_REF_KEY) {
+
+ /* Release locks on tree_root before we access quota_root */
+ btrfs_release_path(path);
+
ret = add_qgroup_item(trans, quota_root,
found_key.offset);
if (ret) {
@@ -1044,6 +1048,20 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info)
btrfs_abort_transaction(trans, ret);
goto out_free_path;
}
+ ret = btrfs_search_slot_for_read(tree_root, &found_key,
+ path, 1, 0);
+ if (ret < 0) {
+ btrfs_abort_transaction(trans, ret);
+ goto out_free_path;
+ }
+ if (ret > 0) {
+ /*
+ * Shouldn't happen, but in case it does we
+ * don't need to do the btrfs_next_item, just
+ * continue.
+ */
+ continue;
+ }
}
ret = btrfs_next_item(tree_root, path);
if (ret < 0) {
diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c
index 9d4f5316a7e8..d9a166eb344e 100644
--- a/fs/btrfs/reada.c
+++ b/fs/btrfs/reada.c
@@ -421,6 +421,9 @@ static struct reada_extent *reada_find_extent(struct btrfs_fs_info *fs_info,
if (!dev->bdev)
continue;
+ if (test_bit(BTRFS_DEV_STATE_NO_READA, &dev->dev_state))
+ continue;
+
if (dev_replace_is_ongoing &&
dev == fs_info->dev_replace.tgtdev) {
/*
@@ -445,6 +448,8 @@ static struct reada_extent *reada_find_extent(struct btrfs_fs_info *fs_info,
}
have_zone = 1;
}
+ if (!have_zone)
+ radix_tree_delete(&fs_info->reada_tree, index);
spin_unlock(&fs_info->reada_lock);
up_read(&fs_info->dev_replace.rwsem);
@@ -1020,3 +1025,45 @@ void btrfs_reada_detach(void *handle)
kref_put(&rc->refcnt, reada_control_release);
}
+
+/*
+ * Before removing a device (device replace or device remove ioctls), call this
+ * function to wait for all existing readahead requests on the device and to
+ * make sure no one queues more readahead requests for the device.
+ *
+ * Must be called without holding either the device list mutex or the device
+ * replace semaphore, otherwise it will deadlock.
+ */
+void btrfs_reada_remove_dev(struct btrfs_device *dev)
+{
+ struct btrfs_fs_info *fs_info = dev->fs_info;
+
+ /* Serialize with readahead extent creation at reada_find_extent(). */
+ spin_lock(&fs_info->reada_lock);
+ set_bit(BTRFS_DEV_STATE_NO_READA, &dev->dev_state);
+ spin_unlock(&fs_info->reada_lock);
+
+ /*
+ * There might be readahead requests added to the radix trees which
+ * were not yet added to the readahead work queue. We need to start
+ * them and wait for their completion, otherwise we can end up with
+ * use-after-free problems when dropping the last reference on the
+ * readahead extents and their zones, as they need to access the
+ * device structure.
+ */
+ reada_start_machine(fs_info);
+ btrfs_flush_workqueue(fs_info->readahead_workers);
+}
+
+/*
+ * If an error happens while removing a device (device replace or device remove
+ * ioctls) after calling btrfs_reada_remove_dev(), call this to undo what that
+ * function did. This is safe to call even if btrfs_reada_remove_dev() was not
+ * called before.
+ */
+void btrfs_reada_undo_remove_dev(struct btrfs_device *dev)
+{
+ spin_lock(&dev->fs_info->reada_lock);
+ clear_bit(BTRFS_DEV_STATE_NO_READA, &dev->dev_state);
+ spin_unlock(&dev->fs_info->reada_lock);
+}
diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c
index f0ffd5ee77bd..8784b74f5232 100644
--- a/fs/btrfs/tree-checker.c
+++ b/fs/btrfs/tree-checker.c
@@ -760,18 +760,36 @@ int btrfs_check_chunk_valid(struct extent_buffer *leaf,
u64 type;
u64 features;
bool mixed = false;
+ int raid_index;
+ int nparity;
+ int ncopies;
length = btrfs_chunk_length(leaf, chunk);
stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk);
type = btrfs_chunk_type(leaf, chunk);
+ raid_index = btrfs_bg_flags_to_raid_index(type);
+ ncopies = btrfs_raid_array[raid_index].ncopies;
+ nparity = btrfs_raid_array[raid_index].nparity;
if (!num_stripes) {
chunk_err(leaf, chunk, logical,
"invalid chunk num_stripes, have %u", num_stripes);
return -EUCLEAN;
}
+ if (num_stripes < ncopies) {
+ chunk_err(leaf, chunk, logical,
+ "invalid chunk num_stripes < ncopies, have %u < %d",
+ num_stripes, ncopies);
+ return -EUCLEAN;
+ }
+ if (nparity && num_stripes == nparity) {
+ chunk_err(leaf, chunk, logical,
+ "invalid chunk num_stripes == nparity, have %u == %d",
+ num_stripes, nparity);
+ return -EUCLEAN;
+ }
if (!IS_ALIGNED(logical, fs_info->sectorsize)) {
chunk_err(leaf, chunk, logical,
"invalid chunk logical, have %llu should aligned to %u",
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 58b9c419a2b6..b1e48078c318 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -431,7 +431,7 @@ static struct btrfs_device *__alloc_device(struct btrfs_fs_info *fs_info)
atomic_set(&dev->reada_in_flight, 0);
atomic_set(&dev->dev_stats_ccnt, 0);
- btrfs_device_data_ordered_init(dev);
+ btrfs_device_data_ordered_init(dev, fs_info);
INIT_RADIX_TREE(&dev->reada_zones, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
INIT_RADIX_TREE(&dev->reada_extents, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
extent_io_tree_init(fs_info, &dev->alloc_state,
@@ -2099,6 +2099,8 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path,
mutex_unlock(&uuid_mutex);
ret = btrfs_shrink_device(device, 0);
+ if (!ret)
+ btrfs_reada_remove_dev(device);
mutex_lock(&uuid_mutex);
if (ret)
goto error_undo;
@@ -2179,6 +2181,7 @@ out:
return ret;
error_undo:
+ btrfs_reada_undo_remove_dev(device);
if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
mutex_lock(&fs_info->chunk_mutex);
list_add(&device->dev_alloc_list,
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index bf27ac07d315..232f02bd214f 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -39,10 +39,10 @@ struct btrfs_io_geometry {
#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
#include <linux/seqlock.h>
#define __BTRFS_NEED_DEVICE_DATA_ORDERED
-#define btrfs_device_data_ordered_init(device) \
- seqcount_init(&device->data_seqcount)
+#define btrfs_device_data_ordered_init(device, info) \
+ seqcount_mutex_init(&device->data_seqcount, &info->chunk_mutex)
#else
-#define btrfs_device_data_ordered_init(device) do { } while (0)
+#define btrfs_device_data_ordered_init(device, info) do { } while (0)
#endif
#define BTRFS_DEV_STATE_WRITEABLE (0)
@@ -50,6 +50,7 @@ struct btrfs_io_geometry {
#define BTRFS_DEV_STATE_MISSING (2)
#define BTRFS_DEV_STATE_REPLACE_TGT (3)
#define BTRFS_DEV_STATE_FLUSH_SENT (4)
+#define BTRFS_DEV_STATE_NO_READA (5)
struct btrfs_device {
struct list_head dev_list; /* device_list_mutex */
@@ -71,7 +72,8 @@ struct btrfs_device {
blk_status_t last_flush_error;
#ifdef __BTRFS_NEED_DEVICE_DATA_ORDERED
- seqcount_t data_seqcount;
+ /* A seqcount_t with associated chunk_mutex (for lockdep) */
+ seqcount_mutex_t data_seqcount;
#endif
/* the internal btrfs device id */
@@ -162,11 +164,9 @@ btrfs_device_get_##name(const struct btrfs_device *dev) \
static inline void \
btrfs_device_set_##name(struct btrfs_device *dev, u64 size) \
{ \
- preempt_disable(); \
write_seqcount_begin(&dev->data_seqcount); \
dev->name = size; \
write_seqcount_end(&dev->data_seqcount); \
- preempt_enable(); \
}
#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION)
#define BTRFS_DEVICE_GETSET_FUNCS(name) \
diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c
index 3080cda9e824..8bda092e60c5 100644
--- a/fs/cachefiles/rdwr.c
+++ b/fs/cachefiles/rdwr.c
@@ -121,7 +121,7 @@ static int cachefiles_read_reissue(struct cachefiles_object *object,
_debug("reissue read");
ret = bmapping->a_ops->readpage(NULL, backpage);
if (ret < 0)
- goto unlock_discard;
+ goto discard;
}
/* but the page may have been read before the monitor was installed, so
@@ -138,6 +138,7 @@ static int cachefiles_read_reissue(struct cachefiles_object *object,
unlock_discard:
unlock_page(backpage);
+discard:
spin_lock_irq(&object->work_lock);
list_del(&monitor->op_link);
spin_unlock_irq(&object->work_lock);
diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
index a768a09430c3..686e0ad28788 100644
--- a/fs/debugfs/file.c
+++ b/fs/debugfs/file.c
@@ -1127,24 +1127,23 @@ static const struct file_operations debugfs_devm_entry_ops = {
* file will be created in the root of the debugfs filesystem.
* @read_fn: function pointer called to print the seq_file content.
*/
-struct dentry *debugfs_create_devm_seqfile(struct device *dev, const char *name,
- struct dentry *parent,
- int (*read_fn)(struct seq_file *s,
- void *data))
+void debugfs_create_devm_seqfile(struct device *dev, const char *name,
+ struct dentry *parent,
+ int (*read_fn)(struct seq_file *s, void *data))
{
struct debugfs_devm_entry *entry;
if (IS_ERR(parent))
- return ERR_PTR(-ENOENT);
+ return;
entry = devm_kzalloc(dev, sizeof(*entry), GFP_KERNEL);
if (!entry)
- return ERR_PTR(-ENOMEM);
+ return;
entry->read = read_fn;
entry->dev = dev;
- return debugfs_create_file(name, S_IRUGO, parent, entry,
- &debugfs_devm_entry_ops);
+ debugfs_create_file(name, S_IRUGO, parent, entry,
+ &debugfs_devm_entry_ops);
}
EXPORT_SYMBOL_GPL(debugfs_create_devm_seqfile);
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index 5b81f3b080ee..ca50c90adc4c 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -669,68 +669,8 @@ const struct file_operations ext4_dir_operations = {
};
#ifdef CONFIG_UNICODE
-static int ext4_d_compare(const struct dentry *dentry, unsigned int len,
- const char *str, const struct qstr *name)
-{
- struct qstr qstr = {.name = str, .len = len };
- const struct dentry *parent = READ_ONCE(dentry->d_parent);
- const struct inode *inode = d_inode_rcu(parent);
- char strbuf[DNAME_INLINE_LEN];
-
- if (!inode || !IS_CASEFOLDED(inode) ||
- !EXT4_SB(inode->i_sb)->s_encoding) {
- if (len != name->len)
- return -1;
- return memcmp(str, name->name, len);
- }
-
- /*
- * If the dentry name is stored in-line, then it may be concurrently
- * modified by a rename. If this happens, the VFS will eventually retry
- * the lookup, so it doesn't matter what ->d_compare() returns.
- * However, it's unsafe to call utf8_strncasecmp() with an unstable
- * string. Therefore, we have to copy the name into a temporary buffer.
- */
- if (len <= DNAME_INLINE_LEN - 1) {
- memcpy(strbuf, str, len);
- strbuf[len] = 0;
- qstr.name = strbuf;
- /* prevent compiler from optimizing out the temporary buffer */
- barrier();
- }
-
- return ext4_ci_compare(inode, name, &qstr, false);
-}
-
-static int ext4_d_hash(const struct dentry *dentry, struct qstr *str)
-{
- const struct ext4_sb_info *sbi = EXT4_SB(dentry->d_sb);
- const struct unicode_map *um = sbi->s_encoding;
- const struct inode *inode = d_inode_rcu(dentry);
- unsigned char *norm;
- int len, ret = 0;
-
- if (!inode || !IS_CASEFOLDED(inode) || !um)
- return 0;
-
- norm = kmalloc(PATH_MAX, GFP_ATOMIC);
- if (!norm)
- return -ENOMEM;
-
- len = utf8_casefold(um, str, norm, PATH_MAX);
- if (len < 0) {
- if (ext4_has_strict_mode(sbi))
- ret = -EINVAL;
- goto out;
- }
- str->hash = full_name_hash(dentry, norm, len);
-out:
- kfree(norm);
- return ret;
-}
-
const struct dentry_operations ext4_dentry_ops = {
- .d_hash = ext4_d_hash,
- .d_compare = ext4_d_compare,
+ .d_hash = generic_ci_d_hash,
+ .d_compare = generic_ci_d_compare,
};
#endif
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 254d1c26bea8..45fcdbf538d1 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1166,10 +1166,6 @@ struct ext4_inode_info {
#define EXT4_VALID_FS 0x0001 /* Unmounted cleanly */
#define EXT4_ERROR_FS 0x0002 /* Errors detected */
#define EXT4_ORPHAN_FS 0x0004 /* Orphans being recovered */
-#define EXT4_FC_INELIGIBLE 0x0008 /* Fast commit ineligible */
-#define EXT4_FC_COMMITTING 0x0010 /* File system underoing a fast
- * commit.
- */
#define EXT4_FC_REPLAY 0x0020 /* Fast commit replay ongoing */
/*
@@ -1431,6 +1427,10 @@ struct ext4_super_block {
*/
#define EXT4_MF_MNTDIR_SAMPLED 0x0001
#define EXT4_MF_FS_ABORTED 0x0002 /* Fatal error detected */
+#define EXT4_MF_FC_INELIGIBLE 0x0004 /* Fast commit ineligible */
+#define EXT4_MF_FC_COMMITTING 0x0008 /* File system undergoing a fast
+ * commit.
+ */
#ifdef CONFIG_FS_ENCRYPTION
#define DUMMY_ENCRYPTION_ENABLED(sbi) ((sbi)->s_dummy_enc_policy.policy != NULL)
@@ -1444,14 +1444,6 @@ struct ext4_super_block {
#define EXT4_ENC_UTF8_12_1 1
/*
- * Flags for ext4_sb_info.s_encoding_flags.
- */
-#define EXT4_ENC_STRICT_MODE_FL (1 << 0)
-
-#define ext4_has_strict_mode(sbi) \
- (sbi->s_encoding_flags & EXT4_ENC_STRICT_MODE_FL)
-
-/*
* fourth extended-fs super-block data in memory
*/
struct ext4_sb_info {
@@ -1500,10 +1492,6 @@ struct ext4_sb_info {
struct kobject s_kobj;
struct completion s_kobj_unregister;
struct super_block *s_sb;
-#ifdef CONFIG_UNICODE
- struct unicode_map *s_encoding;
- __u16 s_encoding_flags;
-#endif
/* Journaling */
struct journal_s *s_journal;
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 559100f3e23c..57cfa28919a9 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -1471,16 +1471,16 @@ static int ext4_ext_search_left(struct inode *inode,
}
/*
- * search the closest allocated block to the right for *logical
- * and returns it at @logical + it's physical address at @phys
- * if *logical is the largest allocated block, the function
- * returns 0 at @phys
- * return value contains 0 (success) or error code
+ * Search the closest allocated block to the right for *logical
+ * and return it at @logical, with its physical address at @phys.
+ * If no such block exists, return 0 and set @phys to 0. Return 1
+ * when an allocated block is found, in which case *ret_ex is
+ * valid. Otherwise return a (< 0) error code.
*/
static int ext4_ext_search_right(struct inode *inode,
struct ext4_ext_path *path,
ext4_lblk_t *logical, ext4_fsblk_t *phys,
- struct ext4_extent **ret_ex)
+ struct ext4_extent *ret_ex)
{
struct buffer_head *bh = NULL;
struct ext4_extent_header *eh;
@@ -1574,10 +1574,11 @@ got_index:
found_extent:
*logical = le32_to_cpu(ex->ee_block);
*phys = ext4_ext_pblock(ex);
- *ret_ex = ex;
+ if (ret_ex)
+ *ret_ex = *ex;
if (bh)
put_bh(bh);
- return 0;
+ return 1;
}
/*
@@ -2868,8 +2869,8 @@ again:
*/
lblk = ex_end + 1;
err = ext4_ext_search_right(inode, path, &lblk, &pblk,
- &ex);
- if (err)
+ NULL);
+ if (err < 0)
goto out;
if (pblk) {
partial.pclu = EXT4_B2C(sbi, pblk);
@@ -4039,7 +4040,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
struct ext4_map_blocks *map, int flags)
{
struct ext4_ext_path *path = NULL;
- struct ext4_extent newex, *ex, *ex2;
+ struct ext4_extent newex, *ex, ex2;
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
ext4_fsblk_t newblock = 0, pblk;
int err = 0, depth, ret;
@@ -4175,15 +4176,14 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
if (err)
goto out;
ar.lright = map->m_lblk;
- ex2 = NULL;
err = ext4_ext_search_right(inode, path, &ar.lright, &ar.pright, &ex2);
- if (err)
+ if (err < 0)
goto out;
/* Check if the extent after searching to the right implies a
* cluster we can use. */
- if ((sbi->s_cluster_ratio > 1) && ex2 &&
- get_implied_cluster_alloc(inode->i_sb, map, ex2, path)) {
+ if ((sbi->s_cluster_ratio > 1) && err &&
+ get_implied_cluster_alloc(inode->i_sb, map, &ex2, path)) {
ar.len = allocated = map->m_len;
newblock = map->m_pblk;
goto got_allocated_blocks;
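
The signature change above (struct ext4_extent **ret_ex becoming *ret_ex, with a 1/0/negative return convention) is about lifetime: the old code returned a pointer into the extent path, which the caller could drop before using it. A userspace model of the copy-out convention (hypothetical names, not ext4 code):

#include <assert.h>
#include <stdlib.h>

struct extent { unsigned block, len; };

/* New-style search: copy the found extent out by value and signal
 * found (1) / nothing to the right (0) / error (< 0) in the return
 * code, instead of handing back a pointer into a buffer the caller
 * may free. */
static int search_right(const struct extent *tree, size_t n,
			unsigned logical, struct extent *ret_ex)
{
	for (size_t i = 0; i < n; i++) {
		if (tree[i].block >= logical) {
			if (ret_ex)
				*ret_ex = tree[i]; /* copy, not a pointer */
			return 1;
		}
	}
	return 0;
}

int main(void)
{
	struct extent *scratch = malloc(2 * sizeof(*scratch));
	struct extent found;

	if (!scratch)
		return 1;
	scratch[0] = (struct extent){ .block = 10, .len = 4 };
	scratch[1] = (struct extent){ .block = 50, .len = 8 };

	assert(search_right(scratch, 2, 20, &found) == 1);
	free(scratch);             /* path released, as in the caller */
	assert(found.block == 50); /* the copy is still valid */
	assert(search_right(NULL, 0, 0, NULL) == 0);
	return 0;
}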
diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c
index 447c8d93f480..8d43058386c3 100644
--- a/fs/ext4/fast_commit.c
+++ b/fs/ext4/fast_commit.c
@@ -269,7 +269,7 @@ void ext4_fc_mark_ineligible(struct super_block *sb, int reason)
(EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY))
return;
- sbi->s_mount_state |= EXT4_FC_INELIGIBLE;
+ sbi->s_mount_flags |= EXT4_MF_FC_INELIGIBLE;
WARN_ON(reason >= EXT4_FC_REASON_MAX);
sbi->s_fc_stats.fc_ineligible_reason_count[reason]++;
}
@@ -292,7 +292,7 @@ void ext4_fc_start_ineligible(struct super_block *sb, int reason)
}
/*
- * Stop a fast commit ineligible update. We set EXT4_FC_INELIGIBLE flag here
+ * Stop a fast commit ineligible update. We set EXT4_MF_FC_INELIGIBLE flag here
* to ensure that after stopping the ineligible update, at least one full
* commit takes place.
*/
@@ -302,13 +302,13 @@ void ext4_fc_stop_ineligible(struct super_block *sb)
(EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY))
return;
- EXT4_SB(sb)->s_mount_state |= EXT4_FC_INELIGIBLE;
+ EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FC_INELIGIBLE;
atomic_dec(&EXT4_SB(sb)->s_fc_ineligible_updates);
}
static inline int ext4_fc_is_ineligible(struct super_block *sb)
{
- return (EXT4_SB(sb)->s_mount_state & EXT4_FC_INELIGIBLE) ||
+ return (EXT4_SB(sb)->s_mount_flags & EXT4_MF_FC_INELIGIBLE) ||
atomic_read(&EXT4_SB(sb)->s_fc_ineligible_updates);
}
@@ -358,7 +358,7 @@ static int ext4_fc_track_template(
spin_lock(&sbi->s_fc_lock);
if (list_empty(&EXT4_I(inode)->i_fc_list))
list_add_tail(&EXT4_I(inode)->i_fc_list,
- (sbi->s_mount_state & EXT4_FC_COMMITTING) ?
+ (sbi->s_mount_flags & EXT4_MF_FC_COMMITTING) ?
&sbi->s_fc_q[FC_Q_STAGING] :
&sbi->s_fc_q[FC_Q_MAIN]);
spin_unlock(&sbi->s_fc_lock);
@@ -411,7 +411,7 @@ static int __track_dentry_update(struct inode *inode, void *arg, bool update)
node->fcd_name.len = dentry->d_name.len;
spin_lock(&sbi->s_fc_lock);
- if (sbi->s_mount_state & EXT4_FC_COMMITTING)
+ if (sbi->s_mount_flags & EXT4_MF_FC_COMMITTING)
list_add_tail(&node->fcd_list,
&sbi->s_fc_dentry_q[FC_Q_STAGING]);
else
@@ -846,7 +846,7 @@ static int ext4_fc_submit_inode_data_all(journal_t *journal)
int ret = 0;
spin_lock(&sbi->s_fc_lock);
- sbi->s_mount_state |= EXT4_FC_COMMITTING;
+ sbi->s_mount_flags |= EXT4_MF_FC_COMMITTING;
list_for_each(pos, &sbi->s_fc_q[FC_Q_MAIN]) {
ei = list_entry(pos, struct ext4_inode_info, i_fc_list);
ext4_set_inode_state(&ei->vfs_inode, EXT4_STATE_FC_COMMITTING);
@@ -964,7 +964,6 @@ static int ext4_fc_commit_dentry_updates(journal_t *journal, u32 *crc)
fc_dentry->fcd_parent, fc_dentry->fcd_ino,
fc_dentry->fcd_name.len,
fc_dentry->fcd_name.name, crc)) {
- spin_lock(&sbi->s_fc_lock);
ret = -ENOSPC;
goto lock_and_exit;
}
@@ -1191,8 +1190,8 @@ static void ext4_fc_cleanup(journal_t *journal, int full)
list_splice_init(&sbi->s_fc_q[FC_Q_STAGING],
&sbi->s_fc_q[FC_Q_STAGING]);
- sbi->s_mount_state &= ~EXT4_FC_COMMITTING;
- sbi->s_mount_state &= ~EXT4_FC_INELIGIBLE;
+ sbi->s_mount_flags &= ~EXT4_MF_FC_COMMITTING;
+ sbi->s_mount_flags &= ~EXT4_MF_FC_INELIGIBLE;
if (full)
sbi->s_fc_bytes = 0;
@@ -1617,8 +1616,10 @@ static int ext4_fc_replay_add_range(struct super_block *sb,
if (ret == 0) {
/* Range is not mapped */
path = ext4_find_extent(inode, cur, NULL, 0);
- if (!path)
- continue;
+ if (IS_ERR(path)) {
+ iput(inode);
+ return 0;
+ }
memset(&newex, 0, sizeof(newex));
newex.ee_block = cpu_to_le32(cur);
ext4_ext_store_pblock(
@@ -2078,6 +2079,8 @@ static int ext4_fc_replay(journal_t *journal, struct buffer_head *bh,
void ext4_fc_init(struct super_block *sb, journal_t *journal)
{
+ int num_fc_blocks;
+
/*
* We set the replay callback even if fast commit is disabled because we
* could still have fast commit blocks that need to be replayed even if
@@ -2087,7 +2090,15 @@ void ext4_fc_init(struct super_block *sb, journal_t *journal)
if (!test_opt2(sb, JOURNAL_FAST_COMMIT))
return;
journal->j_fc_cleanup_callback = ext4_fc_cleanup;
- if (jbd2_fc_init(journal, EXT4_NUM_FC_BLKS)) {
+ if (!buffer_uptodate(journal->j_sb_buffer)
+ && ext4_read_bh_lock(journal->j_sb_buffer, REQ_META | REQ_PRIO,
+ true)) {
+ ext4_msg(sb, KERN_ERR, "I/O error on journal");
+ return;
+ }
+ num_fc_blocks = be32_to_cpu(journal->j_superblock->s_num_fc_blks);
+ if (jbd2_fc_init(journal, num_fc_blocks ? num_fc_blocks :
+ EXT4_NUM_FC_BLKS)) {
pr_warn("Error while enabling fast commits, turning off.");
ext4_clear_feature_fast_commit(sb);
}
diff --git a/fs/ext4/hash.c b/fs/ext4/hash.c
index 2924261226e0..a92eb79de0cc 100644
--- a/fs/ext4/hash.c
+++ b/fs/ext4/hash.c
@@ -275,7 +275,7 @@ int ext4fs_dirhash(const struct inode *dir, const char *name, int len,
struct dx_hash_info *hinfo)
{
#ifdef CONFIG_UNICODE
- const struct unicode_map *um = EXT4_SB(dir->i_sb)->s_encoding;
+ const struct unicode_map *um = dir->i_sb->s_encoding;
int r, dlen;
unsigned char *buff;
struct qstr qstr = {.name = name, .len = len };
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 03c2253005f0..b96a18679a27 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1918,7 +1918,7 @@ static int __ext4_journalled_writepage(struct page *page,
}
if (ret == 0)
ret = err;
- err = ext4_jbd2_inode_add_write(handle, inode, 0, len);
+ err = ext4_jbd2_inode_add_write(handle, inode, page_offset(page), len);
if (ret == 0)
ret = err;
EXT4_I(inode)->i_datasync_tid = handle->h_transaction->t_tid;
@@ -3307,10 +3307,12 @@ static bool ext4_inode_datasync_dirty(struct inode *inode)
if (journal) {
if (jbd2_transaction_committed(journal,
- EXT4_I(inode)->i_datasync_tid))
- return true;
- return atomic_read(&EXT4_SB(inode->i_sb)->s_fc_subtid) >=
- EXT4_I(inode)->i_fc_committed_subtid;
+ EXT4_I(inode)->i_datasync_tid))
+ return false;
+ if (test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT))
+ return atomic_read(&EXT4_SB(inode->i_sb)->s_fc_subtid) <
+ EXT4_I(inode)->i_fc_committed_subtid;
+ return true;
}
/* Any metadata buffers to write? */
@@ -6157,7 +6159,8 @@ retry_alloc:
if (ext4_walk_page_buffers(handle, page_buffers(page),
0, len, NULL, write_end_fn))
goto out_error;
- if (ext4_jbd2_inode_add_write(handle, inode, 0, len))
+ if (ext4_jbd2_inode_add_write(handle, inode,
+ page_offset(page), len))
goto out_error;
ext4_set_inode_state(inode, EXT4_STATE_JDATA);
} else {
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 5159830dacb8..f458d1d81d96 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1285,8 +1285,8 @@ static void dx_insert_block(struct dx_frame *frame, u32 hash, ext4_lblk_t block)
int ext4_ci_compare(const struct inode *parent, const struct qstr *name,
const struct qstr *entry, bool quick)
{
- const struct ext4_sb_info *sbi = EXT4_SB(parent->i_sb);
- const struct unicode_map *um = sbi->s_encoding;
+ const struct super_block *sb = parent->i_sb;
+ const struct unicode_map *um = sb->s_encoding;
int ret;
if (quick)
@@ -1298,7 +1298,7 @@ int ext4_ci_compare(const struct inode *parent, const struct qstr *name,
/* Handle invalid character sequence as either an error
* or as an opaque byte sequence.
*/
- if (ext4_has_strict_mode(sbi))
+ if (sb_has_strict_encoding(sb))
return -EINVAL;
if (name->len != entry->len)
@@ -1315,7 +1315,7 @@ void ext4_fname_setup_ci_filename(struct inode *dir, const struct qstr *iname,
{
int len;
- if (!IS_CASEFOLDED(dir) || !EXT4_SB(dir->i_sb)->s_encoding) {
+ if (!IS_CASEFOLDED(dir) || !dir->i_sb->s_encoding) {
cf_name->name = NULL;
return;
}
@@ -1324,7 +1324,7 @@ void ext4_fname_setup_ci_filename(struct inode *dir, const struct qstr *iname,
if (!cf_name->name)
return;
- len = utf8_casefold(EXT4_SB(dir->i_sb)->s_encoding,
+ len = utf8_casefold(dir->i_sb->s_encoding,
iname, cf_name->name,
EXT4_NAME_LEN);
if (len <= 0) {
@@ -1361,7 +1361,7 @@ static inline bool ext4_match(const struct inode *parent,
#endif
#ifdef CONFIG_UNICODE
- if (EXT4_SB(parent->i_sb)->s_encoding && IS_CASEFOLDED(parent)) {
+ if (parent->i_sb->s_encoding && IS_CASEFOLDED(parent)) {
if (fname->cf_name.name) {
struct qstr cf = {.name = fname->cf_name.name,
.len = fname->cf_name.len};
@@ -2180,9 +2180,6 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
struct buffer_head *bh = NULL;
struct ext4_dir_entry_2 *de;
struct super_block *sb;
-#ifdef CONFIG_UNICODE
- struct ext4_sb_info *sbi;
-#endif
struct ext4_filename fname;
int retval;
int dx_fallback=0;
@@ -2199,9 +2196,8 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
return -EINVAL;
#ifdef CONFIG_UNICODE
- sbi = EXT4_SB(sb);
- if (ext4_has_strict_mode(sbi) && IS_CASEFOLDED(dir) &&
- sbi->s_encoding && utf8_validate(sbi->s_encoding, &dentry->d_name))
+ if (sb_has_strict_encoding(sb) && IS_CASEFOLDED(dir) &&
+ sb->s_encoding && utf8_validate(sb->s_encoding, &dentry->d_name))
return -EINVAL;
#endif
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 2fe141ff3c7e..ef4734b40e2a 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1288,7 +1288,7 @@ static void ext4_put_super(struct super_block *sb)
fs_put_dax(sbi->s_daxdev);
fscrypt_free_dummy_policy(&sbi->s_dummy_enc_policy);
#ifdef CONFIG_UNICODE
- utf8_unload(sbi->s_encoding);
+ utf8_unload(sb->s_encoding);
#endif
kfree(sbi);
}
@@ -4303,7 +4303,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
goto failed_mount;
#ifdef CONFIG_UNICODE
- if (ext4_has_feature_casefold(sb) && !sbi->s_encoding) {
+ if (ext4_has_feature_casefold(sb) && !sb->s_encoding) {
const struct ext4_sb_encodings *encoding_info;
struct unicode_map *encoding;
__u16 encoding_flags;
@@ -4334,8 +4334,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
"%s-%s with flags 0x%hx", encoding_info->name,
encoding_info->version?:"\b", encoding_flags);
- sbi->s_encoding = encoding;
- sbi->s_encoding_flags = encoding_flags;
+ sb->s_encoding = encoding;
+ sb->s_encoding_flags = encoding_flags;
}
#endif
@@ -4777,8 +4777,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
INIT_LIST_HEAD(&sbi->s_fc_dentry_q[FC_Q_MAIN]);
INIT_LIST_HEAD(&sbi->s_fc_dentry_q[FC_Q_STAGING]);
sbi->s_fc_bytes = 0;
- sbi->s_mount_state &= ~EXT4_FC_INELIGIBLE;
- sbi->s_mount_state &= ~EXT4_FC_COMMITTING;
+ sbi->s_mount_flags &= ~EXT4_MF_FC_INELIGIBLE;
+ sbi->s_mount_flags &= ~EXT4_MF_FC_COMMITTING;
spin_lock_init(&sbi->s_fc_lock);
memset(&sbi->s_fc_stats, 0, sizeof(sbi->s_fc_stats));
sbi->s_fc_replay_state.fc_regions = NULL;
@@ -4975,7 +4975,7 @@ no_journal:
}
#ifdef CONFIG_UNICODE
- if (sbi->s_encoding)
+ if (sb->s_encoding)
sb->s_d_op = &ext4_dentry_ops;
#endif
@@ -5184,7 +5184,7 @@ failed_mount:
crypto_free_shash(sbi->s_chksum_driver);
#ifdef CONFIG_UNICODE
- utf8_unload(sbi->s_encoding);
+ utf8_unload(sb->s_encoding);
#endif
#ifdef CONFIG_QUOTA
diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c
index 5ff33d18996a..4e27fe6ed3ae 100644
--- a/fs/ext4/sysfs.c
+++ b/fs/ext4/sysfs.c
@@ -315,6 +315,7 @@ EXT4_ATTR_FEATURE(casefold);
EXT4_ATTR_FEATURE(verity);
#endif
EXT4_ATTR_FEATURE(metadata_csum_seed);
+EXT4_ATTR_FEATURE(fast_commit);
static struct attribute *ext4_feat_attrs[] = {
ATTR_LIST(lazy_itable_init),
@@ -331,6 +332,7 @@ static struct attribute *ext4_feat_attrs[] = {
ATTR_LIST(verity),
#endif
ATTR_LIST(metadata_csum_seed),
+ ATTR_LIST(fast_commit),
NULL,
};
ATTRIBUTE_GROUPS(ext4_feat);
diff --git a/fs/hfs/btree.h b/fs/hfs/btree.h
index dcc2aab1b2c4..4ba45caf5939 100644
--- a/fs/hfs/btree.h
+++ b/fs/hfs/btree.h
@@ -60,7 +60,7 @@ struct hfs_bnode {
wait_queue_head_t lock_wq;
atomic_t refcnt;
unsigned int page_offset;
- struct page *page[0];
+ struct page *page[];
};
#define HFS_BNODE_ERROR 0
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h
index 3b03fff68543..a92de5199ec3 100644
--- a/fs/hfsplus/hfsplus_fs.h
+++ b/fs/hfsplus/hfsplus_fs.h
@@ -117,7 +117,7 @@ struct hfs_bnode {
wait_queue_head_t lock_wq;
atomic_t refcnt;
unsigned int page_offset;
- struct page *page[0];
+ struct page *page[];
};
#define HFS_BNODE_LOCK 0
diff --git a/fs/io_uring.c b/fs/io_uring.c
index b42dfa0243bf..a7429c977eb3 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1365,6 +1365,9 @@ static void io_prep_async_work(struct io_kiocb *req)
io_req_init_async(req);
id = req->work.identity;
+ if (req->flags & REQ_F_FORCE_ASYNC)
+ req->work.flags |= IO_WQ_WORK_CONCURRENT;
+
if (req->flags & REQ_F_ISREG) {
if (def->hash_reg_file || (ctx->flags & IORING_SETUP_IOPOLL))
io_wq_hash_work(&req->work, file_inode(req->file));
@@ -1846,59 +1849,39 @@ static void __io_free_req(struct io_kiocb *req)
percpu_ref_put(&ctx->refs);
}
-static bool io_link_cancel_timeout(struct io_kiocb *req)
+static void io_kill_linked_timeout(struct io_kiocb *req)
{
- struct io_timeout_data *io = req->async_data;
struct io_ring_ctx *ctx = req->ctx;
- int ret;
-
- ret = hrtimer_try_to_cancel(&io->timer);
- if (ret != -1) {
- io_cqring_fill_event(req, -ECANCELED);
- io_commit_cqring(ctx);
- req->flags &= ~REQ_F_LINK_HEAD;
- io_put_req_deferred(req, 1);
- return true;
- }
-
- return false;
-}
-
-static bool __io_kill_linked_timeout(struct io_kiocb *req)
-{
struct io_kiocb *link;
- bool wake_ev;
+ bool cancelled = false;
+ unsigned long flags;
- if (list_empty(&req->link_list))
- return false;
- link = list_first_entry(&req->link_list, struct io_kiocb, link_list);
- if (link->opcode != IORING_OP_LINK_TIMEOUT)
- return false;
+ spin_lock_irqsave(&ctx->completion_lock, flags);
+ link = list_first_entry_or_null(&req->link_list, struct io_kiocb,
+ link_list);
/*
* Can happen if a linked timeout fired and link had been like
* req -> link t-out -> link t-out [-> ...]
*/
- if (!(link->flags & REQ_F_LTIMEOUT_ACTIVE))
- return false;
+ if (link && (link->flags & REQ_F_LTIMEOUT_ACTIVE)) {
+ struct io_timeout_data *io = link->async_data;
+ int ret;
- list_del_init(&link->link_list);
- wake_ev = io_link_cancel_timeout(link);
+ list_del_init(&link->link_list);
+ ret = hrtimer_try_to_cancel(&io->timer);
+ if (ret != -1) {
+ io_cqring_fill_event(link, -ECANCELED);
+ io_commit_cqring(ctx);
+ cancelled = true;
+ }
+ }
req->flags &= ~REQ_F_LINK_TIMEOUT;
- return wake_ev;
-}
-
-static void io_kill_linked_timeout(struct io_kiocb *req)
-{
- struct io_ring_ctx *ctx = req->ctx;
- unsigned long flags;
- bool wake_ev;
-
- spin_lock_irqsave(&ctx->completion_lock, flags);
- wake_ev = __io_kill_linked_timeout(req);
spin_unlock_irqrestore(&ctx->completion_lock, flags);
- if (wake_ev)
+ if (cancelled) {
io_cqring_ev_posted(ctx);
+ io_put_req(link);
+ }
}
static struct io_kiocb *io_req_link_next(struct io_kiocb *req)
@@ -4977,8 +4960,10 @@ static int io_poll_double_wake(struct wait_queue_entry *wait, unsigned mode,
/* make sure double remove sees this as being gone */
wait->private = NULL;
spin_unlock(&poll->head->lock);
- if (!done)
- __io_async_wake(req, poll, mask, io_poll_task_func);
+ if (!done) {
+ /* use wait func handler, so it matches the rq type */
+ poll->wait.func(&poll->wait, mode, sync, key);
+ }
}
refcount_dec(&req->refs);
return 1;
@@ -6180,7 +6165,6 @@ static struct io_kiocb *io_prep_linked_timeout(struct io_kiocb *req)
static void __io_queue_sqe(struct io_kiocb *req, struct io_comp_state *cs)
{
struct io_kiocb *linked_timeout;
- struct io_kiocb *nxt;
const struct cred *old_creds = NULL;
int ret;
@@ -6206,7 +6190,6 @@ again:
*/
if (ret == -EAGAIN && !(req->flags & REQ_F_NOWAIT)) {
if (!io_arm_poll_handler(req)) {
-punt:
/*
* Queued up for async execution, worker will release
* submit reference when the iocb is actually submitted.
@@ -6216,33 +6199,25 @@ punt:
if (linked_timeout)
io_queue_linked_timeout(linked_timeout);
- goto exit;
- }
+ } else if (likely(!ret)) {
+ /* drop submission reference */
+ req = io_put_req_find_next(req);
+ if (linked_timeout)
+ io_queue_linked_timeout(linked_timeout);
- if (unlikely(ret)) {
+ if (req) {
+ if (!(req->flags & REQ_F_FORCE_ASYNC))
+ goto again;
+ io_queue_async_work(req);
+ }
+ } else {
/* un-prep timeout, so it'll be killed as any other linked */
req->flags &= ~REQ_F_LINK_TIMEOUT;
req_set_fail_links(req);
io_put_req(req);
io_req_complete(req, ret);
- goto exit;
}
- /* drop submission reference */
- nxt = io_put_req_find_next(req);
- if (linked_timeout)
- io_queue_linked_timeout(linked_timeout);
-
- if (nxt) {
- req = nxt;
-
- if (req->flags & REQ_F_FORCE_ASYNC) {
- linked_timeout = NULL;
- goto punt;
- }
- goto again;
- }
-exit:
if (old_creds)
revert_creds(old_creds);
}
@@ -6266,13 +6241,6 @@ fail_req:
if (unlikely(ret))
goto fail_req;
}
-
- /*
- * Never try inline submit of IOSQE_ASYNC is set, go straight
- * to async execution.
- */
- io_req_init_async(req);
- req->work.flags |= IO_WQ_WORK_CONCURRENT;
io_queue_async_work(req);
} else {
if (sqe) {
diff --git a/fs/isofs/rock.h b/fs/isofs/rock.h
index 1558cf22ef8a..ee9660e9671c 100644
--- a/fs/isofs/rock.h
+++ b/fs/isofs/rock.h
@@ -22,7 +22,7 @@ struct SU_ER_s {
__u8 len_des;
__u8 len_src;
__u8 ext_ver;
- __u8 data[0];
+ __u8 data[];
} __attribute__ ((packed));
struct RR_RR_s {
@@ -44,7 +44,7 @@ struct RR_PN_s {
struct SL_component {
__u8 flags;
__u8 len;
- __u8 text[0];
+ __u8 text[];
} __attribute__ ((packed));
struct RR_SL_s {
@@ -54,7 +54,7 @@ struct RR_SL_s {
struct RR_NM_s {
__u8 flags;
- char name[0];
+ char name[];
} __attribute__ ((packed));
struct RR_CL_s {
@@ -71,7 +71,7 @@ struct stamp {
struct RR_TF_s {
__u8 flags;
- struct stamp times[0]; /* Variable number of these beasts */
+ struct stamp times[]; /* Variable number of these beasts */
} __attribute__ ((packed));
/* Linux-specific extension for transparent decompression */
diff --git a/fs/select.c b/fs/select.c
index 7aef49552d4c..ebfebdfe5c69 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -97,7 +97,7 @@ u64 select_estimate_accuracy(struct timespec64 *tv)
struct poll_table_page {
struct poll_table_page * next;
struct poll_table_entry * entry;
- struct poll_table_entry entries[0];
+ struct poll_table_entry entries[];
};
#define POLL_TABLE_FULL(table) \
@@ -836,7 +836,7 @@ SYSCALL_DEFINE1(old_select, struct sel_arg_struct __user *, arg)
struct poll_list {
struct poll_list *next;
int len;
- struct pollfd entries[0];
+ struct pollfd entries[];
};
#define POLLFD_PER_PAGE ((PAGE_SIZE-sizeof(struct poll_list)) / sizeof(struct pollfd))