diff options
author | David Howells <dhowells@redhat.com> | 2018-04-27 20:46:22 +0100 |
---|---|---|
committer | David Howells <dhowells@redhat.com> | 2018-05-14 13:17:35 +0100 |
commit | b61f7dcf4eb2653e870c9079b02d11a0834cfe39 (patch) | |
tree | dc5bd17741a867d22407e0d55d05b1476d7dc42c /fs | |
parent | f0ab773f5c96c29a5227234c4b5a820f5591b74d (diff) | |
download | linux-b61f7dcf4eb2653e870c9079b02d11a0834cfe39.tar.bz2 |
afs: Fix directory page locking
The afs directory loading code (primarily afs_read_dir()) locks all the
pages that hold a directory's content blob to defend against
getdents/getdents races and getdents/lookup races where the competitors
issue conflicting reads on the same data. As the reads will complete
consecutively, they may retrieve different versions of the data and
one may overwrite the data that the other is busy parsing.
Fix this by not locking the pages at all, but rather by turning the
validation lock into an rwsem and getting an exclusive lock on it whilst
reading the data or validating the attributes and a shared lock whilst
parsing the data. Sharing the attribute validation lock should be fine as
the data fetch will retrieve the attributes also.
The individual page locks aren't needed at all as the only place they're
being used is to serialise data loading.
Without this patch, the:
if (!test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) {
...
}
part of afs_read_dir() may be skipped, leaving the pages unlocked when we
hit the success: clause - in which case we try to unlock the not-locked
pages, leading to the following oops:
page:ffffe38b405b4300 count:3 mapcount:0 mapping:ffff98156c83a978 index:0x0
flags: 0xfffe000001004(referenced|private)
raw: 000fffe000001004 ffff98156c83a978 0000000000000000 00000003ffffffff
raw: dead000000000100 dead000000000200 0000000000000001 ffff98156b27c000
page dumped because: VM_BUG_ON_PAGE(!PageLocked(page))
page->mem_cgroup:ffff98156b27c000
------------[ cut here ]------------
kernel BUG at mm/filemap.c:1205!
...
RIP: 0010:unlock_page+0x43/0x50
...
Call Trace:
afs_dir_iterate+0x789/0x8f0 [kafs]
? _cond_resched+0x15/0x30
? kmem_cache_alloc_trace+0x166/0x1d0
? afs_do_lookup+0x69/0x490 [kafs]
? afs_do_lookup+0x101/0x490 [kafs]
? key_default_cmp+0x20/0x20
? request_key+0x3c/0x80
? afs_lookup+0xf1/0x340 [kafs]
? __lookup_slow+0x97/0x150
? lookup_slow+0x35/0x50
? walk_component+0x1bf/0x490
? path_lookupat.isra.52+0x75/0x200
? filename_lookup.part.66+0xa0/0x170
? afs_end_vnode_operation+0x41/0x60 [kafs]
? __check_object_size+0x9c/0x171
? strncpy_from_user+0x4a/0x170
? vfs_statx+0x73/0xe0
? __do_sys_newlstat+0x39/0x70
? __x64_sys_getdents+0xc9/0x140
? __x64_sys_getdents+0x140/0x140
? do_syscall_64+0x5b/0x160
? entry_SYSCALL_64_after_hwframe+0x44/0xa9
Fixes: f3ddee8dc4e2 ("afs: Fix directory handling")
Reported-by: Marc Dionne <marc.dionne@auristor.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Diffstat (limited to 'fs')
-rw-r--r-- | fs/afs/dir.c | 36 | ||||
-rw-r--r-- | fs/afs/inode.c | 6 | ||||
-rw-r--r-- | fs/afs/internal.h | 2 | ||||
-rw-r--r-- | fs/afs/super.c | 2 |
4 files changed, 22 insertions, 24 deletions
diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 5889f70d4d27..2853acd64482 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -180,6 +180,7 @@ static int afs_dir_open(struct inode *inode, struct file *file) * get reclaimed during the iteration. */ static struct afs_read *afs_read_dir(struct afs_vnode *dvnode, struct key *key) + __acquires(&dvnode->validate_lock) { struct afs_read *req; loff_t i_size; @@ -261,18 +262,21 @@ retry: /* If we're going to reload, we need to lock all the pages to prevent * races. */ - if (!test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) { - ret = -ERESTARTSYS; - for (i = 0; i < req->nr_pages; i++) - if (lock_page_killable(req->pages[i]) < 0) - goto error_unlock; + ret = -ERESTARTSYS; + if (down_read_killable(&dvnode->validate_lock) < 0) + goto error; - if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) - goto success; + if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) + goto success; + + up_read(&dvnode->validate_lock); + if (down_write_killable(&dvnode->validate_lock) < 0) + goto error; + if (!test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) { ret = afs_fetch_data(dvnode, key, req); if (ret < 0) - goto error_unlock_all; + goto error_unlock; task_io_account_read(PAGE_SIZE * req->nr_pages); @@ -284,33 +288,26 @@ retry: for (i = 0; i < req->nr_pages; i++) if (!afs_dir_check_page(dvnode, req->pages[i], req->actual_len)) - goto error_unlock_all; + goto error_unlock; // TODO: Trim excess pages set_bit(AFS_VNODE_DIR_VALID, &dvnode->flags); } + downgrade_write(&dvnode->validate_lock); success: - i = req->nr_pages; - while (i > 0) - unlock_page(req->pages[--i]); return req; -error_unlock_all: - i = req->nr_pages; error_unlock: - while (i > 0) - unlock_page(req->pages[--i]); + up_write(&dvnode->validate_lock); error: afs_put_read(req); _leave(" = %d", ret); return ERR_PTR(ret); content_has_grown: - i = req->nr_pages; - while (i > 0) - unlock_page(req->pages[--i]); + up_write(&dvnode->validate_lock); afs_put_read(req); goto retry; } @@ -473,6 +470,7 @@ static int afs_dir_iterate(struct inode *dir, struct dir_context *ctx, } out: + up_read(&dvnode->validate_lock); afs_put_read(req); _leave(" = %d", ret); return ret; diff --git a/fs/afs/inode.c b/fs/afs/inode.c index 06194cfe9724..e855c6e5cf28 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -415,7 +415,7 @@ int afs_validate(struct afs_vnode *vnode, struct key *key) if (valid) goto valid; - mutex_lock(&vnode->validate_lock); + down_write(&vnode->validate_lock); /* if the promise has expired, we need to check the server again to get * a new promise - note that if the (parent) directory's metadata was @@ -444,13 +444,13 @@ int afs_validate(struct afs_vnode *vnode, struct key *key) * different */ if (test_and_clear_bit(AFS_VNODE_ZAP_DATA, &vnode->flags)) afs_zap_data(vnode); - mutex_unlock(&vnode->validate_lock); + up_write(&vnode->validate_lock); valid: _leave(" = 0"); return 0; error_unlock: - mutex_unlock(&vnode->validate_lock); + up_write(&vnode->validate_lock); _leave(" = %d", ret); return ret; } diff --git a/fs/afs/internal.h b/fs/afs/internal.h index f8086ec95e24..468be1e0dffb 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -494,7 +494,7 @@ struct afs_vnode { #endif struct afs_permits __rcu *permit_cache; /* cache of permits so far obtained */ struct mutex io_lock; /* Lock for serialising I/O on this mutex */ - struct mutex validate_lock; /* lock for validating this vnode */ + struct rw_semaphore validate_lock; /* lock for validating this vnode */ spinlock_t wb_lock; /* lock for wb_keys */ spinlock_t lock; /* waitqueue/flags lock */ unsigned long flags; diff --git a/fs/afs/super.c b/fs/afs/super.c index 65081ec3c36e..b02838cd9525 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -590,7 +590,7 @@ static void afs_i_init_once(void *_vnode) memset(vnode, 0, sizeof(*vnode)); inode_init_once(&vnode->vfs_inode); mutex_init(&vnode->io_lock); - mutex_init(&vnode->validate_lock); + init_rwsem(&vnode->validate_lock); spin_lock_init(&vnode->wb_lock); spin_lock_init(&vnode->lock); INIT_LIST_HEAD(&vnode->wb_keys); |