| author | Linus Torvalds <torvalds@linux-foundation.org> | 2016-12-16 11:23:34 -0800 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-12-16 11:23:34 -0800 |
| commit | 59331c215daf600a650e281b6e8ef3e1ed1174c2 (patch) | |
| tree | f62bf3253fe48d7dd8b09b040c8e7fd9b2c02a86 /fs/ceph/addr.c | |
| parent | ff0f962ca3c38239b299a70e7eea27abfbb979c3 (diff) | |
| parent | 45ee2c1d66185e5bd27702c60cce3c43fa3370d2 (diff) | |
| download | linux-59331c215daf600a650e281b6e8ef3e1ed1174c2.tar.bz2 | |
Merge tag 'ceph-for-4.10-rc1' of git://github.com/ceph/ceph-client
Pull ceph updates from Ilya Dryomov:
"A varied set of changes:
- a large rework of cephx auth code to cope with CONFIG_VMAP_STACK
(myself). Also fixed a deadlock caused by a bogus allocation on the
writeback path and authorize reply verification.
- a fix for long stalls during fsync (Jeff Layton). The client now
has a way to force the MDS log flush, leading to ~100x speedups in
some synthetic tests.
- a new [no]require_active_mds mount option (Zheng Yan).
On mount, we will now check whether any of the MDSes are available
and bail rather than block if none are. This check can be avoided
by specifying the "no" option.
- a couple of MDS cap handling fixes and a few assorted patches
throughout"
* tag 'ceph-for-4.10-rc1' of git://github.com/ceph/ceph-client: (32 commits)
libceph: remove now unused finish_request() wrapper
libceph: always signal completion when done
ceph: avoid creating orphan object when checking pool permission
ceph: properly set issue_seq for cap release
ceph: add flags parameter to send_cap_msg
ceph: update cap message struct version to 10
ceph: define new argument structure for send_cap_msg
ceph: move xattr initialization before the encoding past the ceph_mds_caps
ceph: fix minor typo in unsafe_request_wait
ceph: record truncate size/seq for snap data writeback
ceph: check availability of mds cluster on mount
ceph: fix splice read for no Fc capability case
ceph: try getting buffer capability for readahead/fadvise
ceph: fix scheduler warning due to nested blocking
ceph: fix printing wrong return variable in ceph_direct_read_write()
crush: include mapper.h in mapper.c
rbd: silence bogus -Wmaybe-uninitialized warning
libceph: no need to drop con->mutex for ->get_authorizer()
libceph: drop len argument of *verify_authorizer_reply()
libceph: verify authorize reply on connect
...
Diffstat (limited to 'fs/ceph/addr.c')
-rw-r--r-- | fs/ceph/addr.c | 98 |
1 file changed, 75 insertions(+), 23 deletions(-)
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index ef3ebd780aff..a0f1e2b91c8e 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -315,7 +315,32 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max)
 	struct page **pages;
 	pgoff_t next_index;
 	int nr_pages = 0;
-	int ret;
+	int got = 0;
+	int ret = 0;
+
+	if (!current->journal_info) {
+		/* caller of readpages does not hold buffer and read caps
+		 * (fadvise, madvise and readahead cases) */
+		int want = CEPH_CAP_FILE_CACHE;
+		ret = ceph_try_get_caps(ci, CEPH_CAP_FILE_RD, want, &got);
+		if (ret < 0) {
+			dout("start_read %p, error getting cap\n", inode);
+		} else if (!(got & want)) {
+			dout("start_read %p, no cache cap\n", inode);
+			ret = 0;
+		}
+		if (ret <= 0) {
+			if (got)
+				ceph_put_cap_refs(ci, got);
+			while (!list_empty(page_list)) {
+				page = list_entry(page_list->prev,
+						  struct page, lru);
+				list_del(&page->lru);
+				put_page(page);
+			}
+			return ret;
+		}
+	}
 
 	off = (u64) page_offset(page);
 
@@ -338,15 +363,18 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max)
 				    CEPH_OSD_FLAG_READ, NULL,
 				    ci->i_truncate_seq, ci->i_truncate_size,
 				    false);
-	if (IS_ERR(req))
-		return PTR_ERR(req);
+	if (IS_ERR(req)) {
+		ret = PTR_ERR(req);
+		goto out;
+	}
 
 	/* build page vector */
 	nr_pages = calc_pages_for(0, len);
 	pages = kmalloc(sizeof(*pages) * nr_pages, GFP_KERNEL);
-	ret = -ENOMEM;
-	if (!pages)
-		goto out;
+	if (!pages) {
+		ret = -ENOMEM;
+		goto out_put;
+	}
 	for (i = 0; i < nr_pages; ++i) {
 		page = list_entry(page_list->prev, struct page, lru);
 		BUG_ON(PageLocked(page));
@@ -378,6 +406,12 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max)
 	if (ret < 0)
 		goto out_pages;
 	ceph_osdc_put_request(req);
+
+	/* After adding locked pages to page cache, the inode holds cache cap.
+	 * So we can drop our cap refs. */
+	if (got)
+		ceph_put_cap_refs(ci, got);
+
 	return nr_pages;
 
 out_pages:
@@ -386,8 +420,11 @@ out_pages:
 		unlock_page(pages[i]);
 	}
 	ceph_put_page_vector(pages, nr_pages, false);
-out:
+out_put:
 	ceph_osdc_put_request(req);
+out:
+	if (got)
+		ceph_put_cap_refs(ci, got);
 	return ret;
 }
 
@@ -424,7 +461,6 @@ static int ceph_readpages(struct file *file, struct address_space *mapping,
 		rc = start_read(inode, page_list, max);
 		if (rc < 0)
 			goto out;
-		BUG_ON(rc == 0);
 	}
 out:
 	ceph_fscache_readpages_cancel(inode, page_list);
@@ -438,7 +474,9 @@ out:
  * only snap context we are allowed to write back.
  */
 static struct ceph_snap_context *get_oldest_context(struct inode *inode,
-						    loff_t *snap_size)
+						    loff_t *snap_size,
+						    u64 *truncate_size,
+						    u32 *truncate_seq)
 {
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	struct ceph_snap_context *snapc = NULL;
@@ -452,6 +490,10 @@ static struct ceph_snap_context *get_oldest_context(struct inode *inode,
 			snapc = ceph_get_snap_context(capsnap->context);
 			if (snap_size)
 				*snap_size = capsnap->size;
+			if (truncate_size)
+				*truncate_size = capsnap->truncate_size;
+			if (truncate_seq)
+				*truncate_seq = capsnap->truncate_seq;
 			break;
 		}
 	}
@@ -459,6 +501,10 @@ static struct ceph_snap_context *get_oldest_context(struct inode *inode,
 		snapc = ceph_get_snap_context(ci->i_head_snapc);
 		dout(" head snapc %p has %d dirty pages\n",
 		     snapc, ci->i_wrbuffer_ref_head);
+		if (truncate_size)
+			*truncate_size = ci->i_truncate_size;
+		if (truncate_seq)
+			*truncate_seq = ci->i_truncate_seq;
 	}
 	spin_unlock(&ci->i_ceph_lock);
 	return snapc;
@@ -501,7 +547,8 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
 		dout("writepage %p page %p not dirty?\n", inode, page);
 		goto out;
 	}
-	oldest = get_oldest_context(inode, &snap_size);
+	oldest = get_oldest_context(inode, &snap_size,
+				    &truncate_size, &truncate_seq);
 	if (snapc->seq > oldest->seq) {
 		dout("writepage %p page %p snapc %p not writeable - noop\n",
 		     inode, page, snapc);
@@ -512,12 +559,8 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
 	}
 	ceph_put_snap_context(oldest);
 
-	spin_lock(&ci->i_ceph_lock);
-	truncate_seq = ci->i_truncate_seq;
-	truncate_size = ci->i_truncate_size;
 	if (snap_size == -1)
 		snap_size = i_size_read(inode);
-	spin_unlock(&ci->i_ceph_lock);
 
 	/* is this a partial page at end of file? */
 	if (page_off >= snap_size) {
@@ -764,7 +807,8 @@ retry:
 	/* find oldest snap context with dirty data */
 	ceph_put_snap_context(snapc);
 	snap_size = -1;
-	snapc = get_oldest_context(inode, &snap_size);
+	snapc = get_oldest_context(inode, &snap_size,
+				   &truncate_size, &truncate_seq);
 	if (!snapc) {
 		/* hmm, why does writepages get called when there
 		   is no dirty data? */
@@ -774,11 +818,7 @@ retry:
 	dout(" oldest snapc is %p seq %lld (%d snaps)\n",
 	     snapc, snapc->seq, snapc->num_snaps);
 
-	spin_lock(&ci->i_ceph_lock);
-	truncate_seq = ci->i_truncate_seq;
-	truncate_size = ci->i_truncate_size;
 	i_size = i_size_read(inode);
-	spin_unlock(&ci->i_ceph_lock);
 
 	if (last_snapc && snapc != last_snapc) {
 		/* if we switched to a newer snapc, restart our scan at the
@@ -1124,7 +1164,8 @@ out:
 static int context_is_writeable_or_written(struct inode *inode,
 					   struct ceph_snap_context *snapc)
 {
-	struct ceph_snap_context *oldest = get_oldest_context(inode, NULL);
+	struct ceph_snap_context *oldest = get_oldest_context(inode, NULL,
+							      NULL, NULL);
 	int ret = !oldest || snapc->seq <= oldest->seq;
 	ceph_put_snap_context(oldest);
@@ -1169,7 +1210,7 @@ retry_locked:
 		 * this page is already dirty in another (older) snap
 		 * context!  is it writeable now?
 		 */
-		oldest = get_oldest_context(inode, NULL);
+		oldest = get_oldest_context(inode, NULL, NULL, NULL);
 		if (snapc->seq > oldest->seq) {
 			ceph_put_snap_context(oldest);
@@ -1371,9 +1412,11 @@ static int ceph_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	     inode, off, (size_t)PAGE_SIZE, ceph_cap_string(got));
 
 	if ((got & (CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO)) ||
-	    ci->i_inline_version == CEPH_INLINE_NONE)
+	    ci->i_inline_version == CEPH_INLINE_NONE) {
+		current->journal_info = vma->vm_file;
 		ret = filemap_fault(vma, vmf);
-	else
+		current->journal_info = NULL;
+	} else
 		ret = -EAGAIN;
 
 	dout("filemap_fault %p %llu~%zd dropping cap refs on %s ret %d\n",
@@ -1905,6 +1948,15 @@ int ceph_pool_perm_check(struct ceph_inode_info *ci, int need)
 	struct ceph_string *pool_ns;
 	int ret, flags;
 
+	if (ci->i_vino.snap != CEPH_NOSNAP) {
+		/*
+		 * Pool permission check needs to write to the first object.
+		 * But for snapshot, head of the first object may have alread
+		 * been deleted. Skip check to avoid creating orphan object.
+		 */
+		return 0;
+	}
+
 	if (ceph_test_mount_opt(ceph_inode_to_client(&ci->vfs_inode),
 				NOPOOLPERM))
 		return 0;
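
To make the cap-handling change in start_read() easier to follow, here is a simplified, self-contained userspace sketch of the same idea: a per-task marker (current->journal_info in the kernel, a thread-local variable below) tells the readpages path whether its caller already holds the needed capability references; if not, it takes a temporary reference itself and drops it once the pages are owned by the page cache. All identifiers are hypothetical stand-ins, not the real libceph/ceph client API:

```c
/*
 * Simplified userspace sketch (not kernel code) of the pattern in the patch.
 * caller_holds_caps plays the role of current->journal_info; the helpers
 * stand in for ceph_try_get_caps()/ceph_put_cap_refs().
 */
#include <stdbool.h>
#include <stdio.h>

static __thread void *caller_holds_caps;   /* set by the regular read/fault path */
static int cap_refs;                       /* stand-in for the inode's cap refcount */

static bool try_get_cache_cap(void)        /* stand-in for ceph_try_get_caps() */
{
	cap_refs++;
	return true;                       /* pretend the cache cap was granted */
}

static void put_cap_refs(void)             /* stand-in for ceph_put_cap_refs() */
{
	cap_refs--;
}

static int start_read(void)
{
	bool got = false;

	if (!caller_holds_caps) {
		/* readahead/fadvise/madvise case: nobody holds caps for us */
		if (!try_get_cache_cap())
			return -1;         /* no cache cap: bail, caller falls back */
		got = true;
	}

	printf("issuing OSD read, cap_refs=%d\n", cap_refs);

	/* once the locked pages sit in the page cache, the inode itself pins
	 * the cache cap, so our temporary reference can be dropped */
	if (got)
		put_cap_refs();
	return 0;
}

static int read_path(void)
{
	/* the regular read/fault path already took caps, so it marks itself */
	caller_holds_caps = (void *)1;
	int ret = start_read();
	caller_holds_caps = NULL;
	return ret;
}

int main(void)
{
	start_read();   /* readahead-style call: takes and drops its own cap ref */
	read_path();    /* read/fault-style call: start_read() skips cap handling */
	printf("final cap_refs=%d\n", cap_refs);
	return 0;
}
```

The reason the patch reuses a per-task field rather than a function argument is that readahead can be triggered well below the original read or fault call, so only a marker carried on the task itself can tell start_read() that the caps were already taken further up the stack.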