From ca6c8ae0f7930dad7e10664e3b5bc657dd75be60 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 15 Dec 2016 08:37:59 -0500 Subject: ceph: pass parent inode info to ceph_encode_dentry_release if we have it If we have a parent inode reference already, then we don't need to go back up the directory tree to find one. Link: http://tracker.ceph.com/issues/18148 Signed-off-by: Jeff Layton Reviewed-by: Yan, Zheng Signed-off-by: Ilya Dryomov --- fs/ceph/super.h | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/ceph/super.h') diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 3373b61faefd..a80a915ca247 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -904,6 +904,7 @@ extern void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc); extern int ceph_encode_inode_release(void **p, struct inode *inode, int mds, int drop, int unless, int force); extern int ceph_encode_dentry_release(void **p, struct dentry *dn, + struct inode *dir, int mds, int drop, int unless); extern int ceph_get_caps(struct ceph_inode_info *ci, int need, int want, -- cgit v1.2.3 From 7c94ba27903c3c46c5829e96efb7c2a8ab7510e9 Mon Sep 17 00:00:00 2001 From: Andreas Gerstmayr Date: Tue, 10 Jan 2017 14:17:56 +0100 Subject: ceph: set io_pages bdi hint This patch sets the io_pages bdi hint based on the rsize mount option. Without this patch large buffered reads (request size > max readahead) are processed sequentially in chunks of the readahead size (i.e. read requests are sent out up to the readahead size, then the do_generic_file_read() function waits until the first page is received). With this patch read requests are sent out at once up to the size specified in the rsize mount option (default: 64 MB). Signed-off-by: Andreas Gerstmayr Acked-by: Jeff Layton Signed-off-by: Yan, Zheng --- Documentation/filesystems/ceph.txt | 5 ++--- fs/ceph/super.c | 8 ++++++++ fs/ceph/super.h | 4 ++-- 3 files changed, 12 insertions(+), 5 deletions(-) (limited to 'fs/ceph/super.h') diff --git a/Documentation/filesystems/ceph.txt b/Documentation/filesystems/ceph.txt index f5306ee40ea9..0b302a11718a 100644 --- a/Documentation/filesystems/ceph.txt +++ b/Documentation/filesystems/ceph.txt @@ -98,11 +98,10 @@ Mount Options size. rsize=X - Specify the maximum read size in bytes. By default there is no - maximum. + Specify the maximum read size in bytes. Default: 64 MB. rasize=X - Specify the maximum readahead. + Specify the maximum readahead. Default: 8 MB. mount_timeout=X Specify the timeout value for mount (in seconds), in the case diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 6bd20d707bfd..a0a0b6d02f89 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -952,6 +952,14 @@ static int ceph_register_bdi(struct super_block *sb, fsc->backing_dev_info.ra_pages = VM_MAX_READAHEAD * 1024 / PAGE_SIZE; + if (fsc->mount_options->rsize > fsc->mount_options->rasize && + fsc->mount_options->rsize >= PAGE_SIZE) + fsc->backing_dev_info.io_pages = + (fsc->mount_options->rsize + PAGE_SIZE - 1) + >> PAGE_SHIFT; + else if (fsc->mount_options->rsize == 0) + fsc->backing_dev_info.io_pages = ULONG_MAX; + err = bdi_register(&fsc->backing_dev_info, NULL, "ceph-%ld", atomic_long_inc_return(&bdi_seq)); if (!err) diff --git a/fs/ceph/super.h b/fs/ceph/super.h index a80a915ca247..f3f9215abf8e 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -45,8 +45,8 @@ #define ceph_test_mount_opt(fsc, opt) \ (!!((fsc)->mount_options->flags & CEPH_MOUNT_OPT_##opt)) -#define CEPH_RSIZE_DEFAULT 0 /* max read size */ -#define CEPH_RASIZE_DEFAULT (8192*1024) /* readahead */ +#define CEPH_RSIZE_DEFAULT (64*1024*1024) /* max read size */ +#define CEPH_RASIZE_DEFAULT (8192*1024) /* max readahead */ #define CEPH_MAX_READDIR_DEFAULT 1024 #define CEPH_MAX_READDIR_BYTES_DEFAULT (512*1024) #define CEPH_SNAPDIRNAME_DEFAULT ".snap" -- cgit v1.2.3 From c1944fedd8c492ce1c1a99ca9064dcc7bafa80e9 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Sun, 29 Jan 2017 22:15:47 +0800 Subject: ceph: avoid calling ceph_renew_caps() infinitely __ceph_caps_mds_wanted() ignores caps from stale session. So the return value of __ceph_caps_mds_wanted() can keep the same across ceph_renew_caps(). This causes try_get_cap_refs() to keep calling ceph_renew_caps(). The fix is ignore the session valid check for the try_get_cap_refs() case. If session is stale, just let the caps requester sleep. Signed-off-by: Yan, Zheng --- fs/ceph/caps.c | 6 +++--- fs/ceph/file.c | 2 +- fs/ceph/super.h | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'fs/ceph/super.h') diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index ed8c7addce91..3c2dfd72e5b2 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -867,7 +867,7 @@ int __ceph_caps_file_wanted(struct ceph_inode_info *ci) /* * Return caps we have registered with the MDS(s) as 'wanted'. */ -int __ceph_caps_mds_wanted(struct ceph_inode_info *ci) +int __ceph_caps_mds_wanted(struct ceph_inode_info *ci, bool check) { struct ceph_cap *cap; struct rb_node *p; @@ -875,7 +875,7 @@ int __ceph_caps_mds_wanted(struct ceph_inode_info *ci) for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) { cap = rb_entry(p, struct ceph_cap, ci_node); - if (!__cap_is_valid(cap)) + if (check && !__cap_is_valid(cap)) continue; if (cap == ci->i_auth_cap) mds_wanted |= cap->mds_wanted; @@ -2491,7 +2491,7 @@ again: ret = 1; goto out_unlock; } - mds_wanted = __ceph_caps_mds_wanted(ci); + mds_wanted = __ceph_caps_mds_wanted(ci, false); if (need & ~(mds_wanted & need)) { dout("get_cap_refs %p caps were dropped" " (session killed?)\n", inode); diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 045d30d26624..beb24f8bb917 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -283,7 +283,7 @@ int ceph_open(struct inode *inode, struct file *file) spin_lock(&ci->i_ceph_lock); if (__ceph_is_any_real_caps(ci) && (((fmode & CEPH_FILE_MODE_WR) == 0) || ci->i_auth_cap)) { - int mds_wanted = __ceph_caps_mds_wanted(ci); + int mds_wanted = __ceph_caps_mds_wanted(ci, true); int issued = __ceph_caps_issued(ci, NULL); dout("open %p fmode %d want %s issued %s using existing\n", diff --git a/fs/ceph/super.h b/fs/ceph/super.h index f3f9215abf8e..6477264bfc7e 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -602,7 +602,7 @@ static inline int __ceph_caps_wanted(struct ceph_inode_info *ci) } /* what the mds thinks we want */ -extern int __ceph_caps_mds_wanted(struct ceph_inode_info *ci); +extern int __ceph_caps_mds_wanted(struct ceph_inode_info *ci, bool check); extern void ceph_caps_init(struct ceph_mds_client *mdsc); extern void ceph_caps_finalize(struct ceph_mds_client *mdsc); -- cgit v1.2.3 From f5a03b080450784e671998921feb62fd3846c953 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 31 Jan 2017 11:06:13 -0500 Subject: ceph: drop session argument to ceph_fill_trace Just get it from r_session since that's what's always passed in. Signed-off-by: Jeff Layton Reviewed-by: Yan, Zheng Signed-off-by: Ilya Dryomov --- fs/ceph/inode.c | 4 ++-- fs/ceph/mds_client.c | 2 +- fs/ceph/super.h | 3 +-- 3 files changed, 4 insertions(+), 5 deletions(-) (limited to 'fs/ceph/super.h') diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 38a54ebf647d..b18462c64cdd 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -1108,9 +1108,9 @@ out: * * Called with snap_rwsem (read). */ -int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, - struct ceph_mds_session *session) +int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req) { + struct ceph_mds_session *session = req->r_session; struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info; struct inode *in = NULL; struct ceph_vino vino; diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 176512960b14..4a68067a76b5 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -2516,7 +2516,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) /* insert trace into our cache */ mutex_lock(&req->r_fill_mutex); current->journal_info = req; - err = ceph_fill_trace(mdsc->fsc->sb, req, req->r_session); + err = ceph_fill_trace(mdsc->fsc->sb, req); if (err == 0) { if (result == 0 && (req->r_op == CEPH_MDS_OP_READDIR || req->r_op == CEPH_MDS_OP_LSSNAP)) diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 6477264bfc7e..950170136be9 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -764,8 +764,7 @@ extern void ceph_fill_file_time(struct inode *inode, int issued, u64 time_warp_seq, struct timespec *ctime, struct timespec *mtime, struct timespec *atime); extern int ceph_fill_trace(struct super_block *sb, - struct ceph_mds_request *req, - struct ceph_mds_session *session); + struct ceph_mds_request *req); extern int ceph_readdir_prepopulate(struct ceph_mds_request *req, struct ceph_mds_session *session); -- cgit v1.2.3 From 55f2a04588c5881d90e22631b17a84cd25d17cc4 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 13 Feb 2017 14:44:19 +0100 Subject: ceph: remove special ack vs commit behavior - ask for a commit reply instead of an ack reply in __ceph_pool_perm_get() - don't ask for both ack and commit replies in ceph_sync_write() - since just only one reply is requested now, i_unsafe_writes list will always be empty -- kill ceph_sync_write_wait() and go back to a standard ->evict_inode() Signed-off-by: Ilya Dryomov Reviewed-by: Jeff Layton Reviewed-by: Sage Weil --- fs/ceph/addr.c | 2 +- fs/ceph/caps.c | 2 -- fs/ceph/file.c | 88 +-------------------------------------------------------- fs/ceph/inode.c | 9 ------ fs/ceph/super.c | 1 - fs/ceph/super.h | 4 +-- 6 files changed, 3 insertions(+), 103 deletions(-) (limited to 'fs/ceph/super.h') diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 4547bbf80e4f..3f0474c55f05 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -1872,7 +1872,7 @@ static int __ceph_pool_perm_get(struct ceph_inode_info *ci, goto out_unlock; } - wr_req->r_flags = CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ACK; + wr_req->r_flags = CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK; osd_req_op_init(wr_req, 0, CEPH_OSD_OP_CREATE, CEPH_OSD_OP_FLAG_EXCL); ceph_oloc_copy(&wr_req->r_base_oloc, &rd_req->r_base_oloc); ceph_oid_copy(&wr_req->r_base_oid, &rd_req->r_base_oid); diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 3c2dfd72e5b2..cd966f276a8d 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -2091,8 +2091,6 @@ int ceph_fsync(struct file *file, loff_t start, loff_t end, int datasync) dout("fsync %p%s\n", inode, datasync ? " datasync" : ""); - ceph_sync_write_wait(inode); - ret = filemap_write_and_wait_range(inode->i_mapping, start, end); if (ret < 0) goto out; diff --git a/fs/ceph/file.c b/fs/ceph/file.c index baad3b688ada..023cb7fd9b5f 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -795,89 +795,6 @@ out: kfree(aio_work); } -/* - * Write commit request unsafe callback, called to tell us when a - * request is unsafe (that is, in flight--has been handed to the - * messenger to send to its target osd). It is called again when - * we've received a response message indicating the request is - * "safe" (its CEPH_OSD_FLAG_ONDISK flag is set), or when a request - * is completed early (and unsuccessfully) due to a timeout or - * interrupt. - * - * This is used if we requested both an ACK and ONDISK commit reply - * from the OSD. - */ -static void ceph_sync_write_unsafe(struct ceph_osd_request *req, bool unsafe) -{ - struct ceph_inode_info *ci = ceph_inode(req->r_inode); - - dout("%s %p tid %llu %ssafe\n", __func__, req, req->r_tid, - unsafe ? "un" : ""); - if (unsafe) { - ceph_get_cap_refs(ci, CEPH_CAP_FILE_WR); - spin_lock(&ci->i_unsafe_lock); - list_add_tail(&req->r_unsafe_item, - &ci->i_unsafe_writes); - spin_unlock(&ci->i_unsafe_lock); - - complete_all(&req->r_completion); - } else { - spin_lock(&ci->i_unsafe_lock); - list_del_init(&req->r_unsafe_item); - spin_unlock(&ci->i_unsafe_lock); - ceph_put_cap_refs(ci, CEPH_CAP_FILE_WR); - } -} - -/* - * Wait on any unsafe replies for the given inode. First wait on the - * newest request, and make that the upper bound. Then, if there are - * more requests, keep waiting on the oldest as long as it is still older - * than the original request. - */ -void ceph_sync_write_wait(struct inode *inode) -{ - struct ceph_inode_info *ci = ceph_inode(inode); - struct list_head *head = &ci->i_unsafe_writes; - struct ceph_osd_request *req; - u64 last_tid; - - if (!S_ISREG(inode->i_mode)) - return; - - spin_lock(&ci->i_unsafe_lock); - if (list_empty(head)) - goto out; - - /* set upper bound as _last_ entry in chain */ - - req = list_last_entry(head, struct ceph_osd_request, - r_unsafe_item); - last_tid = req->r_tid; - - do { - ceph_osdc_get_request(req); - spin_unlock(&ci->i_unsafe_lock); - - dout("sync_write_wait on tid %llu (until %llu)\n", - req->r_tid, last_tid); - wait_for_completion(&req->r_done_completion); - ceph_osdc_put_request(req); - - spin_lock(&ci->i_unsafe_lock); - /* - * from here on look at first entry in chain, since we - * only want to wait for anything older than last_tid - */ - if (list_empty(head)) - break; - req = list_first_entry(head, struct ceph_osd_request, - r_unsafe_item); - } while (req->r_tid < last_tid); -out: - spin_unlock(&ci->i_unsafe_lock); -} - static ssize_t ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter, struct ceph_snap_context *snapc, @@ -1119,8 +1036,7 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos, flags = CEPH_OSD_FLAG_ORDERSNAP | CEPH_OSD_FLAG_ONDISK | - CEPH_OSD_FLAG_WRITE | - CEPH_OSD_FLAG_ACK; + CEPH_OSD_FLAG_WRITE; while ((len = iov_iter_count(from)) > 0) { size_t left; @@ -1166,8 +1082,6 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos, goto out; } - /* get a second commit callback */ - req->r_unsafe_callback = ceph_sync_write_unsafe; req->r_inode = inode; osd_req_op_extent_osd_data_pages(req, 0, pages, len, 0, diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 68f46132b157..fd8f771f99b7 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -499,7 +499,6 @@ struct inode *ceph_alloc_inode(struct super_block *sb) ci->i_rdcache_gen = 0; ci->i_rdcache_revoking = 0; - INIT_LIST_HEAD(&ci->i_unsafe_writes); INIT_LIST_HEAD(&ci->i_unsafe_dirops); INIT_LIST_HEAD(&ci->i_unsafe_iops); spin_lock_init(&ci->i_unsafe_lock); @@ -583,14 +582,6 @@ int ceph_drop_inode(struct inode *inode) return 1; } -void ceph_evict_inode(struct inode *inode) -{ - /* wait unsafe sync writes */ - ceph_sync_write_wait(inode); - truncate_inode_pages_final(&inode->i_data); - clear_inode(inode); -} - static inline blkcnt_t calc_inode_blocks(u64 size) { return (size + (1<<9) - 1) >> 9; diff --git a/fs/ceph/super.c b/fs/ceph/super.c index a0a0b6d02f89..0ec8d0114e57 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -757,7 +757,6 @@ static const struct super_operations ceph_super_ops = { .destroy_inode = ceph_destroy_inode, .write_inode = ceph_write_inode, .drop_inode = ceph_drop_inode, - .evict_inode = ceph_evict_inode, .sync_fs = ceph_sync_fs, .put_super = ceph_put_super, .show_options = ceph_show_options, diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 950170136be9..e9410bcf4113 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -343,7 +343,6 @@ struct ceph_inode_info { u32 i_rdcache_gen; /* incremented each time we get FILE_CACHE. */ u32 i_rdcache_revoking; /* RDCACHE gen to async invalidate, if any */ - struct list_head i_unsafe_writes; /* uncommitted sync writes */ struct list_head i_unsafe_dirops; /* uncommitted mds dir ops */ struct list_head i_unsafe_iops; /* uncommitted mds inode ops */ spinlock_t i_unsafe_lock; @@ -753,7 +752,6 @@ extern const struct inode_operations ceph_file_iops; extern struct inode *ceph_alloc_inode(struct super_block *sb); extern void ceph_destroy_inode(struct inode *inode); extern int ceph_drop_inode(struct inode *inode); -extern void ceph_evict_inode(struct inode *inode); extern struct inode *ceph_get_inode(struct super_block *sb, struct ceph_vino vino); @@ -933,7 +931,7 @@ extern int ceph_atomic_open(struct inode *dir, struct dentry *dentry, extern int ceph_release(struct inode *inode, struct file *filp); extern void ceph_fill_inline_data(struct inode *inode, struct page *locked_page, char *data, size_t len); -extern void ceph_sync_write_wait(struct inode *inode); + /* dir.c */ extern const struct file_operations ceph_dir_fops; extern const struct file_operations ceph_snapdir_fops; -- cgit v1.2.3