From 89d43d0551a848e70e63d9ba11534aaeabc82443 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Wed, 12 Jan 2022 12:29:04 +0800 Subject: ceph: put the requests/sessions when it fails to alloc memory When failing to allocate the sessions memory we should make sure the req1 and req2 and the sessions get put. And also in case the max_sessions decreased so when kreallocate the new memory some sessions maybe missed being put. And if the max_sessions is 0 krealloc will return ZERO_SIZE_PTR, which will lead to a distinct access fault. URL: https://tracker.ceph.com/issues/53819 Fixes: e1a4541ec0b9 ("ceph: flush the mdlog before waiting on unsafe reqs") Signed-off-by: Xiubo Li Reviewed-by: Venky Shankar Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov --- fs/ceph/caps.c | 55 +++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 37 insertions(+), 18 deletions(-) (limited to 'fs/ceph') diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 7d305b974824..b472cd066d1c 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -2218,6 +2218,7 @@ static int unsafe_request_wait(struct inode *inode) struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_mds_request *req1 = NULL, *req2 = NULL; + unsigned int max_sessions; int ret, err = 0; spin_lock(&ci->i_unsafe_lock); @@ -2235,37 +2236,45 @@ static int unsafe_request_wait(struct inode *inode) } spin_unlock(&ci->i_unsafe_lock); + /* + * The mdsc->max_sessions is unlikely to be changed + * mostly, here we will retry it by reallocating the + * sessions array memory to get rid of the mdsc->mutex + * lock. + */ +retry: + max_sessions = mdsc->max_sessions; + /* * Trigger to flush the journal logs in all the relevant MDSes * manually, or in the worst case we must wait at most 5 seconds * to wait the journal logs to be flushed by the MDSes periodically. */ - if (req1 || req2) { + if ((req1 || req2) && likely(max_sessions)) { struct ceph_mds_session **sessions = NULL; struct ceph_mds_session *s; struct ceph_mds_request *req; - unsigned int max; int i; - /* - * The mdsc->max_sessions is unlikely to be changed - * mostly, here we will retry it by reallocating the - * sessions arrary memory to get rid of the mdsc->mutex - * lock. - */ -retry: - max = mdsc->max_sessions; - sessions = krealloc(sessions, max * sizeof(s), __GFP_ZERO); - if (!sessions) - return -ENOMEM; + sessions = kzalloc(max_sessions * sizeof(s), GFP_KERNEL); + if (!sessions) { + err = -ENOMEM; + goto out; + } spin_lock(&ci->i_unsafe_lock); if (req1) { list_for_each_entry(req, &ci->i_unsafe_dirops, r_unsafe_dir_item) { s = req->r_session; - if (unlikely(s->s_mds >= max)) { + if (unlikely(s->s_mds >= max_sessions)) { spin_unlock(&ci->i_unsafe_lock); + for (i = 0; i < max_sessions; i++) { + s = sessions[i]; + if (s) + ceph_put_mds_session(s); + } + kfree(sessions); goto retry; } if (!sessions[s->s_mds]) { @@ -2278,8 +2287,14 @@ retry: list_for_each_entry(req, &ci->i_unsafe_iops, r_unsafe_target_item) { s = req->r_session; - if (unlikely(s->s_mds >= max)) { + if (unlikely(s->s_mds >= max_sessions)) { spin_unlock(&ci->i_unsafe_lock); + for (i = 0; i < max_sessions; i++) { + s = sessions[i]; + if (s) + ceph_put_mds_session(s); + } + kfree(sessions); goto retry; } if (!sessions[s->s_mds]) { @@ -2300,7 +2315,7 @@ retry: spin_unlock(&ci->i_ceph_lock); /* send flush mdlog request to MDSes */ - for (i = 0; i < max; i++) { + for (i = 0; i < max_sessions; i++) { s = sessions[i]; if (s) { send_flush_mdlog(s); @@ -2317,15 +2332,19 @@ retry: ceph_timeout_jiffies(req1->r_timeout)); if (ret) err = -EIO; - ceph_mdsc_put_request(req1); } if (req2) { ret = !wait_for_completion_timeout(&req2->r_safe_completion, ceph_timeout_jiffies(req2->r_timeout)); if (ret) err = -EIO; - ceph_mdsc_put_request(req2); } + +out: + if (req1) + ceph_mdsc_put_request(req1); + if (req2) + ceph_mdsc_put_request(req2); return err; } -- cgit v1.2.3 From 932a9b5870d38b87ba0a9923c804b1af7d3605b9 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 25 Jan 2022 15:39:16 -0500 Subject: ceph: properly put ceph_string reference after async create attempt The reference acquired by try_prep_async_create is currently leaked. Ensure we put it. Cc: stable@vger.kernel.org Fixes: 9a8d03ca2e2c ("ceph: attempt to do async create when possible") Signed-off-by: Jeff Layton Reviewed-by: Ilya Dryomov Signed-off-by: Ilya Dryomov --- fs/ceph/file.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/ceph') diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 5b9104b8e453..6afae97be9da 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -750,8 +750,10 @@ retry: restore_deleg_ino(dir, req->r_deleg_ino); ceph_mdsc_put_request(req); try_async = false; + ceph_put_string(rcu_dereference_raw(lo.pool_ns)); goto retry; } + ceph_put_string(rcu_dereference_raw(lo.pool_ns)); goto out_req; } } -- cgit v1.2.3 From 4584a768f22b7669cdebabc911543621ac661341 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 26 Jan 2022 12:36:49 -0500 Subject: ceph: set pool_ns in new inode layout for async creates Dan reported that he was unable to write to files that had been asynchronously created when the client's OSD caps are restricted to a particular namespace. The issue is that the layout for the new inode is only partially being filled. Ensure that we populate the pool_ns_data and pool_ns_len in the iinfo before calling ceph_fill_inode. Cc: stable@vger.kernel.org URL: https://tracker.ceph.com/issues/54013 Fixes: 9a8d03ca2e2c ("ceph: attempt to do async create when possible") Reported-by: Dan van der Ster Signed-off-by: Jeff Layton Reviewed-by: Ilya Dryomov Signed-off-by: Ilya Dryomov --- fs/ceph/file.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'fs/ceph') diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 6afae97be9da..bbed3224ad68 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -583,6 +583,7 @@ static int ceph_finish_async_create(struct inode *dir, struct dentry *dentry, struct ceph_inode_info *ci = ceph_inode(dir); struct inode *inode; struct timespec64 now; + struct ceph_string *pool_ns; struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dir->i_sb); struct ceph_vino vino = { .ino = req->r_deleg_ino, .snap = CEPH_NOSNAP }; @@ -632,6 +633,12 @@ static int ceph_finish_async_create(struct inode *dir, struct dentry *dentry, in.max_size = cpu_to_le64(lo->stripe_unit); ceph_file_layout_to_legacy(lo, &in.layout); + /* lo is private, so pool_ns can't change */ + pool_ns = rcu_dereference_raw(lo->pool_ns); + if (pool_ns) { + iinfo.pool_ns_len = pool_ns->len; + iinfo.pool_ns_data = pool_ns->str; + } down_read(&mdsc->snap_rwsem); ret = ceph_fill_inode(inode, NULL, &iinfo, NULL, req->r_session, -- cgit v1.2.3