Merge tag 'ceph-for-5.1-rc1' of git://github.com/ceph/ceph-client

Pull ceph updates from Ilya Dryomov: "The highlights are: - rbd will now ignore discards that aren't aligned and big enough to actually free up some space (myself). This is controlled by the new alloc_size map option and can be disabled if needed. - support for rbd deep-flatten feature (myself). Deep-flatten allows "rbd flatten" to fully disconnect the clone image and its snapshots from the parent and make the parent snapshot removable. - a new round of cap handling improvements (Zheng Yan). The kernel client should now be much more prompt about releasing its caps and it is possible to put a limit on the number of caps held. - support for getting ceph.dir.pin extended attribute (Zheng Yan)" * tag 'ceph-for-5.1-rc1' of git://github.com/ceph/ceph-client: (26 commits) Documentation: modern versions of ceph are not backed by btrfs rbd: advertise support for RBD_FEATURE_DEEP_FLATTEN rbd: whole-object write and zeroout should copyup when snapshots exist rbd: copyup with an empty snapshot context (aka deep-copyup) rbd: introduce rbd_obj_issue_copyup_ops() rbd: stop copying num_osd_ops in rbd_obj_issue_copyup() rbd: factor out __rbd_osd_req_create() rbd: clear ->xferred on error from rbd_obj_issue_copyup() rbd: remove experimental designation from kernel layering ceph: add mount option to limit caps count ceph: periodically trim stale dentries ceph: delete stale dentry when last reference is dropped ceph: remove dentry_lru file from debugfs ceph: touch existing cap when handling reply ceph: pass inclusive lend parameter to filemap_write_and_wait_range() rbd: round off and ignore discards that are too small rbd: handle DISCARD and WRITE_ZEROES separately rbd: get rid of obj_req->obj_request_count libceph: use struct_size() for kmalloc() in crush_decode() ceph: send cap releases more aggressively ...
author: Linus Torvalds <torvalds@linux-foundation.org> 2019-03-12 14:58:35 -0700
committer: Linus Torvalds <torvalds@linux-foundation.org> 2019-03-12 14:58:35 -0700
commit: 2b0a80b0d0bb0a3db74588279bf851b28c6c4705 (patch)
tree: 1d73413afae3617422027194eda5f71f94fd2c07 /fs/ceph/caps.c
parent: 92825b0298ca6822085ef483f914b6e0dea9bf66 (diff)
parent: d11ae8e0a76afc506071831854348f2ea1f3290e (diff)
download: linux-2b0a80b0d0bb0a3db74588279bf851b28c6c4705.tar.bz2
1 files changed, 48 insertions, 24 deletions
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index bba28a5034ba..36a8dc699448 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -148,11 +148,17 @@ void ceph_caps_finalize(struct ceph_mds_client *mdsc)
 	spin_unlock(&mdsc->caps_list_lock);
 }
 
-void ceph_adjust_min_caps(struct ceph_mds_client *mdsc, int delta)
+void ceph_adjust_caps_max_min(struct ceph_mds_client *mdsc,
+			      struct ceph_mount_options *fsopt)
 {
 	spin_lock(&mdsc->caps_list_lock);
-	mdsc->caps_min_count += delta;
-	BUG_ON(mdsc->caps_min_count < 0);
+	mdsc->caps_min_count = fsopt->max_readdir;
+	if (mdsc->caps_min_count < 1024)
+		mdsc->caps_min_count = 1024;
+	mdsc->caps_use_max = fsopt->caps_max;
+	if (mdsc->caps_use_max > 0 &&
+	    mdsc->caps_use_max < mdsc->caps_min_count)
+		mdsc->caps_use_max = mdsc->caps_min_count;
 	spin_unlock(&mdsc->caps_list_lock);
 }
 
@@ -272,6 +278,7 @@ int ceph_reserve_caps(struct ceph_mds_client *mdsc,
 	if (!err) {
 		BUG_ON(have + alloc != need);
 		ctx->count = need;
+		ctx->used = 0;
 	}
 
 	spin_lock(&mdsc->caps_list_lock);
@@ -295,13 +302,24 @@ int ceph_reserve_caps(struct ceph_mds_client *mdsc,
 }
 
 void ceph_unreserve_caps(struct ceph_mds_client *mdsc,
-			struct ceph_cap_reservation *ctx)
+			 struct ceph_cap_reservation *ctx)
 {
+	bool reclaim = false;
+	if (!ctx->count)
+		return;
+
 	dout("unreserve caps ctx=%p count=%d\n", ctx, ctx->count);
 	spin_lock(&mdsc->caps_list_lock);
 	__ceph_unreserve_caps(mdsc, ctx->count);
 	ctx->count = 0;
+
+	if (mdsc->caps_use_max > 0 &&
+	    mdsc->caps_use_count > mdsc->caps_use_max)
+		reclaim = true;
 	spin_unlock(&mdsc->caps_list_lock);
+
+	if (reclaim)
+		ceph_reclaim_caps_nr(mdsc, ctx->used);
 }
 
 struct ceph_cap *ceph_get_cap(struct ceph_mds_client *mdsc,
@@ -346,6 +364,7 @@ struct ceph_cap *ceph_get_cap(struct ceph_mds_client *mdsc,
 	BUG_ON(list_empty(&mdsc->caps_list));
 
 	ctx->count--;
+	ctx->used++;
 	mdsc->caps_reserve_count--;
 	mdsc->caps_use_count++;
 
@@ -500,12 +519,12 @@ static void __insert_cap_node(struct ceph_inode_info *ci,
 static void __cap_set_timeouts(struct ceph_mds_client *mdsc,
 			       struct ceph_inode_info *ci)
 {
-	struct ceph_mount_options *ma = mdsc->fsc->mount_options;
+	struct ceph_mount_options *opt = mdsc->fsc->mount_options;
 
 	ci->i_hold_caps_min = round_jiffies(jiffies +
-					    ma->caps_wanted_delay_min * HZ);
+					    opt->caps_wanted_delay_min * HZ);
 	ci->i_hold_caps_max = round_jiffies(jiffies +
-					    ma->caps_wanted_delay_max * HZ);
+					    opt->caps_wanted_delay_max * HZ);
 	dout("__cap_set_timeouts %p min %lu max %lu\n", &ci->vfs_inode,
 	     ci->i_hold_caps_min - jiffies, ci->i_hold_caps_max - jiffies);
 }
@@ -657,6 +676,10 @@ void ceph_add_cap(struct inode *inode,
 		session->s_nr_caps++;
 		spin_unlock(&session->s_cap_lock);
 	} else {
+		spin_lock(&session->s_cap_lock);
+		list_move_tail(&cap->session_caps, &session->s_caps);
+		spin_unlock(&session->s_cap_lock);
+
 		if (cap->cap_gen < session->s_cap_gen)
 			cap->issued = cap->implemented = CEPH_CAP_PIN;
 
@@ -1081,9 +1104,7 @@ void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release)
 	    (!session->s_cap_reconnect || cap->cap_gen == session->s_cap_gen)) {
 		cap->queue_release = 1;
 		if (removed) {
-			list_add_tail(&cap->session_caps,
-				      &session->s_cap_releases);
-			session->s_num_cap_releases++;
+			__ceph_queue_cap_release(session, cap);
 			removed = 0;
 		}
 	} else {
@@ -1245,7 +1266,7 @@ static int send_cap_msg(struct cap_msg_args *arg)
  * Queue cap releases when an inode is dropped from our cache.  Since
  * inode is about to be destroyed, there is no need for i_ceph_lock.
  */
-void ceph_queue_caps_release(struct inode *inode)
+void __ceph_remove_caps(struct inode *inode)
 {
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	struct rb_node *p;
@@ -2393,6 +2414,12 @@ void ceph_early_kick_flushing_caps(struct ceph_mds_client *mdsc,
 		if ((cap->issued & ci->i_flushing_caps) !=
 		    ci->i_flushing_caps) {
 			ci->i_ceph_flags &= ~CEPH_I_KICK_FLUSH;
+			/* encode_caps_cb() also will reset these sequence
+			 * numbers. make sure sequence numbers in cap flush
+			 * message match later reconnect message */
+			cap->seq = 0;
+			cap->issue_seq = 0;
+			cap->mseq = 0;
 			__kick_flushing_caps(mdsc, session, ci,
 					     oldest_flush_tid);
 		} else {
@@ -3880,12 +3907,10 @@ void ceph_handle_caps(struct ceph_mds_session *session,
 			cap->seq = seq;
 			cap->issue_seq = seq;
 			spin_lock(&session->s_cap_lock);
-			list_add_tail(&cap->session_caps,
-					&session->s_cap_releases);
-			session->s_num_cap_releases++;
+			__ceph_queue_cap_release(session, cap);
 			spin_unlock(&session->s_cap_lock);
 		}
-		goto flush_cap_releases;
+		goto done;
 	}
 
 	/* these will work even if we don't have a cap yet */
@@ -3955,7 +3980,12 @@ void ceph_handle_caps(struct ceph_mds_session *session,
 		       ceph_cap_op_name(op));
 	}
 
-	goto done;
+done:
+	mutex_unlock(&session->s_mutex);
+done_unlocked:
+	iput(inode);
+	ceph_put_string(extra_info.pool_ns);
+	return;
 
 flush_cap_releases:
 	/*
@@ -3963,14 +3993,8 @@ flush_cap_releases:
 	 * along for the mds (who clearly thinks we still have this
 	 * cap).
 	 */
-	ceph_send_cap_releases(mdsc, session);
-
-done:
-	mutex_unlock(&session->s_mutex);
-done_unlocked:
-	iput(inode);
-	ceph_put_string(extra_info.pool_ns);
-	return;
+	ceph_flush_cap_releases(mdsc, session);
+	goto done;
 
 bad:
 	pr_err("ceph_handle_caps: corrupt message\n");
author	Linus Torvalds <torvalds@linux-foundation.org>	2019-03-12 14:58:35 -0700
committer	Linus Torvalds <torvalds@linux-foundation.org>	2019-03-12 14:58:35 -0700
commit	2b0a80b0d0bb0a3db74588279bf851b28c6c4705 (patch)
tree	1d73413afae3617422027194eda5f71f94fd2c07 /fs/ceph/caps.c
parent	92825b0298ca6822085ef483f914b6e0dea9bf66 (diff)
parent	d11ae8e0a76afc506071831854348f2ea1f3290e (diff)
download	linux-2b0a80b0d0bb0a3db74588279bf851b28c6c4705.tar.bz2