summaryrefslogtreecommitdiffstats
path: root/fs/ceph
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ceph')
-rw-r--r--fs/ceph/addr.c6
-rw-r--r--fs/ceph/caps.c27
-rw-r--r--fs/ceph/debugfs.c20
-rw-r--r--fs/ceph/dir.c9
-rw-r--r--fs/ceph/inode.c41
-rw-r--r--fs/ceph/locks.c8
-rw-r--r--fs/ceph/mds_client.c280
-rw-r--r--fs/ceph/mds_client.h3
-rw-r--r--fs/ceph/mdsmap.c25
-rw-r--r--fs/ceph/metric.c18
-rw-r--r--fs/ceph/metric.h14
-rw-r--r--fs/ceph/quota.c58
-rw-r--r--fs/ceph/super.c14
-rw-r--r--fs/ceph/super.h7
-rw-r--r--fs/ceph/xattr.c81
15 files changed, 410 insertions, 201 deletions
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 35c83f65475b..950552944436 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -840,7 +840,7 @@ static int ceph_writepages_start(struct address_space *mapping,
wbc->sync_mode == WB_SYNC_NONE ? "NONE" :
(wbc->sync_mode == WB_SYNC_ALL ? "ALL" : "HOLD"));
- if (READ_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
+ if (READ_ONCE(fsc->mount_state) >= CEPH_MOUNT_SHUTDOWN) {
if (ci->i_wrbuffer_ref > 0) {
pr_warn_ratelimited(
"writepage_start %p %lld forced umount\n",
@@ -1264,7 +1264,7 @@ ceph_find_incompatible(struct page *page)
struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
struct ceph_inode_info *ci = ceph_inode(inode);
- if (READ_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
+ if (READ_ONCE(fsc->mount_state) >= CEPH_MOUNT_SHUTDOWN) {
dout(" page %p forced umount\n", page);
return ERR_PTR(-EIO);
}
@@ -1321,7 +1321,7 @@ static int ceph_write_begin(struct file *file, struct address_space *mapping,
dout("write_begin file %p inode %p page %p %d~%d\n", file, inode, page, (int)pos, (int)len);
for (;;) {
- page = grab_cache_page_write_begin(mapping, index, 0);
+ page = grab_cache_page_write_begin(mapping, index, flags);
if (!page) {
r = -ENOMEM;
break;
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index ded4229c314a..255a512f1277 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -1140,16 +1140,24 @@ void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release)
{
struct ceph_mds_session *session = cap->session;
struct ceph_inode_info *ci = cap->ci;
- struct ceph_mds_client *mdsc =
- ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc;
+ struct ceph_mds_client *mdsc;
int removed = 0;
+ /* 'ci' being NULL means the remove have already occurred */
+ if (!ci) {
+ dout("%s: cap inode is NULL\n", __func__);
+ return;
+ }
+
dout("__ceph_remove_cap %p from %p\n", cap, &ci->vfs_inode);
+ mdsc = ceph_inode_to_client(&ci->vfs_inode)->mdsc;
+
/* remove from inode's cap rbtree, and clear auth cap */
rb_erase(&cap->ci_node, &ci->i_caps);
if (ci->i_auth_cap == cap) {
- WARN_ON_ONCE(!list_empty(&ci->i_dirty_item));
+ WARN_ON_ONCE(!list_empty(&ci->i_dirty_item) &&
+ !mdsc->fsc->blocklisted);
ci->i_auth_cap = NULL;
}
@@ -2746,7 +2754,7 @@ again:
goto out_unlock;
}
- if (READ_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
+ if (READ_ONCE(mdsc->fsc->mount_state) >= CEPH_MOUNT_SHUTDOWN) {
dout("get_cap_refs %p forced umount\n", inode);
ret = -EIO;
goto out_unlock;
@@ -4027,15 +4035,13 @@ void ceph_handle_caps(struct ceph_mds_session *session,
}
if (msg_version >= 8) {
- u64 flush_tid;
- u32 caller_uid, caller_gid;
u32 pool_ns_len;
/* version >= 6 */
- ceph_decode_64_safe(&p, end, flush_tid, bad);
+ ceph_decode_skip_64(&p, end, bad); // flush_tid
/* version >= 7 */
- ceph_decode_32_safe(&p, end, caller_uid, bad);
- ceph_decode_32_safe(&p, end, caller_gid, bad);
+ ceph_decode_skip_32(&p, end, bad); // caller_uid
+ ceph_decode_skip_32(&p, end, bad); // caller_gid
/* version >= 8 */
ceph_decode_32_safe(&p, end, pool_ns_len, bad);
if (pool_ns_len > 0) {
@@ -4058,9 +4064,8 @@ void ceph_handle_caps(struct ceph_mds_session *session,
}
if (msg_version >= 11) {
- u32 flags;
/* version >= 10 */
- ceph_decode_32_safe(&p, end, flags, bad);
+ ceph_decode_skip_32(&p, end, bad); // flags
/* version >= 11 */
extra_info.dirstat_valid = true;
ceph_decode_64_safe(&p, end, extra_info.nfiles, bad);
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
index 7a8fbe3e4751..66989c880adb 100644
--- a/fs/ceph/debugfs.c
+++ b/fs/ceph/debugfs.c
@@ -304,11 +304,25 @@ static int mds_sessions_show(struct seq_file *s, void *ptr)
return 0;
}
+static int status_show(struct seq_file *s, void *p)
+{
+ struct ceph_fs_client *fsc = s->private;
+ struct ceph_entity_inst *inst = &fsc->client->msgr.inst;
+ struct ceph_entity_addr *client_addr = ceph_client_addr(fsc->client);
+
+ seq_printf(s, "instance: %s.%lld %s/%u\n", ENTITY_NAME(inst->name),
+ ceph_pr_addr(client_addr), le32_to_cpu(client_addr->nonce));
+ seq_printf(s, "blocklisted: %s\n", fsc->blocklisted ? "true" : "false");
+
+ return 0;
+}
+
DEFINE_SHOW_ATTRIBUTE(mdsmap);
DEFINE_SHOW_ATTRIBUTE(mdsc);
DEFINE_SHOW_ATTRIBUTE(caps);
DEFINE_SHOW_ATTRIBUTE(mds_sessions);
DEFINE_SHOW_ATTRIBUTE(metric);
+DEFINE_SHOW_ATTRIBUTE(status);
/*
@@ -394,6 +408,12 @@ void ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
fsc->client->debugfs_dir,
fsc,
&caps_fops);
+
+ fsc->debugfs_status = debugfs_create_file("status",
+ 0400,
+ fsc->client->debugfs_dir,
+ fsc,
+ &status_fops);
}
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index a4d48370b2b3..858ee7362ff5 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -1202,12 +1202,11 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry,
op = CEPH_MDS_OP_RENAMESNAP;
else
return -EROFS;
- } else if (old_dir != new_dir) {
- err = ceph_quota_check_rename(mdsc, d_inode(old_dentry),
- new_dir);
- if (err)
- return err;
}
+ /* don't allow cross-quota renames */
+ if ((old_dir != new_dir) &&
+ (!ceph_quota_is_same_realm(old_dir, new_dir)))
+ return -EXDEV;
dout("rename dir %p dentry %p to dir %p dentry %p\n",
old_dir, old_dentry, new_dir, new_dentry);
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 526faf4778ce..adc8fc3c5d85 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -1315,15 +1315,10 @@ retry_lookup:
}
if (rinfo->head->is_target) {
- tvino.ino = le64_to_cpu(rinfo->targeti.in->ino);
- tvino.snap = le64_to_cpu(rinfo->targeti.in->snapid);
-
- in = ceph_get_inode(sb, tvino);
- if (IS_ERR(in)) {
- err = PTR_ERR(in);
- goto done;
- }
+ /* Should be filled in by handle_reply */
+ BUG_ON(!req->r_target_inode);
+ in = req->r_target_inode;
err = ceph_fill_inode(in, req->r_locked_page, &rinfo->targeti,
NULL, session,
(!test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags) &&
@@ -1333,11 +1328,13 @@ retry_lookup:
if (err < 0) {
pr_err("ceph_fill_inode badness %p %llx.%llx\n",
in, ceph_vinop(in));
+ req->r_target_inode = NULL;
if (in->i_state & I_NEW)
discard_new_inode(in);
+ else
+ iput(in);
goto done;
}
- req->r_target_inode = in;
if (in->i_state & I_NEW)
unlock_new_inode(in);
}
@@ -1597,8 +1594,7 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
struct dentry *dn;
struct inode *in;
int err = 0, skipped = 0, ret, i;
- struct ceph_mds_request_head *rhead = req->r_request->front.iov_base;
- u32 frag = le32_to_cpu(rhead->args.readdir.frag);
+ u32 frag = le32_to_cpu(req->r_args.readdir.frag);
u32 last_hash = 0;
u32 fpos_offset;
struct ceph_readdir_cache_control cache_ctl = {};
@@ -1615,7 +1611,7 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
} else if (rinfo->offset_hash) {
/* mds understands offset_hash */
WARN_ON_ONCE(req->r_readdir_offset != 2);
- last_hash = le32_to_cpu(rhead->args.readdir.offset_hash);
+ last_hash = le32_to_cpu(req->r_args.readdir.offset_hash);
}
}
@@ -1888,7 +1884,7 @@ static void ceph_do_invalidate_pages(struct inode *inode)
mutex_lock(&ci->i_truncate_mutex);
- if (READ_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
+ if (READ_ONCE(fsc->mount_state) >= CEPH_MOUNT_SHUTDOWN) {
pr_warn_ratelimited("invalidate_pages %p %lld forced umount\n",
inode, ceph_ino(inode));
mapping_set_error(inode->i_mapping, -EIO);
@@ -2340,15 +2336,23 @@ int ceph_permission(struct inode *inode, int mask)
}
/* Craft a mask of needed caps given a set of requested statx attrs. */
-static int statx_to_caps(u32 want)
+static int statx_to_caps(u32 want, umode_t mode)
{
int mask = 0;
if (want & (STATX_MODE|STATX_UID|STATX_GID|STATX_CTIME|STATX_BTIME))
mask |= CEPH_CAP_AUTH_SHARED;
- if (want & (STATX_NLINK|STATX_CTIME))
- mask |= CEPH_CAP_LINK_SHARED;
+ if (want & (STATX_NLINK|STATX_CTIME)) {
+ /*
+ * The link count for directories depends on inode->i_subdirs,
+ * and that is only updated when Fs caps are held.
+ */
+ if (S_ISDIR(mode))
+ mask |= CEPH_CAP_FILE_SHARED;
+ else
+ mask |= CEPH_CAP_LINK_SHARED;
+ }
if (want & (STATX_ATIME|STATX_MTIME|STATX_CTIME|STATX_SIZE|
STATX_BLOCKS))
@@ -2374,8 +2378,9 @@ int ceph_getattr(const struct path *path, struct kstat *stat,
/* Skip the getattr altogether if we're asked not to sync */
if (!(flags & AT_STATX_DONT_SYNC)) {
- err = ceph_do_getattr(inode, statx_to_caps(request_mask),
- flags & AT_STATX_FORCE_SYNC);
+ err = ceph_do_getattr(inode,
+ statx_to_caps(request_mask, inode->i_mode),
+ flags & AT_STATX_FORCE_SYNC);
if (err)
return err;
}
diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c
index 048a435a29be..fa8a847743d0 100644
--- a/fs/ceph/locks.c
+++ b/fs/ceph/locks.c
@@ -57,7 +57,7 @@ static const struct file_lock_operations ceph_fl_lock_ops = {
.fl_release_private = ceph_fl_release_lock,
};
-/**
+/*
* Implement fcntl and flock locking functions.
*/
static int ceph_lock_message(u8 lock_type, u16 operation, struct inode *inode,
@@ -225,7 +225,7 @@ static int try_unlock_file(struct file *file, struct file_lock *fl)
return 1;
}
-/**
+/*
* Attempt to set an fcntl lock.
* For now, this just goes away to the server. Later it may be more awesome.
*/
@@ -408,7 +408,7 @@ static int lock_to_ceph_filelock(struct file_lock *lock,
return err;
}
-/**
+/*
* Encode the flock and fcntl locks for the given inode into the ceph_filelock
* array. Must be called with inode->i_lock already held.
* If we encounter more of a specific lock type than expected, return -ENOSPC.
@@ -458,7 +458,7 @@ fail:
return err;
}
-/**
+/*
* Copy the encoded flock and fcntl locks into the pagelist.
* Format is: #fcntl locks, sequential fcntl locks, #flock locks,
* sequential flock locks.
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 8f1d7500a7ec..98c15ff2e599 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -516,13 +516,9 @@ static int parse_reply_info_create(void **p, void *end,
/* Malformed reply? */
info->has_create_ino = false;
} else if (test_bit(CEPHFS_FEATURE_DELEG_INO, &s->s_features)) {
- u8 struct_v, struct_compat;
- u32 len;
-
info->has_create_ino = true;
- ceph_decode_8_safe(p, end, struct_v, bad);
- ceph_decode_8_safe(p, end, struct_compat, bad);
- ceph_decode_32_safe(p, end, len, bad);
+ /* struct_v, struct_compat, and len */
+ ceph_decode_skip_n(p, end, 2 + sizeof(u32), bad);
ceph_decode_64_safe(p, end, info->ino, bad);
ret = ceph_parse_deleg_inos(p, end, s);
if (ret)
@@ -837,6 +833,7 @@ void ceph_mdsc_release_request(struct kref *kref)
}
kfree(req->r_path1);
kfree(req->r_path2);
+ put_cred(req->r_cred);
if (req->r_pagelist)
ceph_pagelist_release(req->r_pagelist);
put_request_session(req);
@@ -892,8 +889,7 @@ static void __register_request(struct ceph_mds_client *mdsc,
ceph_mdsc_get_request(req);
insert_request(&mdsc->request_tree, req);
- req->r_uid = current_fsuid();
- req->r_gid = current_fsgid();
+ req->r_cred = get_current_cred();
if (mdsc->oldest_tid == 0 && req->r_op != CEPH_MDS_OP_SETFILELOCK)
mdsc->oldest_tid = req->r_tid;
@@ -1243,7 +1239,7 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6
{
struct ceph_msg *msg;
struct ceph_mds_session_head *h;
- int i = -1;
+ int i;
int extra_bytes = 0;
int metadata_key_count = 0;
struct ceph_options *opt = mdsc->fsc->client->options;
@@ -1595,7 +1591,7 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
struct ceph_cap_flush *cf;
struct ceph_mds_client *mdsc = fsc->mdsc;
- if (READ_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
+ if (READ_ONCE(fsc->mount_state) >= CEPH_MOUNT_SHUTDOWN) {
if (inode->i_data.nrpages > 0)
invalidate = true;
if (ci->i_wrbuffer_ref > 0)
@@ -2482,21 +2478,24 @@ static int set_request_path_attr(struct inode *rinode, struct dentry *rdentry,
/*
* called under mdsc->mutex
*/
-static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
+static struct ceph_msg *create_request_message(struct ceph_mds_session *session,
struct ceph_mds_request *req,
- int mds, bool drop_cap_releases)
+ bool drop_cap_releases)
{
+ int mds = session->s_mds;
+ struct ceph_mds_client *mdsc = session->s_mdsc;
struct ceph_msg *msg;
- struct ceph_mds_request_head *head;
+ struct ceph_mds_request_head_old *head;
const char *path1 = NULL;
const char *path2 = NULL;
u64 ino1 = 0, ino2 = 0;
int pathlen1 = 0, pathlen2 = 0;
bool freepath1 = false, freepath2 = false;
- int len;
+ int len, i;
u16 releases;
void *p, *end;
int ret;
+ bool legacy = !(session->s_con.peer_features & CEPH_FEATURE_FS_BTIME);
ret = set_request_path_attr(req->r_inode, req->r_dentry,
req->r_parent, req->r_path1, req->r_ino1.ino,
@@ -2518,14 +2517,23 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
goto out_free1;
}
- len = sizeof(*head) +
- pathlen1 + pathlen2 + 2*(1 + sizeof(u32) + sizeof(u64)) +
+ if (legacy) {
+ /* Old style */
+ len = sizeof(*head);
+ } else {
+ /* New style: add gid_list and any later fields */
+ len = sizeof(struct ceph_mds_request_head) + sizeof(u32) +
+ (sizeof(u64) * req->r_cred->group_info->ngroups);
+ }
+
+ len += pathlen1 + pathlen2 + 2*(1 + sizeof(u32) + sizeof(u64)) +
sizeof(struct ceph_timespec);
/* calculate (max) length for cap releases */
len += sizeof(struct ceph_mds_request_release) *
(!!req->r_inode_drop + !!req->r_dentry_drop +
!!req->r_old_inode_drop + !!req->r_old_dentry_drop);
+
if (req->r_dentry_drop)
len += pathlen1;
if (req->r_old_dentry_drop)
@@ -2537,17 +2545,33 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
goto out_free2;
}
- msg->hdr.version = cpu_to_le16(2);
msg->hdr.tid = cpu_to_le64(req->r_tid);
- head = msg->front.iov_base;
- p = msg->front.iov_base + sizeof(*head);
+ /*
+ * The old ceph_mds_request_header didn't contain a version field, and
+ * one was added when we moved the message version from 3->4.
+ */
+ if (legacy) {
+ msg->hdr.version = cpu_to_le16(3);
+ head = msg->front.iov_base;
+ p = msg->front.iov_base + sizeof(*head);
+ } else {
+ struct ceph_mds_request_head *new_head = msg->front.iov_base;
+
+ msg->hdr.version = cpu_to_le16(4);
+ new_head->version = cpu_to_le16(CEPH_MDS_REQUEST_HEAD_VERSION);
+ head = (struct ceph_mds_request_head_old *)&new_head->oldest_client_tid;
+ p = msg->front.iov_base + sizeof(*new_head);
+ }
+
end = msg->front.iov_base + msg->front.iov_len;
head->mdsmap_epoch = cpu_to_le32(mdsc->mdsmap->m_epoch);
head->op = cpu_to_le32(req->r_op);
- head->caller_uid = cpu_to_le32(from_kuid(&init_user_ns, req->r_uid));
- head->caller_gid = cpu_to_le32(from_kgid(&init_user_ns, req->r_gid));
+ head->caller_uid = cpu_to_le32(from_kuid(&init_user_ns,
+ req->r_cred->fsuid));
+ head->caller_gid = cpu_to_le32(from_kgid(&init_user_ns,
+ req->r_cred->fsgid));
head->ino = cpu_to_le64(req->r_deleg_ino);
head->args = req->r_args;
@@ -2592,6 +2616,14 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
ceph_encode_copy(&p, &ts, sizeof(ts));
}
+ /* gid list */
+ if (!legacy) {
+ ceph_encode_32(&p, req->r_cred->group_info->ngroups);
+ for (i = 0; i < req->r_cred->group_info->ngroups; i++)
+ ceph_encode_64(&p, from_kgid(&init_user_ns,
+ req->r_cred->group_info->gid[i]));
+ }
+
if (WARN_ON_ONCE(p > end)) {
ceph_msg_put(msg);
msg = ERR_PTR(-ERANGE);
@@ -2635,14 +2667,28 @@ static void complete_request(struct ceph_mds_client *mdsc,
complete_all(&req->r_completion);
}
+static struct ceph_mds_request_head_old *
+find_old_request_head(void *p, u64 features)
+{
+ bool legacy = !(features & CEPH_FEATURE_FS_BTIME);
+ struct ceph_mds_request_head *new_head;
+
+ if (legacy)
+ return (struct ceph_mds_request_head_old *)p;
+ new_head = (struct ceph_mds_request_head *)p;
+ return (struct ceph_mds_request_head_old *)&new_head->oldest_client_tid;
+}
+
/*
* called under mdsc->mutex
*/
-static int __prepare_send_request(struct ceph_mds_client *mdsc,
+static int __prepare_send_request(struct ceph_mds_session *session,
struct ceph_mds_request *req,
- int mds, bool drop_cap_releases)
+ bool drop_cap_releases)
{
- struct ceph_mds_request_head *rhead;
+ int mds = session->s_mds;
+ struct ceph_mds_client *mdsc = session->s_mdsc;
+ struct ceph_mds_request_head_old *rhead;
struct ceph_msg *msg;
int flags = 0;
@@ -2661,6 +2707,7 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc,
if (test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags)) {
void *p;
+
/*
* Replay. Do not regenerate message (and rebuild
* paths, etc.); just use the original message.
@@ -2668,7 +2715,8 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc,
* d_move mangles the src name.
*/
msg = req->r_request;
- rhead = msg->front.iov_base;
+ rhead = find_old_request_head(msg->front.iov_base,
+ session->s_con.peer_features);
flags = le32_to_cpu(rhead->flags);
flags |= CEPH_MDS_FLAG_REPLAY;
@@ -2699,14 +2747,15 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc,
ceph_msg_put(req->r_request);
req->r_request = NULL;
}
- msg = create_request_message(mdsc, req, mds, drop_cap_releases);
+ msg = create_request_message(session, req, drop_cap_releases);
if (IS_ERR(msg)) {
req->r_err = PTR_ERR(msg);
return PTR_ERR(msg);
}
req->r_request = msg;
- rhead = msg->front.iov_base;
+ rhead = find_old_request_head(msg->front.iov_base,
+ session->s_con.peer_features);
rhead->oldest_client_tid = cpu_to_le64(__get_oldest_tid(mdsc));
if (test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags))
flags |= CEPH_MDS_FLAG_REPLAY;
@@ -2725,15 +2774,13 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc,
/*
* called under mdsc->mutex
*/
-static int __send_request(struct ceph_mds_client *mdsc,
- struct ceph_mds_session *session,
+static int __send_request(struct ceph_mds_session *session,
struct ceph_mds_request *req,
bool drop_cap_releases)
{
int err;
- err = __prepare_send_request(mdsc, req, session->s_mds,
- drop_cap_releases);
+ err = __prepare_send_request(session, req, drop_cap_releases);
if (!err) {
ceph_msg_get(req->r_request);
ceph_con_send(&session->s_con, req->r_request);
@@ -2818,10 +2865,6 @@ static void __do_request(struct ceph_mds_client *mdsc,
ceph_session_state_name(session->s_state));
if (session->s_state != CEPH_MDS_SESSION_OPEN &&
session->s_state != CEPH_MDS_SESSION_HUNG) {
- if (session->s_state == CEPH_MDS_SESSION_REJECTED) {
- err = -EACCES;
- goto out_session;
- }
/*
* We cannot queue async requests since the caps and delegated
* inodes are bound to the session. Just return -EJUKEBOX and
@@ -2831,6 +2874,20 @@ static void __do_request(struct ceph_mds_client *mdsc,
err = -EJUKEBOX;
goto out_session;
}
+
+ /*
+ * If the session has been REJECTED, then return a hard error,
+ * unless it's a CLEANRECOVER mount, in which case we'll queue
+ * it to the mdsc queue.
+ */
+ if (session->s_state == CEPH_MDS_SESSION_REJECTED) {
+ if (ceph_test_mount_opt(mdsc->fsc, CLEANRECOVER))
+ list_add(&req->r_wait, &mdsc->waiting_for_map);
+ else
+ err = -EACCES;
+ goto out_session;
+ }
+
if (session->s_state == CEPH_MDS_SESSION_NEW ||
session->s_state == CEPH_MDS_SESSION_CLOSING) {
err = __open_session(mdsc, session);
@@ -2850,7 +2907,7 @@ static void __do_request(struct ceph_mds_client *mdsc,
if (req->r_request_started == 0) /* note request start time */
req->r_request_started = jiffies;
- err = __send_request(mdsc, session, req, false);
+ err = __send_request(session, req, false);
out_session:
ceph_put_mds_session(session);
@@ -3173,6 +3230,23 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
err = parse_reply_info(session, msg, rinfo, session->s_con.peer_features);
mutex_unlock(&mdsc->mutex);
+ /* Must find target inode outside of mutexes to avoid deadlocks */
+ if ((err >= 0) && rinfo->head->is_target) {
+ struct inode *in;
+ struct ceph_vino tvino = {
+ .ino = le64_to_cpu(rinfo->targeti.in->ino),
+ .snap = le64_to_cpu(rinfo->targeti.in->snapid)
+ };
+
+ in = ceph_get_inode(mdsc->fsc->sb, tvino);
+ if (IS_ERR(in)) {
+ err = PTR_ERR(in);
+ mutex_lock(&session->s_mutex);
+ goto out_err;
+ }
+ req->r_target_inode = in;
+ }
+
mutex_lock(&session->s_mutex);
if (err < 0) {
pr_err("mdsc_handle_reply got corrupt reply mds%d(tid:%lld)\n", mds, tid);
@@ -3514,7 +3588,7 @@ static void replay_unsafe_requests(struct ceph_mds_client *mdsc,
mutex_lock(&mdsc->mutex);
list_for_each_entry_safe(req, nreq, &session->s_unsafe, r_unsafe_item)
- __send_request(mdsc, session, req, true);
+ __send_request(session, req, true);
/*
* also re-send old requests when MDS enters reconnect stage. So that MDS
@@ -3535,7 +3609,7 @@ static void replay_unsafe_requests(struct ceph_mds_client *mdsc,
ceph_mdsc_release_dir_caps_no_check(req);
- __send_request(mdsc, session, req, true);
+ __send_request(session, req, true);
}
mutex_unlock(&mdsc->mutex);
}
@@ -4374,12 +4448,7 @@ static void maybe_recover_session(struct ceph_mds_client *mdsc)
if (!READ_ONCE(fsc->blocklisted))
return;
- if (fsc->last_auto_reconnect &&
- time_before(jiffies, fsc->last_auto_reconnect + HZ * 60 * 30))
- return;
-
pr_info("auto reconnect after blocklisted\n");
- fsc->last_auto_reconnect = jiffies;
ceph_force_reconnect(fsc->sb);
}
@@ -4678,7 +4747,7 @@ void ceph_mdsc_sync(struct ceph_mds_client *mdsc)
{
u64 want_tid, want_flush;
- if (READ_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN)
+ if (READ_ONCE(mdsc->fsc->mount_state) >= CEPH_MOUNT_SHUTDOWN)
return;
dout("sync\n");
@@ -4855,10 +4924,8 @@ void ceph_mdsc_handle_fsmap(struct ceph_mds_client *mdsc, struct ceph_msg *msg)
void *p = msg->front.iov_base;
void *end = p + msg->front.iov_len;
u32 epoch;
- u32 map_len;
u32 num_fs;
u32 mount_fscid = (u32)-1;
- u8 struct_v, struct_cv;
int err = -EINVAL;
ceph_decode_need(&p, end, sizeof(u32), bad);
@@ -4866,24 +4933,17 @@ void ceph_mdsc_handle_fsmap(struct ceph_mds_client *mdsc, struct ceph_msg *msg)
dout("handle_fsmap epoch %u\n", epoch);
- ceph_decode_need(&p, end, 2 + sizeof(u32), bad);
- struct_v = ceph_decode_8(&p);
- struct_cv = ceph_decode_8(&p);
- map_len = ceph_decode_32(&p);
-
- ceph_decode_need(&p, end, sizeof(u32) * 3, bad);
- p += sizeof(u32) * 2; /* skip epoch and legacy_client_fscid */
+ /* struct_v, struct_cv, map_len, epoch, legacy_client_fscid */
+ ceph_decode_skip_n(&p, end, 2 + sizeof(u32) * 3, bad);
- num_fs = ceph_decode_32(&p);
+ ceph_decode_32_safe(&p, end, num_fs, bad);
while (num_fs-- > 0) {
void *info_p, *info_end;
u32 info_len;
- u8 info_v, info_cv;
u32 fscid, namelen;
ceph_decode_need(&p, end, 2 + sizeof(u32), bad);
- info_v = ceph_decode_8(&p);
- info_cv = ceph_decode_8(&p);
+ p += 2; // info_v, info_cv
info_len = ceph_decode_32(&p);
ceph_decode_need(&p, end, info_len, bad);
info_p = p;
@@ -4954,7 +5014,7 @@ void ceph_mdsc_handle_mdsmap(struct ceph_mds_client *mdsc, struct ceph_msg *msg)
return;
}
- newmap = ceph_mdsmap_decode(&p, end);
+ newmap = ceph_mdsmap_decode(&p, end, ceph_msgr2(mdsc->fsc->client));
if (IS_ERR(newmap)) {
err = PTR_ERR(newmap);
goto bad_unlock;
@@ -5081,23 +5141,12 @@ static struct ceph_auth_handshake *get_authorizer(struct ceph_connection *con,
struct ceph_mds_client *mdsc = s->s_mdsc;
struct ceph_auth_client *ac = mdsc->fsc->client->monc.auth;
struct ceph_auth_handshake *auth = &s->s_auth;
+ int ret;
- if (force_new && auth->authorizer) {
- ceph_auth_destroy_authorizer(auth->authorizer);
- auth->authorizer = NULL;
- }
- if (!auth->authorizer) {
- int ret = ceph_auth_create_authorizer(ac, CEPH_ENTITY_TYPE_MDS,
- auth);
- if (ret)
- return ERR_PTR(ret);
- } else {
- int ret = ceph_auth_update_authorizer(ac, CEPH_ENTITY_TYPE_MDS,
- auth);
- if (ret)
- return ERR_PTR(ret);
- }
- *proto = ac->protocol;
+ ret = __ceph_auth_get_authorizer(ac, auth, CEPH_ENTITY_TYPE_MDS,
+ force_new, proto, NULL, NULL);
+ if (ret)
+ return ERR_PTR(ret);
return auth;
}
@@ -5118,8 +5167,11 @@ static int verify_authorizer_reply(struct ceph_connection *con)
struct ceph_mds_session *s = con->private;
struct ceph_mds_client *mdsc = s->s_mdsc;
struct ceph_auth_client *ac = mdsc->fsc->client->monc.auth;
+ struct ceph_auth_handshake *auth = &s->s_auth;
- return ceph_auth_verify_authorizer_reply(ac, s->s_auth.authorizer);
+ return ceph_auth_verify_authorizer_reply(ac, auth->authorizer,
+ auth->authorizer_reply_buf, auth->authorizer_reply_buf_len,
+ NULL, NULL, NULL, NULL);
}
static int invalidate_authorizer(struct ceph_connection *con)
@@ -5133,6 +5185,80 @@ static int invalidate_authorizer(struct ceph_connection *con)
return ceph_monc_validate_auth(&mdsc->fsc->client->monc);
}
+static int mds_get_auth_request(struct ceph_connection *con,
+ void *buf, int *buf_len,
+ void **authorizer, int *authorizer_len)
+{
+ struct ceph_mds_session *s = con->private;
+ struct ceph_auth_client *ac = s->s_mdsc->fsc->client->monc.auth;
+ struct ceph_auth_handshake *auth = &s->s_auth;
+ int ret;
+
+ ret = ceph_auth_get_authorizer(ac, auth, CEPH_ENTITY_TYPE_MDS,
+ buf, buf_len);
+ if (ret)
+ return ret;
+
+ *authorizer = auth->authorizer_buf;
+ *authorizer_len = auth->authorizer_buf_len;
+ return 0;
+}
+
+static int mds_handle_auth_reply_more(struct ceph_connection *con,
+ void *reply, int reply_len,
+ void *buf, int *buf_len,
+ void **authorizer, int *authorizer_len)
+{
+ struct ceph_mds_session *s = con->private;
+ struct ceph_auth_client *ac = s->s_mdsc->fsc->client->monc.auth;
+ struct ceph_auth_handshake *auth = &s->s_auth;
+ int ret;
+
+ ret = ceph_auth_handle_svc_reply_more(ac, auth, reply, reply_len,
+ buf, buf_len);
+ if (ret)
+ return ret;
+
+ *authorizer = auth->authorizer_buf;
+ *authorizer_len = auth->authorizer_buf_len;
+ return 0;
+}
+
+static int mds_handle_auth_done(struct ceph_connection *con,
+ u64 global_id, void *reply, int reply_len,
+ u8 *session_key, int *session_key_len,
+ u8 *con_secret, int *con_secret_len)
+{
+ struct ceph_mds_session *s = con->private;
+ struct ceph_auth_client *ac = s->s_mdsc->fsc->client->monc.auth;
+ struct ceph_auth_handshake *auth = &s->s_auth;
+
+ return ceph_auth_handle_svc_reply_done(ac, auth, reply, reply_len,
+ session_key, session_key_len,
+ con_secret, con_secret_len);
+}
+
+static int mds_handle_auth_bad_method(struct ceph_connection *con,
+ int used_proto, int result,
+ const int *allowed_protos, int proto_cnt,
+ const int *allowed_modes, int mode_cnt)
+{
+ struct ceph_mds_session *s = con->private;
+ struct ceph_mon_client *monc = &s->s_mdsc->fsc->client->monc;
+ int ret;
+
+ if (ceph_auth_handle_bad_authorizer(monc->auth, CEPH_ENTITY_TYPE_MDS,
+ used_proto, result,
+ allowed_protos, proto_cnt,
+ allowed_modes, mode_cnt)) {
+ ret = ceph_monc_validate_auth(monc);
+ if (ret)
+ return ret;
+ }
+
+ return -EACCES;
+}
+
static struct ceph_msg *mds_alloc_msg(struct ceph_connection *con,
struct ceph_msg_header *hdr, int *skip)
{
@@ -5182,6 +5308,10 @@ static const struct ceph_connection_operations mds_con_ops = {
.alloc_msg = mds_alloc_msg,
.sign_message = mds_sign_message,
.check_message_signature = mds_check_message_signature,
+ .get_auth_request = mds_get_auth_request,
+ .handle_auth_reply_more = mds_handle_auth_reply_more,
+ .handle_auth_done = mds_handle_auth_done,
+ .handle_auth_bad_method = mds_handle_auth_bad_method,
};
/* eof */
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index f5adbebcb38e..eaa7c5422116 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -275,8 +275,7 @@ struct ceph_mds_request {
union ceph_mds_request_args r_args;
int r_fmode; /* file mode, if expecting cap */
- kuid_t r_uid;
- kgid_t r_gid;
+ const struct cred *r_cred;
int r_request_release_offset;
struct timespec64 r_stamp;
diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c
index e4aba6c6d3b5..abd9af7727ad 100644
--- a/fs/ceph/mdsmap.c
+++ b/fs/ceph/mdsmap.c
@@ -114,7 +114,7 @@ bad:
* Ignore any fields we don't care about (there are quite a few of
* them).
*/
-struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
+struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end, bool msgr2)
{
struct ceph_mdsmap *m;
const void *start = *p;
@@ -201,18 +201,19 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
namelen = ceph_decode_32(p); /* skip mds name */
*p += namelen;
- ceph_decode_need(p, end,
- 4*sizeof(u32) + sizeof(u64) +
- sizeof(addr) + sizeof(struct ceph_timespec),
- bad);
- mds = ceph_decode_32(p);
- inc = ceph_decode_32(p);
- state = ceph_decode_32(p);
+ ceph_decode_32_safe(p, end, mds, bad);
+ ceph_decode_32_safe(p, end, inc, bad);
+ ceph_decode_32_safe(p, end, state, bad);
*p += sizeof(u64); /* state_seq */
- err = ceph_decode_entity_addr(p, end, &addr);
+ if (info_v >= 8)
+ err = ceph_decode_entity_addrvec(p, end, msgr2, &addr);
+ else
+ err = ceph_decode_entity_addr(p, end, &addr);
if (err)
goto corrupt;
- ceph_decode_copy(p, &laggy_since, sizeof(laggy_since));
+
+ ceph_decode_copy_safe(p, end, &laggy_since, sizeof(laggy_since),
+ bad);
laggy = laggy_since.tv_sec != 0 || laggy_since.tv_nsec != 0;
*p += sizeof(u32);
ceph_decode_32_safe(p, end, namelen, bad);
@@ -243,8 +244,8 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
}
if (state <= 0) {
- pr_warn("mdsmap_decode got incorrect state(%s)\n",
- ceph_mds_state_name(state));
+ dout("mdsmap_decode got incorrect state(%s)\n",
+ ceph_mds_state_name(state));
continue;
}
diff --git a/fs/ceph/metric.c b/fs/ceph/metric.c
index fee4c4778313..5ec94bd4c1de 100644
--- a/fs/ceph/metric.c
+++ b/fs/ceph/metric.c
@@ -16,6 +16,7 @@ static bool ceph_mdsc_send_metrics(struct ceph_mds_client *mdsc,
struct ceph_metric_read_latency *read;
struct ceph_metric_write_latency *write;
struct ceph_metric_metadata_latency *meta;
+ struct ceph_metric_dlease *dlease;
struct ceph_client_metric *m = &mdsc->metric;
u64 nr_caps = atomic64_read(&m->total_caps);
struct ceph_msg *msg;
@@ -25,7 +26,7 @@ static bool ceph_mdsc_send_metrics(struct ceph_mds_client *mdsc,
s32 len;
len = sizeof(*head) + sizeof(*cap) + sizeof(*read) + sizeof(*write)
- + sizeof(*meta);
+ + sizeof(*meta) + sizeof(*dlease);
msg = ceph_msg_new(CEPH_MSG_CLIENT_METRICS, len, GFP_NOFS, true);
if (!msg) {
@@ -42,8 +43,8 @@ static bool ceph_mdsc_send_metrics(struct ceph_mds_client *mdsc,
cap->ver = 1;
cap->compat = 1;
cap->data_len = cpu_to_le32(sizeof(*cap) - 10);
- cap->hit = cpu_to_le64(percpu_counter_sum(&mdsc->metric.i_caps_hit));
- cap->mis = cpu_to_le64(percpu_counter_sum(&mdsc->metric.i_caps_mis));
+ cap->hit = cpu_to_le64(percpu_counter_sum(&m->i_caps_hit));
+ cap->mis = cpu_to_le64(percpu_counter_sum(&m->i_caps_mis));
cap->total = cpu_to_le64(nr_caps);
items++;
@@ -83,6 +84,17 @@ static bool ceph_mdsc_send_metrics(struct ceph_mds_client *mdsc,
meta->nsec = cpu_to_le32(ts.tv_nsec);
items++;
+ /* encode the dentry lease metric */
+ dlease = (struct ceph_metric_dlease *)(meta + 1);
+ dlease->type = cpu_to_le32(CLIENT_METRIC_TYPE_DENTRY_LEASE);
+ dlease->ver = 1;
+ dlease->compat = 1;
+ dlease->data_len = cpu_to_le32(sizeof(*dlease) - 10);
+ dlease->hit = cpu_to_le64(percpu_counter_sum(&m->d_lease_hit));
+ dlease->mis = cpu_to_le64(percpu_counter_sum(&m->d_lease_mis));
+ dlease->total = cpu_to_le64(atomic64_read(&m->total_dentries));
+ items++;
+
put_unaligned_le32(items, &head->num);
msg->front.iov_len = len;
msg->hdr.version = cpu_to_le16(1);
diff --git a/fs/ceph/metric.h b/fs/ceph/metric.h
index 710f3f1dceab..af6038ff39d4 100644
--- a/fs/ceph/metric.h
+++ b/fs/ceph/metric.h
@@ -27,6 +27,7 @@ enum ceph_metric_type {
CLIENT_METRIC_TYPE_READ_LATENCY, \
CLIENT_METRIC_TYPE_WRITE_LATENCY, \
CLIENT_METRIC_TYPE_METADATA_LATENCY, \
+ CLIENT_METRIC_TYPE_DENTRY_LEASE, \
\
CLIENT_METRIC_TYPE_MAX, \
}
@@ -80,6 +81,19 @@ struct ceph_metric_metadata_latency {
__le32 nsec;
} __packed;
+/* metric dentry lease header */
+struct ceph_metric_dlease {
+ __le32 type; /* ceph metric type */
+
+ __u8 ver;
+ __u8 compat;
+
+ __le32 data_len; /* length of sizeof(hit + mis + total) */
+ __le64 hit;
+ __le64 mis;
+ __le64 total;
+} __packed;
+
struct ceph_metric_head {
__le32 num; /* the number of metrics that will be sent */
} __packed;
diff --git a/fs/ceph/quota.c b/fs/ceph/quota.c
index 9b785f11e95a..4e32c9600ecc 100644
--- a/fs/ceph/quota.c
+++ b/fs/ceph/quota.c
@@ -264,7 +264,7 @@ restart:
return NULL;
}
-static bool ceph_quota_is_same_realm(struct inode *old, struct inode *new)
+bool ceph_quota_is_same_realm(struct inode *old, struct inode *new)
{
struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(old->i_sb);
struct ceph_snap_realm *old_realm, *new_realm;
@@ -516,59 +516,3 @@ bool ceph_quota_update_statfs(struct ceph_fs_client *fsc, struct kstatfs *buf)
return is_updated;
}
-/*
- * ceph_quota_check_rename - check if a rename can be executed
- * @mdsc: MDS client instance
- * @old: inode to be copied
- * @new: destination inode (directory)
- *
- * This function verifies if a rename (e.g. moving a file or directory) can be
- * executed. It forces an rstat update in the @new target directory (and in the
- * source @old as well, if it's a directory). The actual check is done both for
- * max_files and max_bytes.
- *
- * This function returns 0 if it's OK to do the rename, or, if quotas are
- * exceeded, -EXDEV (if @old is a directory) or -EDQUOT.
- */
-int ceph_quota_check_rename(struct ceph_mds_client *mdsc,
- struct inode *old, struct inode *new)
-{
- struct ceph_inode_info *ci_old = ceph_inode(old);
- int ret = 0;
-
- if (ceph_quota_is_same_realm(old, new))
- return 0;
-
- /*
- * Get the latest rstat for target directory (and for source, if a
- * directory)
- */
- ret = ceph_do_getattr(new, CEPH_STAT_RSTAT, false);
- if (ret)
- return ret;
-
- if (S_ISDIR(old->i_mode)) {
- ret = ceph_do_getattr(old, CEPH_STAT_RSTAT, false);
- if (ret)
- return ret;
- ret = check_quota_exceeded(new, QUOTA_CHECK_MAX_BYTES_OP,
- ci_old->i_rbytes);
- if (!ret)
- ret = check_quota_exceeded(new,
- QUOTA_CHECK_MAX_FILES_OP,
- ci_old->i_rfiles +
- ci_old->i_rsubdirs);
- if (ret)
- ret = -EXDEV;
- } else {
- ret = check_quota_exceeded(new, QUOTA_CHECK_MAX_BYTES_OP,
- i_size_read(old));
- if (!ret)
- ret = check_quota_exceeded(new,
- QUOTA_CHECK_MAX_FILES_OP, 1);
- if (ret)
- ret = -EDQUOT;
- }
-
- return ret;
-}
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 33ba6f0aa55c..9b1b7f4cfdd4 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -831,6 +831,13 @@ static void destroy_caches(void)
ceph_fscache_unregister();
}
+static void __ceph_umount_begin(struct ceph_fs_client *fsc)
+{
+ ceph_osdc_abort_requests(&fsc->client->osdc, -EIO);
+ ceph_mdsc_force_umount(fsc->mdsc);
+ fsc->filp_gen++; // invalidate open files
+}
+
/*
* ceph_umount_begin - initiate forced umount. Tear down the
* mount, skipping steps that may hang while waiting for server(s).
@@ -843,9 +850,7 @@ static void ceph_umount_begin(struct super_block *sb)
if (!fsc)
return;
fsc->mount_state = CEPH_MOUNT_SHUTDOWN;
- ceph_osdc_abort_requests(&fsc->client->osdc, -EIO);
- ceph_mdsc_force_umount(fsc->mdsc);
- fsc->filp_gen++; // invalidate open files
+ __ceph_umount_begin(fsc);
}
static const struct super_operations ceph_super_ops = {
@@ -1234,7 +1239,8 @@ int ceph_force_reconnect(struct super_block *sb)
struct ceph_fs_client *fsc = ceph_sb_to_client(sb);
int err = 0;
- ceph_umount_begin(sb);
+ fsc->mount_state = CEPH_MOUNT_RECOVER;
+ __ceph_umount_begin(fsc);
/* Make sure all page caches get invalidated.
* see remove_session_caps_cb() */
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 482473e4cce1..b62d8fee3b86 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -106,9 +106,8 @@ struct ceph_fs_client {
struct ceph_mount_options *mount_options;
struct ceph_client *client;
- unsigned long mount_state;
+ int mount_state;
- unsigned long last_auto_reconnect;
bool blocklisted;
bool have_copy_from2;
@@ -129,6 +128,7 @@ struct ceph_fs_client {
struct dentry *debugfs_bdi;
struct dentry *debugfs_mdsc, *debugfs_mdsmap;
struct dentry *debugfs_metric;
+ struct dentry *debugfs_status;
struct dentry *debugfs_mds_sessions;
#endif
@@ -1222,14 +1222,13 @@ extern void ceph_handle_quota(struct ceph_mds_client *mdsc,
struct ceph_mds_session *session,
struct ceph_msg *msg);
extern bool ceph_quota_is_max_files_exceeded(struct inode *inode);
+extern bool ceph_quota_is_same_realm(struct inode *old, struct inode *new);
extern bool ceph_quota_is_max_bytes_exceeded(struct inode *inode,
loff_t newlen);
extern bool ceph_quota_is_max_bytes_approaching(struct inode *inode,
loff_t newlen);
extern bool ceph_quota_update_statfs(struct ceph_fs_client *fsc,
struct kstatfs *buf);
-extern int ceph_quota_check_rename(struct ceph_mds_client *mdsc,
- struct inode *old, struct inode *new);
extern void ceph_cleanup_quotarealms_inodes(struct ceph_mds_client *mdsc);
#endif /* _FS_CEPH_SUPER_H */
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index 197cb1234341..24997982de01 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -42,6 +42,7 @@ struct ceph_vxattr {
#define VXATTR_FLAG_READONLY (1<<0)
#define VXATTR_FLAG_HIDDEN (1<<1)
#define VXATTR_FLAG_RSTAT (1<<2)
+#define VXATTR_FLAG_DIRSTAT (1<<3)
/* layouts */
@@ -303,6 +304,36 @@ static ssize_t ceph_vxattrcb_snap_btime(struct ceph_inode_info *ci, char *val,
ci->i_snap_btime.tv_nsec);
}
+static ssize_t ceph_vxattrcb_cluster_fsid(struct ceph_inode_info *ci,
+ char *val, size_t size)
+{
+ struct ceph_fs_client *fsc = ceph_sb_to_client(ci->vfs_inode.i_sb);
+
+ return ceph_fmt_xattr(val, size, "%pU", &fsc->client->fsid);
+}
+
+static ssize_t ceph_vxattrcb_client_id(struct ceph_inode_info *ci,
+ char *val, size_t size)
+{
+ struct ceph_fs_client *fsc = ceph_sb_to_client(ci->vfs_inode.i_sb);
+
+ return ceph_fmt_xattr(val, size, "client%lld",
+ ceph_client_gid(fsc->client));
+}
+
+static ssize_t ceph_vxattrcb_caps(struct ceph_inode_info *ci, char *val,
+ size_t size)
+{
+ int issued;
+
+ spin_lock(&ci->i_ceph_lock);
+ issued = __ceph_caps_issued(ci, NULL);
+ spin_unlock(&ci->i_ceph_lock);
+
+ return ceph_fmt_xattr(val, size, "%s/0x%x",
+ ceph_cap_string(issued), issued);
+}
+
#define CEPH_XATTR_NAME(_type, _name) XATTR_CEPH_PREFIX #_type "." #_name
#define CEPH_XATTR_NAME2(_type, _name, _name2) \
XATTR_CEPH_PREFIX #_type "." #_name "." #_name2
@@ -347,9 +378,9 @@ static struct ceph_vxattr ceph_dir_vxattrs[] = {
XATTR_LAYOUT_FIELD(dir, layout, object_size),
XATTR_LAYOUT_FIELD(dir, layout, pool),
XATTR_LAYOUT_FIELD(dir, layout, pool_namespace),
- XATTR_NAME_CEPH(dir, entries, 0),
- XATTR_NAME_CEPH(dir, files, 0),
- XATTR_NAME_CEPH(dir, subdirs, 0),
+ XATTR_NAME_CEPH(dir, entries, VXATTR_FLAG_DIRSTAT),
+ XATTR_NAME_CEPH(dir, files, VXATTR_FLAG_DIRSTAT),
+ XATTR_NAME_CEPH(dir, subdirs, VXATTR_FLAG_DIRSTAT),
XATTR_RSTAT_FIELD(dir, rentries),
XATTR_RSTAT_FIELD(dir, rfiles),
XATTR_RSTAT_FIELD(dir, rsubdirs),
@@ -378,6 +409,13 @@ static struct ceph_vxattr ceph_dir_vxattrs[] = {
.exists_cb = ceph_vxattrcb_snap_btime_exists,
.flags = VXATTR_FLAG_READONLY,
},
+ {
+ .name = "ceph.caps",
+ .name_size = sizeof("ceph.caps"),
+ .getxattr_cb = ceph_vxattrcb_caps,
+ .exists_cb = NULL,
+ .flags = VXATTR_FLAG_HIDDEN,
+ },
{ .name = NULL, 0 } /* Required table terminator */
};
@@ -403,6 +441,31 @@ static struct ceph_vxattr ceph_file_vxattrs[] = {
.exists_cb = ceph_vxattrcb_snap_btime_exists,
.flags = VXATTR_FLAG_READONLY,
},
+ {
+ .name = "ceph.caps",
+ .name_size = sizeof("ceph.caps"),
+ .getxattr_cb = ceph_vxattrcb_caps,
+ .exists_cb = NULL,
+ .flags = VXATTR_FLAG_HIDDEN,
+ },
+ { .name = NULL, 0 } /* Required table terminator */
+};
+
+static struct ceph_vxattr ceph_common_vxattrs[] = {
+ {
+ .name = "ceph.cluster_fsid",
+ .name_size = sizeof("ceph.cluster_fsid"),
+ .getxattr_cb = ceph_vxattrcb_cluster_fsid,
+ .exists_cb = NULL,
+ .flags = VXATTR_FLAG_READONLY,
+ },
+ {
+ .name = "ceph.client_id",
+ .name_size = sizeof("ceph.client_id"),
+ .getxattr_cb = ceph_vxattrcb_client_id,
+ .exists_cb = NULL,
+ .flags = VXATTR_FLAG_READONLY,
+ },
{ .name = NULL, 0 } /* Required table terminator */
};
@@ -428,6 +491,13 @@ static struct ceph_vxattr *ceph_match_vxattr(struct inode *inode,
}
}
+ vxattr = ceph_common_vxattrs;
+ while (vxattr->name) {
+ if (!strcmp(vxattr->name, name))
+ return vxattr;
+ vxattr++;
+ }
+
return NULL;
}
@@ -837,6 +907,8 @@ ssize_t __ceph_getxattr(struct inode *inode, const char *name, void *value,
int mask = 0;
if (vxattr->flags & VXATTR_FLAG_RSTAT)
mask |= CEPH_STAT_RSTAT;
+ if (vxattr->flags & VXATTR_FLAG_DIRSTAT)
+ mask |= CEPH_CAP_FILE_SHARED;
err = ceph_do_getattr(inode, mask, true);
if (err)
return err;
@@ -950,6 +1022,7 @@ static int ceph_sync_setxattr(struct inode *inode, const char *name,
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_mds_request *req;
struct ceph_mds_client *mdsc = fsc->mdsc;
+ struct ceph_osd_client *osdc = &fsc->client->osdc;
struct ceph_pagelist *pagelist = NULL;
int op = CEPH_MDS_OP_SETXATTR;
int err;
@@ -988,6 +1061,8 @@ static int ceph_sync_setxattr(struct inode *inode, const char *name,
if (op == CEPH_MDS_OP_SETXATTR) {
req->r_args.setxattr.flags = cpu_to_le32(flags);
+ req->r_args.setxattr.osdmap_epoch =
+ cpu_to_le32(osdc->osdmap->epoch);
req->r_pagelist = pagelist;
pagelist = NULL;
}