summaryrefslogtreecommitdiffstats
path: root/include
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2020-12-17 11:53:52 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2020-12-17 11:53:52 -0800
commitbe695ee29e8fc0af266d9f1882868c47da01a790 (patch)
tree085cca4c1a124751d18cd2a06b2fe157daf67e40 /include
parent92dbc9dedccb9759c7f9f2f0ae6242396376988f (diff)
parent2f0df6cfa325d7106b8a65bc0e02db1086e3f73b (diff)
downloadlinux-be695ee29e8fc0af266d9f1882868c47da01a790.tar.bz2
Merge tag 'ceph-for-5.11-rc1' of git://github.com/ceph/ceph-client
Pull ceph updates from Ilya Dryomov: "The big ticket item here is support for msgr2 on-wire protocol, which adds the option of full in-transit encryption using AES-GCM algorithm (myself). On top of that we have a series to avoid intermittent errors during recovery with recover_session=clean and some MDS request encoding work from Jeff, a cap handling fix and assorted observability improvements from Luis and Xiubo and a good number of cleanups. Luis also ran into a corner case with quotas which sadly means that we are back to denying cross-quota-realm renames" * tag 'ceph-for-5.11-rc1' of git://github.com/ceph/ceph-client: (59 commits) libceph: drop ceph_auth_{create,update}_authorizer() libceph, ceph: make use of __ceph_auth_get_authorizer() in msgr1 libceph, ceph: implement msgr2.1 protocol (crc and secure modes) libceph: introduce connection modes and ms_mode option libceph, rbd: ignore addr->type while comparing in some cases libceph, ceph: get and handle cluster maps with addrvecs libceph: factor out finish_auth() libceph: drop ac->ops->name field libceph: amend cephx init_protocol() and build_request() libceph, ceph: incorporate nautilus cephx changes libceph: safer en/decoding of cephx requests and replies libceph: more insight into ticket expiry and invalidation libceph: move msgr1 protocol specific fields to its own struct libceph: move msgr1 protocol implementation to its own file libceph: separate msgr1 protocol implementation libceph: export remaining protocol independent infrastructure libceph: export zero_page libceph: rename and export con->flags bits libceph: rename and export con->state states libceph: make con->state an int ...
Diffstat (limited to 'include')
-rw-r--r--include/linux/ceph/auth.h68
-rw-r--r--include/linux/ceph/ceph_features.h11
-rw-r--r--include/linux/ceph/ceph_fs.h44
-rw-r--r--include/linux/ceph/decode.h8
-rw-r--r--include/linux/ceph/libceph.h11
-rw-r--r--include/linux/ceph/mdsmap.h2
-rw-r--r--include/linux/ceph/messenger.h285
-rw-r--r--include/linux/ceph/msgr.h66
-rw-r--r--include/linux/ceph/osdmap.h4
9 files changed, 424 insertions, 75 deletions
diff --git a/include/linux/ceph/auth.h b/include/linux/ceph/auth.h
index 6728c2ee0205..71b5d481c653 100644
--- a/include/linux/ceph/auth.h
+++ b/include/linux/ceph/auth.h
@@ -32,8 +32,6 @@ struct ceph_auth_handshake {
};
struct ceph_auth_client_ops {
- const char *name;
-
/*
* true if we are authenticated and can connect to
* services.
@@ -53,7 +51,9 @@ struct ceph_auth_client_ops {
*/
int (*build_request)(struct ceph_auth_client *ac, void *buf, void *end);
int (*handle_reply)(struct ceph_auth_client *ac, int result,
- void *buf, void *end);
+ void *buf, void *end, u8 *session_key,
+ int *session_key_len, u8 *con_secret,
+ int *con_secret_len);
/*
* Create authorizer for connecting to a service, and verify
@@ -69,7 +69,10 @@ struct ceph_auth_client_ops {
void *challenge_buf,
int challenge_buf_len);
int (*verify_authorizer_reply)(struct ceph_auth_client *ac,
- struct ceph_authorizer *a);
+ struct ceph_authorizer *a,
+ void *reply, int reply_len,
+ u8 *session_key, int *session_key_len,
+ u8 *con_secret, int *con_secret_len);
void (*invalidate_authorizer)(struct ceph_auth_client *ac,
int peer_type);
@@ -95,11 +98,15 @@ struct ceph_auth_client {
const struct ceph_crypto_key *key; /* our secret key */
unsigned want_keys; /* which services we want */
+ int preferred_mode; /* CEPH_CON_MODE_* */
+ int fallback_mode; /* ditto */
+
struct mutex mutex;
};
-extern struct ceph_auth_client *ceph_auth_init(const char *name,
- const struct ceph_crypto_key *key);
+struct ceph_auth_client *ceph_auth_init(const char *name,
+ const struct ceph_crypto_key *key,
+ const int *con_modes);
extern void ceph_auth_destroy(struct ceph_auth_client *ac);
extern void ceph_auth_reset(struct ceph_auth_client *ac);
@@ -113,21 +120,22 @@ int ceph_auth_entity_name_encode(const char *name, void **p, void *end);
extern int ceph_build_auth(struct ceph_auth_client *ac,
void *msg_buf, size_t msg_len);
-
extern int ceph_auth_is_authenticated(struct ceph_auth_client *ac);
-extern int ceph_auth_create_authorizer(struct ceph_auth_client *ac,
- int peer_type,
- struct ceph_auth_handshake *auth);
+
+int __ceph_auth_get_authorizer(struct ceph_auth_client *ac,
+ struct ceph_auth_handshake *auth,
+ int peer_type, bool force_new,
+ int *proto, int *pref_mode, int *fallb_mode);
void ceph_auth_destroy_authorizer(struct ceph_authorizer *a);
-extern int ceph_auth_update_authorizer(struct ceph_auth_client *ac,
- int peer_type,
- struct ceph_auth_handshake *a);
int ceph_auth_add_authorizer_challenge(struct ceph_auth_client *ac,
struct ceph_authorizer *a,
void *challenge_buf,
int challenge_buf_len);
-extern int ceph_auth_verify_authorizer_reply(struct ceph_auth_client *ac,
- struct ceph_authorizer *a);
+int ceph_auth_verify_authorizer_reply(struct ceph_auth_client *ac,
+ struct ceph_authorizer *a,
+ void *reply, int reply_len,
+ u8 *session_key, int *session_key_len,
+ u8 *con_secret, int *con_secret_len);
extern void ceph_auth_invalidate_authorizer(struct ceph_auth_client *ac,
int peer_type);
@@ -147,4 +155,34 @@ int ceph_auth_check_message_signature(struct ceph_auth_handshake *auth,
return auth->check_message_signature(auth, msg);
return 0;
}
+
+int ceph_auth_get_request(struct ceph_auth_client *ac, void *buf, int buf_len);
+int ceph_auth_handle_reply_more(struct ceph_auth_client *ac, void *reply,
+ int reply_len, void *buf, int buf_len);
+int ceph_auth_handle_reply_done(struct ceph_auth_client *ac,
+ u64 global_id, void *reply, int reply_len,
+ u8 *session_key, int *session_key_len,
+ u8 *con_secret, int *con_secret_len);
+bool ceph_auth_handle_bad_method(struct ceph_auth_client *ac,
+ int used_proto, int result,
+ const int *allowed_protos, int proto_cnt,
+ const int *allowed_modes, int mode_cnt);
+
+int ceph_auth_get_authorizer(struct ceph_auth_client *ac,
+ struct ceph_auth_handshake *auth,
+ int peer_type, void *buf, int *buf_len);
+int ceph_auth_handle_svc_reply_more(struct ceph_auth_client *ac,
+ struct ceph_auth_handshake *auth,
+ void *reply, int reply_len,
+ void *buf, int *buf_len);
+int ceph_auth_handle_svc_reply_done(struct ceph_auth_client *ac,
+ struct ceph_auth_handshake *auth,
+ void *reply, int reply_len,
+ u8 *session_key, int *session_key_len,
+ u8 *con_secret, int *con_secret_len);
+bool ceph_auth_handle_bad_authorizer(struct ceph_auth_client *ac,
+ int peer_type, int used_proto, int result,
+ const int *allowed_protos, int proto_cnt,
+ const int *allowed_modes, int mode_cnt);
+
#endif
diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h
index 999636d53cf2..3a47acd9cc14 100644
--- a/include/linux/ceph/ceph_features.h
+++ b/include/linux/ceph/ceph_features.h
@@ -8,7 +8,8 @@
* feature. Base case is 1 (first use).
*/
#define CEPH_FEATURE_INCARNATION_1 (0ull)
-#define CEPH_FEATURE_INCARNATION_2 (1ull<<57) // CEPH_FEATURE_SERVER_JEWEL
+#define CEPH_FEATURE_INCARNATION_2 (1ull<<57) // SERVER_JEWEL
+#define CEPH_FEATURE_INCARNATION_3 ((1ull<<57)|(1ull<<28)) // SERVER_MIMIC
#define DEFINE_CEPH_FEATURE(bit, incarnation, name) \
static const uint64_t __maybe_unused CEPH_FEATURE_##name = (1ULL<<bit); \
@@ -75,7 +76,7 @@
DEFINE_CEPH_FEATURE( 0, 1, UID)
DEFINE_CEPH_FEATURE( 1, 1, NOSRCADDR)
DEFINE_CEPH_FEATURE_RETIRED( 2, 1, MONCLOCKCHECK, JEWEL, LUMINOUS)
-
+DEFINE_CEPH_FEATURE( 2, 3, SERVER_NAUTILUS)
DEFINE_CEPH_FEATURE( 3, 1, FLOCK)
DEFINE_CEPH_FEATURE( 4, 1, SUBSCRIBE2)
DEFINE_CEPH_FEATURE( 5, 1, MONNAMES)
@@ -114,7 +115,7 @@ DEFINE_CEPH_FEATURE(25, 1, CRUSH_TUNABLES2)
DEFINE_CEPH_FEATURE(26, 1, CREATEPOOLID)
DEFINE_CEPH_FEATURE(27, 1, REPLY_CREATE_INODE)
DEFINE_CEPH_FEATURE_RETIRED(28, 1, OSD_HBMSGS, HAMMER, JEWEL)
-DEFINE_CEPH_FEATURE(28, 2, SERVER_M)
+DEFINE_CEPH_FEATURE(28, 2, SERVER_MIMIC)
DEFINE_CEPH_FEATURE(29, 1, MDSENC)
DEFINE_CEPH_FEATURE(30, 1, OSDHASHPSPOOL)
DEFINE_CEPH_FEATURE(31, 1, MON_SINGLE_PAXOS) // deprecate me
@@ -177,13 +178,16 @@ DEFINE_CEPH_FEATURE_DEPRECATED(63, 1, RESERVED_BROKEN, LUMINOUS) // client-facin
*/
#define CEPH_FEATURES_SUPPORTED_DEFAULT \
(CEPH_FEATURE_NOSRCADDR | \
+ CEPH_FEATURE_SERVER_NAUTILUS | \
CEPH_FEATURE_FLOCK | \
CEPH_FEATURE_SUBSCRIBE2 | \
+ CEPH_FEATURE_MONNAMES | \
CEPH_FEATURE_RECONNECT_SEQ | \
CEPH_FEATURE_DIRLAYOUTHASH | \
CEPH_FEATURE_PGID64 | \
CEPH_FEATURE_PGPOOL3 | \
CEPH_FEATURE_OSDENC | \
+ CEPH_FEATURE_MONENC | \
CEPH_FEATURE_CRUSH_TUNABLES | \
CEPH_FEATURE_SERVER_LUMINOUS | \
CEPH_FEATURE_RESEND_ON_SPLIT | \
@@ -193,6 +197,7 @@ DEFINE_CEPH_FEATURE_DEPRECATED(63, 1, RESERVED_BROKEN, LUMINOUS) // client-facin
CEPH_FEATURE_MSG_AUTH | \
CEPH_FEATURE_CRUSH_TUNABLES2 | \
CEPH_FEATURE_REPLY_CREATE_INODE | \
+ CEPH_FEATURE_SERVER_MIMIC | \
CEPH_FEATURE_MDSENC | \
CEPH_FEATURE_OSDHASHPSPOOL | \
CEPH_FEATURE_OSD_CACHEPOOL | \
diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h
index 455e9b9e2adf..e41a811026f6 100644
--- a/include/linux/ceph/ceph_fs.h
+++ b/include/linux/ceph/ceph_fs.h
@@ -93,8 +93,19 @@ struct ceph_dir_layout {
#define CEPH_AUTH_NONE 0x1
#define CEPH_AUTH_CEPHX 0x2
+#define CEPH_AUTH_MODE_NONE 0
+#define CEPH_AUTH_MODE_AUTHORIZER 1
+#define CEPH_AUTH_MODE_MON 10
+
+/* msgr2 protocol modes */
+#define CEPH_CON_MODE_UNKNOWN 0x0
+#define CEPH_CON_MODE_CRC 0x1
+#define CEPH_CON_MODE_SECURE 0x2
+
#define CEPH_AUTH_UID_DEFAULT ((__u64) -1)
+const char *ceph_auth_proto_name(int proto);
+const char *ceph_con_mode_name(int mode);
/*********************************************
* message layer
@@ -424,6 +435,7 @@ union ceph_mds_request_args {
} __attribute__ ((packed)) open;
struct {
__le32 flags;
+ __le32 osdmap_epoch; /* used for setting file/dir layouts */
} __attribute__ ((packed)) setxattr;
struct {
struct ceph_file_layout_legacy layout;
@@ -445,11 +457,25 @@ union ceph_mds_request_args {
} __attribute__ ((packed)) lookupino;
} __attribute__ ((packed));
+union ceph_mds_request_args_ext {
+ union ceph_mds_request_args old;
+ struct {
+ __le32 mode;
+ __le32 uid;
+ __le32 gid;
+ struct ceph_timespec mtime;
+ struct ceph_timespec atime;
+ __le64 size, old_size; /* old_size needed by truncate */
+ __le32 mask; /* CEPH_SETATTR_* */
+ struct ceph_timespec btime;
+ } __attribute__ ((packed)) setattr_ext;
+};
+
#define CEPH_MDS_FLAG_REPLAY 1 /* this is a replayed op */
#define CEPH_MDS_FLAG_WANT_DENTRY 2 /* want dentry in reply */
#define CEPH_MDS_FLAG_ASYNC 4 /* request is asynchronous */
-struct ceph_mds_request_head {
+struct ceph_mds_request_head_old {
__le64 oldest_client_tid;
__le32 mdsmap_epoch; /* on client */
__le32 flags; /* CEPH_MDS_FLAG_* */
@@ -462,6 +488,22 @@ struct ceph_mds_request_head {
union ceph_mds_request_args args;
} __attribute__ ((packed));
+#define CEPH_MDS_REQUEST_HEAD_VERSION 1
+
+struct ceph_mds_request_head {
+ __le16 version; /* struct version */
+ __le64 oldest_client_tid;
+ __le32 mdsmap_epoch; /* on client */
+ __le32 flags; /* CEPH_MDS_FLAG_* */
+ __u8 num_retry, num_fwd; /* count retry, fwd attempts */
+ __le16 num_releases; /* # include cap/lease release records */
+ __le32 op; /* mds op code */
+ __le32 caller_uid, caller_gid;
+ __le64 ino; /* use this ino for openc, mkdir, mknod,
+ etc. (if replaying) */
+ union ceph_mds_request_args_ext args;
+} __attribute__ ((packed));
+
/* cap/lease release record */
struct ceph_mds_request_release {
__le64 ino, cap_id; /* ino and unique cap id */
diff --git a/include/linux/ceph/decode.h b/include/linux/ceph/decode.h
index 450384fe487c..04f3ace5787b 100644
--- a/include/linux/ceph/decode.h
+++ b/include/linux/ceph/decode.h
@@ -220,6 +220,8 @@ static inline void ceph_encode_timespec64(struct ceph_timespec *tv,
*/
#define CEPH_ENTITY_ADDR_TYPE_NONE 0
#define CEPH_ENTITY_ADDR_TYPE_LEGACY __cpu_to_le32(1)
+#define CEPH_ENTITY_ADDR_TYPE_MSGR2 __cpu_to_le32(2)
+#define CEPH_ENTITY_ADDR_TYPE_ANY __cpu_to_le32(3)
static inline void ceph_encode_banner_addr(struct ceph_entity_addr *a)
{
@@ -239,6 +241,12 @@ static inline void ceph_decode_banner_addr(struct ceph_entity_addr *a)
extern int ceph_decode_entity_addr(void **p, void *end,
struct ceph_entity_addr *addr);
+int ceph_decode_entity_addrvec(void **p, void *end, bool msgr2,
+ struct ceph_entity_addr *addr);
+
+int ceph_entity_addr_encoding_len(const struct ceph_entity_addr *addr);
+void ceph_encode_entity_addr(void **p, const struct ceph_entity_addr *addr);
+
/*
* encoders
*/
diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h
index c8645f0b797d..eb9008bb3992 100644
--- a/include/linux/ceph/libceph.h
+++ b/include/linux/ceph/libceph.h
@@ -31,10 +31,10 @@
#define CEPH_OPT_FSID (1<<0)
#define CEPH_OPT_NOSHARE (1<<1) /* don't share client with other sbs */
#define CEPH_OPT_MYIP (1<<2) /* specified my ip */
-#define CEPH_OPT_NOCRC (1<<3) /* no data crc on writes */
+#define CEPH_OPT_NOCRC (1<<3) /* no data crc on writes (msgr1) */
#define CEPH_OPT_NOMSGAUTH (1<<4) /* don't require msg signing feat */
#define CEPH_OPT_TCP_NODELAY (1<<5) /* TCP_NODELAY on TCP sockets */
-#define CEPH_OPT_NOMSGSIGN (1<<6) /* don't sign msgs */
+#define CEPH_OPT_NOMSGSIGN (1<<6) /* don't sign msgs (msgr1) */
#define CEPH_OPT_ABORT_ON_FULL (1<<7) /* abort w/ ENOSPC when full */
#define CEPH_OPT_DEFAULT (CEPH_OPT_TCP_NODELAY)
@@ -53,6 +53,7 @@ struct ceph_options {
unsigned long osd_keepalive_timeout; /* jiffies */
unsigned long osd_request_timeout; /* jiffies */
u32 read_from_replica; /* CEPH_OSD_FLAG_BALANCE/LOCALIZE_READS */
+ int con_modes[2]; /* CEPH_CON_MODE_* */
/*
* any type that can't be simply compared or doesn't need
@@ -83,6 +84,7 @@ struct ceph_options {
#define CEPH_MONC_HUNT_BACKOFF 2
#define CEPH_MONC_HUNT_MAX_MULT 10
+#define CEPH_MSG_MAX_CONTROL_LEN (16*1024*1024)
#define CEPH_MSG_MAX_FRONT_LEN (16*1024*1024)
#define CEPH_MSG_MAX_MIDDLE_LEN (16*1024*1024)
@@ -104,6 +106,7 @@ enum {
CEPH_MOUNT_UNMOUNTING,
CEPH_MOUNT_UNMOUNTED,
CEPH_MOUNT_SHUTDOWN,
+ CEPH_MOUNT_RECOVER,
};
static inline unsigned long ceph_timeout_jiffies(unsigned long timeout)
@@ -150,6 +153,10 @@ struct ceph_client {
#define from_msgr(ms) container_of(ms, struct ceph_client, msgr)
+static inline bool ceph_msgr2(struct ceph_client *client)
+{
+ return client->options->con_modes[0] != CEPH_CON_MODE_UNKNOWN;
+}
/*
* snapshots
diff --git a/include/linux/ceph/mdsmap.h b/include/linux/ceph/mdsmap.h
index 35d385296fbb..523fd0452856 100644
--- a/include/linux/ceph/mdsmap.h
+++ b/include/linux/ceph/mdsmap.h
@@ -64,7 +64,7 @@ static inline bool ceph_mdsmap_is_laggy(struct ceph_mdsmap *m, int w)
}
extern int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m);
-extern struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end);
+struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end, bool msgr2);
extern void ceph_mdsmap_destroy(struct ceph_mdsmap *m);
extern bool ceph_mdsmap_is_cluster_available(struct ceph_mdsmap *m);
diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h
index 60b324efd1c4..0e6e9ad3c3bf 100644
--- a/include/linux/ceph/messenger.h
+++ b/include/linux/ceph/messenger.h
@@ -3,6 +3,7 @@
#define __FS_CEPH_MESSENGER_H
#include <linux/bvec.h>
+#include <linux/crypto.h>
#include <linux/kref.h>
#include <linux/mutex.h>
#include <linux/net.h>
@@ -52,6 +53,23 @@ struct ceph_connection_operations {
int (*sign_message) (struct ceph_msg *msg);
int (*check_message_signature) (struct ceph_msg *msg);
+
+ /* msgr2 authentication exchange */
+ int (*get_auth_request)(struct ceph_connection *con,
+ void *buf, int *buf_len,
+ void **authorizer, int *authorizer_len);
+ int (*handle_auth_reply_more)(struct ceph_connection *con,
+ void *reply, int reply_len,
+ void *buf, int *buf_len,
+ void **authorizer, int *authorizer_len);
+ int (*handle_auth_done)(struct ceph_connection *con,
+ u64 global_id, void *reply, int reply_len,
+ u8 *session_key, int *session_key_len,
+ u8 *con_secret, int *con_secret_len);
+ int (*handle_auth_bad_method)(struct ceph_connection *con,
+ int used_proto, int result,
+ const int *allowed_protos, int proto_cnt,
+ const int *allowed_modes, int mode_cnt);
};
/* use format string %s%lld */
@@ -235,14 +253,171 @@ struct ceph_msg {
bool more_to_follow;
bool needs_out_seq;
int front_alloc_len;
- unsigned long ack_stamp; /* tx: when we were acked */
struct ceph_msgpool *pool;
};
+/*
+ * connection states
+ */
+#define CEPH_CON_S_CLOSED 1
+#define CEPH_CON_S_PREOPEN 2
+#define CEPH_CON_S_V1_BANNER 3
+#define CEPH_CON_S_V1_CONNECT_MSG 4
+#define CEPH_CON_S_V2_BANNER_PREFIX 5
+#define CEPH_CON_S_V2_BANNER_PAYLOAD 6
+#define CEPH_CON_S_V2_HELLO 7
+#define CEPH_CON_S_V2_AUTH 8
+#define CEPH_CON_S_V2_AUTH_SIGNATURE 9
+#define CEPH_CON_S_V2_SESSION_CONNECT 10
+#define CEPH_CON_S_V2_SESSION_RECONNECT 11
+#define CEPH_CON_S_OPEN 12
+#define CEPH_CON_S_STANDBY 13
+
+/*
+ * ceph_connection flag bits
+ */
+#define CEPH_CON_F_LOSSYTX 0 /* we can close channel or drop
+ messages on errors */
+#define CEPH_CON_F_KEEPALIVE_PENDING 1 /* we need to send a keepalive */
+#define CEPH_CON_F_WRITE_PENDING 2 /* we have data ready to send */
+#define CEPH_CON_F_SOCK_CLOSED 3 /* socket state changed to closed */
+#define CEPH_CON_F_BACKOFF 4 /* need to retry queuing delayed
+ work */
+
/* ceph connection fault delay defaults, for exponential backoff */
-#define BASE_DELAY_INTERVAL (HZ/2)
-#define MAX_DELAY_INTERVAL (5 * 60 * HZ)
+#define BASE_DELAY_INTERVAL (HZ / 4)
+#define MAX_DELAY_INTERVAL (15 * HZ)
+
+struct ceph_connection_v1_info {
+ struct kvec out_kvec[8], /* sending header/footer data */
+ *out_kvec_cur;
+ int out_kvec_left; /* kvec's left in out_kvec */
+ int out_skip; /* skip this many bytes */
+ int out_kvec_bytes; /* total bytes left */
+ bool out_more; /* there is more data after the kvecs */
+ bool out_msg_done;
+
+ struct ceph_auth_handshake *auth;
+ int auth_retry; /* true if we need a newer authorizer */
+
+ /* connection negotiation temps */
+ u8 in_banner[CEPH_BANNER_MAX_LEN];
+ struct ceph_entity_addr actual_peer_addr;
+ struct ceph_entity_addr peer_addr_for_me;
+ struct ceph_msg_connect out_connect;
+ struct ceph_msg_connect_reply in_reply;
+
+ int in_base_pos; /* bytes read */
+
+ /* message in temps */
+ u8 in_tag; /* protocol control byte */
+ struct ceph_msg_header in_hdr;
+ __le64 in_temp_ack; /* for reading an ack */
+
+ /* message out temps */
+ struct ceph_msg_header out_hdr;
+ __le64 out_temp_ack; /* for writing an ack */
+ struct ceph_timespec out_temp_keepalive2; /* for writing keepalive2
+ stamp */
+
+ u32 connect_seq; /* identify the most recent connection
+ attempt for this session */
+ u32 peer_global_seq; /* peer's global seq for this connection */
+};
+
+#define CEPH_CRC_LEN 4
+#define CEPH_GCM_KEY_LEN 16
+#define CEPH_GCM_IV_LEN sizeof(struct ceph_gcm_nonce)
+#define CEPH_GCM_BLOCK_LEN 16
+#define CEPH_GCM_TAG_LEN 16
+
+#define CEPH_PREAMBLE_LEN 32
+#define CEPH_PREAMBLE_INLINE_LEN 48
+#define CEPH_PREAMBLE_PLAIN_LEN CEPH_PREAMBLE_LEN
+#define CEPH_PREAMBLE_SECURE_LEN (CEPH_PREAMBLE_LEN + \
+ CEPH_PREAMBLE_INLINE_LEN + \
+ CEPH_GCM_TAG_LEN)
+#define CEPH_EPILOGUE_PLAIN_LEN (1 + 3 * CEPH_CRC_LEN)
+#define CEPH_EPILOGUE_SECURE_LEN (CEPH_GCM_BLOCK_LEN + CEPH_GCM_TAG_LEN)
+
+#define CEPH_FRAME_MAX_SEGMENT_COUNT 4
+
+struct ceph_frame_desc {
+ int fd_tag; /* FRAME_TAG_* */
+ int fd_seg_cnt;
+ int fd_lens[CEPH_FRAME_MAX_SEGMENT_COUNT]; /* logical */
+ int fd_aligns[CEPH_FRAME_MAX_SEGMENT_COUNT];
+};
+
+struct ceph_gcm_nonce {
+ __le32 fixed;
+ __le64 counter __packed;
+};
+
+struct ceph_connection_v2_info {
+ struct iov_iter in_iter;
+ struct kvec in_kvecs[5]; /* recvmsg */
+ struct bio_vec in_bvec; /* recvmsg (in_cursor) */
+ int in_kvec_cnt;
+ int in_state; /* IN_S_* */
+
+ struct iov_iter out_iter;
+ struct kvec out_kvecs[8]; /* sendmsg */
+ struct bio_vec out_bvec; /* sendpage (out_cursor, out_zero),
+ sendmsg (out_enc_pages) */
+ int out_kvec_cnt;
+ int out_state; /* OUT_S_* */
+
+ int out_zero; /* # of zero bytes to send */
+ bool out_iter_sendpage; /* use sendpage if possible */
+
+ struct ceph_frame_desc in_desc;
+ struct ceph_msg_data_cursor in_cursor;
+ struct ceph_msg_data_cursor out_cursor;
+
+ struct crypto_shash *hmac_tfm; /* post-auth signature */
+ struct crypto_aead *gcm_tfm; /* on-wire encryption */
+ struct aead_request *gcm_req;
+ struct crypto_wait gcm_wait;
+ struct ceph_gcm_nonce in_gcm_nonce;
+ struct ceph_gcm_nonce out_gcm_nonce;
+
+ struct page **out_enc_pages;
+ int out_enc_page_cnt;
+ int out_enc_resid;
+ int out_enc_i;
+
+ int con_mode; /* CEPH_CON_MODE_* */
+
+ void *conn_bufs[16];
+ int conn_buf_cnt;
+
+ struct kvec in_sign_kvecs[8];
+ struct kvec out_sign_kvecs[8];
+ int in_sign_kvec_cnt;
+ int out_sign_kvec_cnt;
+
+ u64 client_cookie;
+ u64 server_cookie;
+ u64 global_seq;
+ u64 connect_seq;
+ u64 peer_global_seq;
+
+ u8 in_buf[CEPH_PREAMBLE_SECURE_LEN];
+ u8 out_buf[CEPH_PREAMBLE_SECURE_LEN];
+ struct {
+ u8 late_status; /* FRAME_LATE_STATUS_* */
+ union {
+ struct {
+ u32 front_crc;
+ u32 middle_crc;
+ u32 data_crc;
+ } __packed;
+ u8 pad[CEPH_GCM_BLOCK_LEN - 1];
+ };
+ } out_epil;
+};
/*
* A single connection with another host.
@@ -258,24 +433,16 @@ struct ceph_connection {
struct ceph_messenger *msgr;
+ int state; /* CEPH_CON_S_* */
atomic_t sock_state;
struct socket *sock;
- struct ceph_entity_addr peer_addr; /* peer address */
- struct ceph_entity_addr peer_addr_for_me;
- unsigned long flags;
- unsigned long state;
+ unsigned long flags; /* CEPH_CON_F_* */
const char *error_msg; /* error message, if any */
struct ceph_entity_name peer_name; /* peer name */
-
+ struct ceph_entity_addr peer_addr; /* peer address */
u64 peer_features;
- u32 connect_seq; /* identify the most recent connection
- attempt for this connection, client */
- u32 peer_global_seq; /* peer's global seq for this connection */
-
- struct ceph_auth_handshake *auth;
- int auth_retry; /* true if we need a newer authorizer */
struct mutex mutex;
@@ -286,43 +453,80 @@ struct ceph_connection {
u64 in_seq, in_seq_acked; /* last message received, acked */
- /* connection negotiation temps */
- char in_banner[CEPH_BANNER_MAX_LEN];
- struct ceph_msg_connect out_connect;
- struct ceph_msg_connect_reply in_reply;
- struct ceph_entity_addr actual_peer_addr;
-
- /* message out temps */
- struct ceph_msg_header out_hdr;
+ struct ceph_msg *in_msg;
struct ceph_msg *out_msg; /* sending message (== tail of
out_sent) */
- bool out_msg_done;
-
- struct kvec out_kvec[8], /* sending header/footer data */
- *out_kvec_cur;
- int out_kvec_left; /* kvec's left in out_kvec */
- int out_skip; /* skip this many bytes */
- int out_kvec_bytes; /* total bytes left */
- int out_more; /* there is more data after the kvecs */
- __le64 out_temp_ack; /* for writing an ack */
- struct ceph_timespec out_temp_keepalive2; /* for writing keepalive2
- stamp */
- /* message in temps */
- struct ceph_msg_header in_hdr;
- struct ceph_msg *in_msg;
u32 in_front_crc, in_middle_crc, in_data_crc; /* calculated crc */
- char in_tag; /* protocol control byte */
- int in_base_pos; /* bytes read */
- __le64 in_temp_ack; /* for reading an ack */
-
struct timespec64 last_keepalive_ack; /* keepalive2 ack stamp */
struct delayed_work work; /* send|recv work */
unsigned long delay; /* current delay interval */
+
+ union {
+ struct ceph_connection_v1_info v1;
+ struct ceph_connection_v2_info v2;
+ };
};
+extern struct page *ceph_zero_page;
+
+void ceph_con_flag_clear(struct ceph_connection *con, unsigned long con_flag);
+void ceph_con_flag_set(struct ceph_connection *con, unsigned long con_flag);
+bool ceph_con_flag_test(struct ceph_connection *con, unsigned long con_flag);
+bool ceph_con_flag_test_and_clear(struct ceph_connection *con,
+ unsigned long con_flag);
+bool ceph_con_flag_test_and_set(struct ceph_connection *con,
+ unsigned long con_flag);
+
+void ceph_encode_my_addr(struct ceph_messenger *msgr);
+
+int ceph_tcp_connect(struct ceph_connection *con);
+int ceph_con_close_socket(struct ceph_connection *con);
+void ceph_con_reset_session(struct ceph_connection *con);
+
+u32 ceph_get_global_seq(struct ceph_messenger *msgr, u32 gt);
+void ceph_con_discard_sent(struct ceph_connection *con, u64 ack_seq);
+void ceph_con_discard_requeued(struct ceph_connection *con, u64 reconnect_seq);
+
+void ceph_msg_data_cursor_init(struct ceph_msg_data_cursor *cursor,
+ struct ceph_msg *msg, size_t length);
+struct page *ceph_msg_data_next(struct ceph_msg_data_cursor *cursor,
+ size_t *page_offset, size_t *length,
+ bool *last_piece);
+void ceph_msg_data_advance(struct ceph_msg_data_cursor *cursor, size_t bytes);
+
+u32 ceph_crc32c_page(u32 crc, struct page *page, unsigned int page_offset,
+ unsigned int length);
+
+bool ceph_addr_is_blank(const struct ceph_entity_addr *addr);
+int ceph_addr_port(const struct ceph_entity_addr *addr);
+void ceph_addr_set_port(struct ceph_entity_addr *addr, int p);
+
+void ceph_con_process_message(struct ceph_connection *con);
+int ceph_con_in_msg_alloc(struct ceph_connection *con,
+ struct ceph_msg_header *hdr, int *skip);
+void ceph_con_get_out_msg(struct ceph_connection *con);
+
+/* messenger_v1.c */
+int ceph_con_v1_try_read(struct ceph_connection *con);
+int ceph_con_v1_try_write(struct ceph_connection *con);
+void ceph_con_v1_revoke(struct ceph_connection *con);
+void ceph_con_v1_revoke_incoming(struct ceph_connection *con);
+bool ceph_con_v1_opened(struct ceph_connection *con);
+void ceph_con_v1_reset_session(struct ceph_connection *con);
+void ceph_con_v1_reset_protocol(struct ceph_connection *con);
+
+/* messenger_v2.c */
+int ceph_con_v2_try_read(struct ceph_connection *con);
+int ceph_con_v2_try_write(struct ceph_connection *con);
+void ceph_con_v2_revoke(struct ceph_connection *con);
+void ceph_con_v2_revoke_incoming(struct ceph_connection *con);
+bool ceph_con_v2_opened(struct ceph_connection *con);
+void ceph_con_v2_reset_session(struct ceph_connection *con);
+void ceph_con_v2_reset_protocol(struct ceph_connection *con);
+
extern const char *ceph_pr_addr(const struct ceph_entity_addr *addr);
@@ -330,7 +534,6 @@ extern int ceph_parse_ips(const char *c, const char *end,
struct ceph_entity_addr *addr,
int max_count, int *count);
-
extern int ceph_msgr_init(void);
extern void ceph_msgr_exit(void);
extern void ceph_msgr_flush(void);
diff --git a/include/linux/ceph/msgr.h b/include/linux/ceph/msgr.h
index 9e50aede46c8..f5e02f6c0655 100644
--- a/include/linux/ceph/msgr.h
+++ b/include/linux/ceph/msgr.h
@@ -9,24 +9,45 @@
#define CEPH_MON_PORT 6789 /* default monitor port */
/*
- * client-side processes will try to bind to ports in this
- * range, simply for the benefit of tools like nmap or wireshark
- * that would like to identify the protocol.
- */
-#define CEPH_PORT_FIRST 6789
-#define CEPH_PORT_START 6800 /* non-monitors start here */
-#define CEPH_PORT_LAST 6900
-
-/*
* tcp connection banner. include a protocol version. and adjust
* whenever the wire protocol changes. try to keep this string length
* constant.
*/
#define CEPH_BANNER "ceph v027"
+#define CEPH_BANNER_LEN 9
#define CEPH_BANNER_MAX_LEN 30
/*
+ * messenger V2 connection banner prefix.
+ * The full banner string should have the form: "ceph v2\n<le16>"
+ * the 2 bytes are the length of the remaining banner.
+ */
+#define CEPH_BANNER_V2 "ceph v2\n"
+#define CEPH_BANNER_V2_LEN 8
+#define CEPH_BANNER_V2_PREFIX_LEN (CEPH_BANNER_V2_LEN + sizeof(__le16))
+
+/*
+ * messenger V2 features
+ */
+#define CEPH_MSGR2_INCARNATION_1 (0ull)
+
+#define DEFINE_MSGR2_FEATURE(bit, incarnation, name) \
+ static const uint64_t CEPH_MSGR2_FEATURE_##name = (1ULL << bit); \
+ static const uint64_t CEPH_MSGR2_FEATUREMASK_##name = \
+ (1ULL << bit | CEPH_MSGR2_INCARNATION_##incarnation);
+
+#define HAVE_MSGR2_FEATURE(x, name) \
+ (((x) & (CEPH_MSGR2_FEATUREMASK_##name)) == (CEPH_MSGR2_FEATUREMASK_##name))
+
+DEFINE_MSGR2_FEATURE( 0, 1, REVISION_1) // msgr2.1
+
+#define CEPH_MSGR2_SUPPORTED_FEATURES (CEPH_MSGR2_FEATURE_REVISION_1)
+
+#define CEPH_MSGR2_REQUIRED_FEATURES (CEPH_MSGR2_FEATURE_REVISION_1)
+
+
+/*
* Rollover-safe type and comparator for 32-bit sequence numbers.
* Comparator returns -1, 0, or 1.
*/
@@ -61,11 +82,18 @@ extern const char *ceph_entity_type_name(int type);
* entity_addr -- network address
*/
struct ceph_entity_addr {
- __le32 type;
+ __le32 type; /* CEPH_ENTITY_ADDR_TYPE_* */
__le32 nonce; /* unique id for process (e.g. pid) */
struct sockaddr_storage in_addr;
} __attribute__ ((packed));
+static inline bool ceph_addr_equal_no_type(const struct ceph_entity_addr *lhs,
+ const struct ceph_entity_addr *rhs)
+{
+ return !memcmp(&lhs->in_addr, &rhs->in_addr, sizeof(lhs->in_addr)) &&
+ lhs->nonce == rhs->nonce;
+}
+
struct ceph_entity_inst {
struct ceph_entity_name name;
struct ceph_entity_addr addr;
@@ -160,6 +188,24 @@ struct ceph_msg_header {
__le32 crc; /* header crc32c */
} __attribute__ ((packed));
+struct ceph_msg_header2 {
+ __le64 seq; /* message seq# for this session */
+ __le64 tid; /* transaction id */
+ __le16 type; /* message type */
+ __le16 priority; /* priority. higher value == higher priority */
+ __le16 version; /* version of message encoding */
+
+ __le32 data_pre_padding_len;
+ __le16 data_off; /* sender: include full offset;
+ receiver: mask against ~PAGE_MASK */
+
+ __le64 ack_seq;
+ __u8 flags;
+ /* oldest code we think can decode this. unknown if zero. */
+ __le16 compat_version;
+ __le16 reserved;
+} __attribute__ ((packed));
+
#define CEPH_MSG_PRIO_LOW 64
#define CEPH_MSG_PRIO_DEFAULT 127
#define CEPH_MSG_PRIO_HIGH 196
diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h
index cad9acfbc320..5553019c3f07 100644
--- a/include/linux/ceph/osdmap.h
+++ b/include/linux/ceph/osdmap.h
@@ -251,8 +251,8 @@ static inline int ceph_decode_pgid(void **p, void *end, struct ceph_pg *pgid)
}
struct ceph_osdmap *ceph_osdmap_alloc(void);
-extern struct ceph_osdmap *ceph_osdmap_decode(void **p, void *end);
-struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
+struct ceph_osdmap *ceph_osdmap_decode(void **p, void *end, bool msgr2);
+struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, bool msgr2,
struct ceph_osdmap *map);
extern void ceph_osdmap_destroy(struct ceph_osdmap *map);