summaryrefslogtreecommitdiffstats
path: root/include
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2012-07-31 14:35:28 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2012-07-31 14:35:28 -0700
commitcc8362b1f6d724e46f515121d442779924b19fec (patch)
tree86fb5c3767e538ec9ded57dd7b3ce5d69dcde691 /include
parent2e3ee613480563a6d5c01b57d342e65cc58c06df (diff)
parent1fe5e9932156f6122c3b1ff6ba7541c27c86718c (diff)
downloadlinux-cc8362b1f6d724e46f515121d442779924b19fec.tar.bz2
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client
Pull Ceph changes from Sage Weil: "Lots of stuff this time around: - lots of cleanup and refactoring in the libceph messenger code, and many hard to hit races and bugs closed as a result. - lots of cleanup and refactoring in the rbd code from Alex Elder, mostly in preparation for the layering functionality that will be coming in 3.7. - some misc rbd cleanups from Josh Durgin that are finally going upstream - support for CRUSH tunables (used by newer clusters to improve the data placement) - some cleanup in our use of d_parent that Al brought up a while back - a random collection of fixes across the tree There is another patch coming that fixes up our ->atomic_open() behavior, but I'm going to hammer on it a bit more before sending it." Fix up conflicts due to commits that were already committed earlier in drivers/block/rbd.c, net/ceph/{messenger.c, osd_client.c} * 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client: (132 commits) rbd: create rbd_refresh_helper() rbd: return obj version in __rbd_refresh_header() rbd: fixes in rbd_header_from_disk() rbd: always pass ops array to rbd_req_sync_op() rbd: pass null version pointer in add_snap() rbd: make rbd_create_rw_ops() return a pointer rbd: have __rbd_add_snap_dev() return a pointer libceph: recheck con state after allocating incoming message libceph: change ceph_con_in_msg_alloc convention to be less weird libceph: avoid dropping con mutex before fault libceph: verify state after retaking con lock after dispatch libceph: revoke mon_client messages on session restart libceph: fix handling of immediate socket connect failure ceph: update MAINTAINERS file libceph: be less chatty about stray replies libceph: clear all flags on con_close libceph: clean up con flags libceph: replace connection state bits with states libceph: drop unnecessary CLOSED check in socket state change callback libceph: close socket directly from ceph_con_close() ...
Diffstat (limited to 'include')
-rw-r--r--include/linux/ceph/ceph_features.h27
-rw-r--r--include/linux/ceph/ceph_fs.h14
-rw-r--r--include/linux/ceph/decode.h49
-rw-r--r--include/linux/ceph/libceph.h10
-rw-r--r--include/linux/ceph/messenger.h60
-rw-r--r--include/linux/ceph/mon_client.h2
-rw-r--r--include/linux/ceph/msgpool.h3
-rw-r--r--include/linux/crush/crush.h8
8 files changed, 113 insertions, 60 deletions
diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h
new file mode 100644
index 000000000000..dad579b0c0e6
--- /dev/null
+++ b/include/linux/ceph/ceph_features.h
@@ -0,0 +1,27 @@
+#ifndef __CEPH_FEATURES
+#define __CEPH_FEATURES
+
+/*
+ * feature bits
+ */
+#define CEPH_FEATURE_UID (1<<0)
+#define CEPH_FEATURE_NOSRCADDR (1<<1)
+#define CEPH_FEATURE_MONCLOCKCHECK (1<<2)
+#define CEPH_FEATURE_FLOCK (1<<3)
+#define CEPH_FEATURE_SUBSCRIBE2 (1<<4)
+#define CEPH_FEATURE_MONNAMES (1<<5)
+#define CEPH_FEATURE_RECONNECT_SEQ (1<<6)
+#define CEPH_FEATURE_DIRLAYOUTHASH (1<<7)
+/* bits 8-17 defined by user-space; not supported yet here */
+#define CEPH_FEATURE_CRUSH_TUNABLES (1<<18)
+
+/*
+ * Features supported.
+ */
+#define CEPH_FEATURES_SUPPORTED_DEFAULT \
+ (CEPH_FEATURE_NOSRCADDR | \
+ CEPH_FEATURE_CRUSH_TUNABLES)
+
+#define CEPH_FEATURES_REQUIRED_DEFAULT \
+ (CEPH_FEATURE_NOSRCADDR)
+#endif
diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h
index e81ab30d4896..d021610efd65 100644
--- a/include/linux/ceph/ceph_fs.h
+++ b/include/linux/ceph/ceph_fs.h
@@ -35,20 +35,6 @@
/* arbitrary limit on max # of monitors (cluster of 3 is typical) */
#define CEPH_MAX_MON 31
-
-/*
- * feature bits
- */
-#define CEPH_FEATURE_UID (1<<0)
-#define CEPH_FEATURE_NOSRCADDR (1<<1)
-#define CEPH_FEATURE_MONCLOCKCHECK (1<<2)
-#define CEPH_FEATURE_FLOCK (1<<3)
-#define CEPH_FEATURE_SUBSCRIBE2 (1<<4)
-#define CEPH_FEATURE_MONNAMES (1<<5)
-#define CEPH_FEATURE_RECONNECT_SEQ (1<<6)
-#define CEPH_FEATURE_DIRLAYOUTHASH (1<<7)
-
-
/*
* ceph_file_layout - describe data layout for a file/inode
*/
diff --git a/include/linux/ceph/decode.h b/include/linux/ceph/decode.h
index d8615dee5808..4bbf2db45f46 100644
--- a/include/linux/ceph/decode.h
+++ b/include/linux/ceph/decode.h
@@ -1,6 +1,7 @@
#ifndef __CEPH_DECODE_H
#define __CEPH_DECODE_H
+#include <linux/err.h>
#include <linux/bug.h>
#include <linux/time.h>
#include <asm/unaligned.h>
@@ -85,6 +86,52 @@ static inline int ceph_has_room(void **p, void *end, size_t n)
} while (0)
/*
+ * Allocate a buffer big enough to hold the wire-encoded string, and
+ * decode the string into it. The resulting string will always be
+ * terminated with '\0'. If successful, *p will be advanced
+ * past the decoded data. Also, if lenp is not a null pointer, the
+ * length (not including the terminating '\0') will be recorded in
+ * *lenp. Note that a zero-length string is a valid return value.
+ *
+ * Returns a pointer to the newly-allocated string buffer, or a
+ * pointer-coded errno if an error occurs. Neither *p nor *lenp
+ * will have been updated if an error is returned.
+ *
+ * There are two possible failures:
+ * - converting the string would require accessing memory at or
+ * beyond the "end" pointer provided (-ERANGE)
+ * - memory could not be allocated for the result (-ENOMEM)
+ */
+static inline char *ceph_extract_encoded_string(void **p, void *end,
+ size_t *lenp, gfp_t gfp)
+{
+ u32 len;
+ void *sp = *p;
+ char *buf;
+
+ ceph_decode_32_safe(&sp, end, len, bad);
+ if (!ceph_has_room(&sp, end, len))
+ goto bad;
+
+ buf = kmalloc(len + 1, gfp);
+ if (!buf)
+ return ERR_PTR(-ENOMEM);
+
+ if (len)
+ memcpy(buf, sp, len);
+ buf[len] = '\0';
+
+ *p = (char *) *p + sizeof (u32) + len;
+ if (lenp)
+ *lenp = (size_t) len;
+
+ return buf;
+
+bad:
+ return ERR_PTR(-ERANGE);
+}
+
+/*
* struct ceph_timespec <-> struct timespec
*/
static inline void ceph_decode_timespec(struct timespec *ts,
@@ -151,7 +198,7 @@ static inline void ceph_encode_filepath(void **p, void *end,
u64 ino, const char *path)
{
u32 len = path ? strlen(path) : 0;
- BUG_ON(*p + sizeof(ino) + sizeof(len) + len > end);
+ BUG_ON(*p + 1 + sizeof(ino) + sizeof(len) + len > end);
ceph_encode_8(p, 1);
ceph_encode_64(p, ino);
ceph_encode_32(p, len);
diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h
index e71d683982a6..42624789b06f 100644
--- a/include/linux/ceph/libceph.h
+++ b/include/linux/ceph/libceph.h
@@ -23,12 +23,6 @@
#include "ceph_fs.h"
/*
- * Supported features
- */
-#define CEPH_FEATURE_SUPPORTED_DEFAULT CEPH_FEATURE_NOSRCADDR
-#define CEPH_FEATURE_REQUIRED_DEFAULT CEPH_FEATURE_NOSRCADDR
-
-/*
* mount options
*/
#define CEPH_OPT_FSID (1<<0)
@@ -132,7 +126,7 @@ struct ceph_client {
u32 supported_features;
u32 required_features;
- struct ceph_messenger *msgr; /* messenger instance */
+ struct ceph_messenger msgr; /* messenger instance */
struct ceph_mon_client monc;
struct ceph_osd_client osdc;
@@ -160,7 +154,7 @@ struct ceph_client {
struct ceph_snap_context {
atomic_t nref;
u64 seq;
- int num_snaps;
+ u32 num_snaps;
u64 snaps[];
};
diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h
index 44c87e731e9d..189ae0637634 100644
--- a/include/linux/ceph/messenger.h
+++ b/include/linux/ceph/messenger.h
@@ -31,9 +31,6 @@ struct ceph_connection_operations {
int (*verify_authorizer_reply) (struct ceph_connection *con, int len);
int (*invalidate_authorizer)(struct ceph_connection *con);
- /* protocol version mismatch */
- void (*bad_proto) (struct ceph_connection *con);
-
/* there was some error on the socket (disconnect, whatever) */
void (*fault) (struct ceph_connection *con);
@@ -53,6 +50,7 @@ struct ceph_messenger {
struct ceph_entity_inst inst; /* my name+address */
struct ceph_entity_addr my_enc_addr;
+ atomic_t stopping;
bool nocrc;
/*
@@ -80,7 +78,10 @@ struct ceph_msg {
unsigned nr_pages; /* size of page array */
unsigned page_alignment; /* io offset in first page */
struct ceph_pagelist *pagelist; /* instead of pages */
+
+ struct ceph_connection *con;
struct list_head list_head;
+
struct kref kref;
struct bio *bio; /* instead of pages/pagelist */
struct bio *bio_iter; /* bio iterator */
@@ -106,23 +107,6 @@ struct ceph_msg_pos {
#define MAX_DELAY_INTERVAL (5 * 60 * HZ)
/*
- * ceph_connection state bit flags
- */
-#define LOSSYTX 0 /* we can close channel or drop messages on errors */
-#define CONNECTING 1
-#define NEGOTIATING 2
-#define KEEPALIVE_PENDING 3
-#define WRITE_PENDING 4 /* we have data ready to send */
-#define STANDBY 8 /* no outgoing messages, socket closed. we keep
- * the ceph_connection around to maintain shared
- * state with the peer. */
-#define CLOSED 10 /* we've closed the connection */
-#define SOCK_CLOSED 11 /* socket state changed to closed */
-#define OPENING 13 /* open connection w/ (possibly new) peer */
-#define DEAD 14 /* dead, about to kfree */
-#define BACKOFF 15
-
-/*
* A single connection with another host.
*
* We maintain a queue of outgoing messages, and some session state to
@@ -131,18 +115,22 @@ struct ceph_msg_pos {
*/
struct ceph_connection {
void *private;
- atomic_t nref;
const struct ceph_connection_operations *ops;
struct ceph_messenger *msgr;
+
+ atomic_t sock_state;
struct socket *sock;
- unsigned long state; /* connection state (see flags above) */
+ struct ceph_entity_addr peer_addr; /* peer address */
+ struct ceph_entity_addr peer_addr_for_me;
+
+ unsigned long flags;
+ unsigned long state;
const char *error_msg; /* error message, if any */
- struct ceph_entity_addr peer_addr; /* peer address */
struct ceph_entity_name peer_name; /* peer name */
- struct ceph_entity_addr peer_addr_for_me;
+
unsigned peer_features;
u32 connect_seq; /* identify the most recent connection
attempt for this connection, client */
@@ -207,24 +195,26 @@ extern int ceph_msgr_init(void);
extern void ceph_msgr_exit(void);
extern void ceph_msgr_flush(void);
-extern struct ceph_messenger *ceph_messenger_create(
- struct ceph_entity_addr *myaddr,
- u32 features, u32 required);
-extern void ceph_messenger_destroy(struct ceph_messenger *);
+extern void ceph_messenger_init(struct ceph_messenger *msgr,
+ struct ceph_entity_addr *myaddr,
+ u32 supported_features,
+ u32 required_features,
+ bool nocrc);
-extern void ceph_con_init(struct ceph_messenger *msgr,
- struct ceph_connection *con);
+extern void ceph_con_init(struct ceph_connection *con, void *private,
+ const struct ceph_connection_operations *ops,
+ struct ceph_messenger *msgr);
extern void ceph_con_open(struct ceph_connection *con,
+ __u8 entity_type, __u64 entity_num,
struct ceph_entity_addr *addr);
extern bool ceph_con_opened(struct ceph_connection *con);
extern void ceph_con_close(struct ceph_connection *con);
extern void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg);
-extern void ceph_con_revoke(struct ceph_connection *con, struct ceph_msg *msg);
-extern void ceph_con_revoke_message(struct ceph_connection *con,
- struct ceph_msg *msg);
+
+extern void ceph_msg_revoke(struct ceph_msg *msg);
+extern void ceph_msg_revoke_incoming(struct ceph_msg *msg);
+
extern void ceph_con_keepalive(struct ceph_connection *con);
-extern struct ceph_connection *ceph_con_get(struct ceph_connection *con);
-extern void ceph_con_put(struct ceph_connection *con);
extern struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags,
bool can_fail);
diff --git a/include/linux/ceph/mon_client.h b/include/linux/ceph/mon_client.h
index 545f85917780..2113e3850a4e 100644
--- a/include/linux/ceph/mon_client.h
+++ b/include/linux/ceph/mon_client.h
@@ -70,7 +70,7 @@ struct ceph_mon_client {
bool hunting;
int cur_mon; /* last monitor i contacted */
unsigned long sub_sent, sub_renew_after;
- struct ceph_connection *con;
+ struct ceph_connection con;
bool have_fsid;
/* pending generic requests */
diff --git a/include/linux/ceph/msgpool.h b/include/linux/ceph/msgpool.h
index a362605f9368..09fa96b43436 100644
--- a/include/linux/ceph/msgpool.h
+++ b/include/linux/ceph/msgpool.h
@@ -11,10 +11,11 @@
struct ceph_msgpool {
const char *name;
mempool_t *pool;
+ int type; /* preallocated message type */
int front_len; /* preallocated payload size */
};
-extern int ceph_msgpool_init(struct ceph_msgpool *pool,
+extern int ceph_msgpool_init(struct ceph_msgpool *pool, int type,
int front_len, int size, bool blocking,
const char *name);
extern void ceph_msgpool_destroy(struct ceph_msgpool *pool);
diff --git a/include/linux/crush/crush.h b/include/linux/crush/crush.h
index 7c4750811b96..25baa287cff7 100644
--- a/include/linux/crush/crush.h
+++ b/include/linux/crush/crush.h
@@ -154,6 +154,14 @@ struct crush_map {
__s32 max_buckets;
__u32 max_rules;
__s32 max_devices;
+
+ /* choose local retries before re-descent */
+ __u32 choose_local_tries;
+ /* choose local attempts using a fallback permutation before
+ * re-descent */
+ __u32 choose_local_fallback_tries;
+ /* choose attempts before giving up */
+ __u32 choose_total_tries;
};