summary | refs | log | tree | commit | diff | stats
path: root/include
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2016-08-02 19:39:09 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2016-08-02 19:39:09 -0400
commit 72b5ac54d620b29cae23d25f0405f2765b466f72 (patch)
tree 276e6313a16f0821cb3da7061372f37b0feb8ace /include
parent c7fac299672ee98a1da90ea2e473180fc75d2c53 (diff)
parent a0f2b65275413b3438e9f55b1427273cd893c3b2 (diff)
download linux-72b5ac54d620b29cae23d25f0405f2765b466f72.tar.bz2
Merge tag 'ceph-for-4.8-rc1' of git://github.com/ceph/ceph-client
Pull Ceph updates from Ilya Dryomov:
 "The highlights are:

   - RADOS namespace support in libceph and CephFS (Zheng Yan and
     myself). The stopgaps added in 4.5 to deny access to inodes in
     namespaces are removed and CEPH_FEATURE_FS_FILE_LAYOUT_V2 feature
     bit is now fully supported

   - A large rework of the MDS cap flushing code (Zheng Yan)

   - Handle some of ->d_revalidate() in RCU mode (Jeff Layton). We were
     overly pessimistic before, bailing at the first sight of LOOKUP_RCU

  On top of that we've got a few CephFS bug fixes, a couple of cleanups
  and Arnd's workaround for a weird genksyms issue"

* tag 'ceph-for-4.8-rc1' of git://github.com/ceph/ceph-client: (34 commits)
  ceph: fix symbol versioning for ceph_monc_do_statfs
  ceph: Correctly return NXIO errors from ceph_llseek
  ceph: Mark the file cache as unreclaimable
  ceph: optimize cap flush waiting
  ceph: cleanup ceph_flush_snaps()
  ceph: kick cap flushes before sending other cap message
  ceph: introduce an inode flag to indicates if snapflush is needed
  ceph: avoid sending duplicated cap flush message
  ceph: unify cap flush and snapcap flush
  ceph: use list instead of rbtree to track cap flushes
  ceph: update types of some local varibles
  ceph: include 'follows' of pending snapflush in cap reconnect message
  ceph: update cap reconnect message to version 3
  ceph: mount non-default filesystem by name
  libceph: fsmap.user subscription support
  ceph: handle LOOKUP_RCU in ceph_d_revalidate
  ceph: allow dentry_lease_is_valid to work under RCU walk
  ceph: clear d_fsinfo pointer under d_lock
  ceph: remove ceph_mdsc_lease_release
  ceph: don't use ->d_time
  ...
Diffstat (limited to 'include')
-rw-r--r-- include/linux/ceph/ceph_fs.h | 55
-rw-r--r-- include/linux/ceph/decode.h | 55
-rw-r--r-- include/linux/ceph/libceph.h | 4
-rw-r--r-- include/linux/ceph/mon_client.h | 7
-rw-r--r-- include/linux/ceph/msgpool.h | 1
-rw-r--r-- include/linux/ceph/osd_client.h | 1
-rw-r--r-- include/linux/ceph/osdmap.h | 15
-rw-r--r-- include/linux/ceph/string_table.h | 62
8 files changed, 160 insertions, 40 deletions
diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h
index dfce616002ad..7868d602c0a0 100644
--- a/include/linux/ceph/ceph_fs.h
+++ b/include/linux/ceph/ceph_fs.h
@@ -34,9 +34,9 @@
#define CEPH_MAX_MON 31
/*
- * ceph_file_layout - describe data layout for a file/inode
+ * legacy ceph_file_layout
*/
-struct ceph_file_layout {
+struct ceph_file_layout_legacy {
/* file -> object mapping */
__le32 fl_stripe_unit; /* stripe unit, in bytes. must be multiple
of page size. */
@@ -53,33 +53,27 @@ struct ceph_file_layout {
__le32 fl_pg_pool; /* namespace, crush ruleset, rep level */
} __attribute__ ((packed));
-#define ceph_file_layout_su(l) ((__s32)le32_to_cpu((l).fl_stripe_unit))
-#define ceph_file_layout_stripe_count(l) \
- ((__s32)le32_to_cpu((l).fl_stripe_count))
-#define ceph_file_layout_object_size(l) ((__s32)le32_to_cpu((l).fl_object_size))
-#define ceph_file_layout_cas_hash(l) ((__s32)le32_to_cpu((l).fl_cas_hash))
-#define ceph_file_layout_object_su(l) \
- ((__s32)le32_to_cpu((l).fl_object_stripe_unit))
-#define ceph_file_layout_pg_pool(l) \
- ((__s32)le32_to_cpu((l).fl_pg_pool))
-
-static inline unsigned ceph_file_layout_stripe_width(struct ceph_file_layout *l)
-{
- return le32_to_cpu(l->fl_stripe_unit) *
- le32_to_cpu(l->fl_stripe_count);
-}
-
-/* "period" == bytes before i start on a new set of objects */
-static inline unsigned ceph_file_layout_period(struct ceph_file_layout *l)
-{
- return le32_to_cpu(l->fl_object_size) *
- le32_to_cpu(l->fl_stripe_count);
-}
+struct ceph_string;
+/*
+ * ceph_file_layout - describe data layout for a file/inode
+ */
+struct ceph_file_layout {
+ /* file -> object mapping */
+ u32 stripe_unit; /* stripe unit, in bytes */
+ u32 stripe_count; /* over this many objects */
+ u32 object_size; /* until objects are this big */
+ s64 pool_id; /* rados pool id */
+ struct ceph_string __rcu *pool_ns; /* rados pool namespace */
+};
+
+extern int ceph_file_layout_is_valid(const struct ceph_file_layout *layout);
+extern void ceph_file_layout_from_legacy(struct ceph_file_layout *fl,
+ struct ceph_file_layout_legacy *legacy);
+extern void ceph_file_layout_to_legacy(struct ceph_file_layout *fl,
+ struct ceph_file_layout_legacy *legacy);
#define CEPH_MIN_STRIPE_UNIT 65536
-int ceph_file_layout_is_valid(const struct ceph_file_layout *layout);
-
struct ceph_dir_layout {
__u8 dl_dir_hash; /* see ceph_hash.h for ids */
__u8 dl_unused1;
@@ -127,6 +121,7 @@ struct ceph_dir_layout {
/* client <-> mds */
#define CEPH_MSG_MDS_MAP 21
+#define CEPH_MSG_FS_MAP_USER 103
#define CEPH_MSG_CLIENT_SESSION 22
#define CEPH_MSG_CLIENT_RECONNECT 23
@@ -399,7 +394,7 @@ union ceph_mds_request_args {
__le32 flags;
} __attribute__ ((packed)) setxattr;
struct {
- struct ceph_file_layout layout;
+ struct ceph_file_layout_legacy layout;
} __attribute__ ((packed)) setlayout;
struct {
__u8 rule; /* currently fcntl or flock */
@@ -478,7 +473,7 @@ struct ceph_mds_reply_inode {
__le64 version; /* inode version */
__le64 xattr_version; /* version for xattr blob */
struct ceph_mds_reply_cap cap; /* caps issued for this inode */
- struct ceph_file_layout layout;
+ struct ceph_file_layout_legacy layout;
struct ceph_timespec ctime, mtime, atime;
__le32 time_warp_seq;
__le64 size, max_size, truncate_size;
@@ -531,7 +526,7 @@ struct ceph_filelock {
#define CEPH_FILE_MODE_WR 2
#define CEPH_FILE_MODE_RDWR 3 /* RD | WR */
#define CEPH_FILE_MODE_LAZY 4 /* lazy io */
-#define CEPH_FILE_MODE_NUM 8 /* bc these are bit fields.. mostly */
+#define CEPH_FILE_MODE_BITS 4
int ceph_flags_to_mode(int flags);
@@ -673,7 +668,7 @@ struct ceph_mds_caps {
__le64 size, max_size, truncate_size;
__le32 truncate_seq;
struct ceph_timespec mtime, atime, ctime;
- struct ceph_file_layout layout;
+ struct ceph_file_layout_legacy layout;
__le32 time_warp_seq;
} __attribute__ ((packed));
diff --git a/include/linux/ceph/decode.h b/include/linux/ceph/decode.h
index 19e9932f3e77..f990f2cc907a 100644
--- a/include/linux/ceph/decode.h
+++ b/include/linux/ceph/decode.h
@@ -3,6 +3,7 @@
#include <linux/err.h>
#include <linux/bug.h>
+#include <linux/slab.h>
#include <linux/time.h>
#include <asm/unaligned.h>
@@ -217,6 +218,60 @@ static inline void ceph_encode_string(void **p, void *end,
*p += len;
}
+/*
+ * version and length starting block encoders/decoders
+ */
+
+/* current code version (u8) + compat code version (u8) + len of struct (u32) */
+#define CEPH_ENCODING_START_BLK_LEN 6
+
+/**
+ * ceph_start_encoding - start encoding block
+ * @struct_v: current (code) version of the encoding
+ * @struct_compat: oldest code version that can decode it
+ * @struct_len: length of struct encoding
+ */
+static inline void ceph_start_encoding(void **p, u8 struct_v, u8 struct_compat,
+ u32 struct_len)
+{
+ ceph_encode_8(p, struct_v);
+ ceph_encode_8(p, struct_compat);
+ ceph_encode_32(p, struct_len);
+}
+
+/**
+ * ceph_start_decoding - start decoding block
+ * @v: current version of the encoding that the code supports
+ * @name: name of the struct (free-form)
+ * @struct_v: out param for the encoding version
+ * @struct_len: out param for the length of struct encoding
+ *
+ * Validates the length of struct encoding, so unsafe ceph_decode_*
+ * variants can be used for decoding.
+ */
+static inline int ceph_start_decoding(void **p, void *end, u8 v,
+ const char *name, u8 *struct_v,
+ u32 *struct_len)
+{
+ u8 struct_compat;
+
+ ceph_decode_need(p, end, CEPH_ENCODING_START_BLK_LEN, bad);
+ *struct_v = ceph_decode_8(p);
+ struct_compat = ceph_decode_8(p);
+ if (v < struct_compat) {
+ pr_warn("got struct_v %d struct_compat %d > %d of %s\n",
+ *struct_v, struct_compat, v, name);
+ return -EINVAL;
+ }
+
+ *struct_len = ceph_decode_32(p);
+ ceph_decode_need(p, end, *struct_len, bad);
+ return 0;
+
+bad:
+ return -ERANGE;
+}
+
#define ceph_encode_need(p, end, n, bad) \
do { \
if (!likely(ceph_has_room(p, end, n))) \
diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h
index 690985daad1c..83fc1fff7061 100644
--- a/include/linux/ceph/libceph.h
+++ b/include/linux/ceph/libceph.h
@@ -21,6 +21,7 @@
#include <linux/ceph/mon_client.h>
#include <linux/ceph/osd_client.h>
#include <linux/ceph/ceph_fs.h>
+#include <linux/ceph/string_table.h>
/*
* mount options
@@ -214,8 +215,9 @@ static void erase_##name(struct rb_root *root, type *t) \
}
#define DEFINE_RB_LOOKUP_FUNC(name, type, keyfld, nodefld) \
+extern type __lookup_##name##_key; \
static type *lookup_##name(struct rb_root *root, \
- typeof(((type *)0)->keyfld) key) \
+ typeof(__lookup_##name##_key.keyfld) key) \
{ \
struct rb_node *n = root->rb_node; \
\
diff --git a/include/linux/ceph/mon_client.h b/include/linux/ceph/mon_client.h
index e2a92df08b47..24d704d1ea5c 100644
--- a/include/linux/ceph/mon_client.h
+++ b/include/linux/ceph/mon_client.h
@@ -95,7 +95,7 @@ struct ceph_mon_client {
struct ceph_mon_subscribe_item item;
bool want;
u32 have; /* epoch */
- } subs[3];
+ } subs[4];
int fs_cluster_id; /* "mdsmap.<id>" sub */
#ifdef CONFIG_DEBUG_FS
@@ -111,9 +111,10 @@ extern int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl);
extern void ceph_monc_stop(struct ceph_mon_client *monc);
enum {
- CEPH_SUB_MDSMAP = 0,
- CEPH_SUB_MONMAP,
+ CEPH_SUB_MONMAP = 0,
CEPH_SUB_OSDMAP,
+ CEPH_SUB_FSMAP,
+ CEPH_SUB_MDSMAP,
};
extern const char *ceph_sub_str[];
diff --git a/include/linux/ceph/msgpool.h b/include/linux/ceph/msgpool.h
index 4b0d38960726..ddd0d48d0384 100644
--- a/include/linux/ceph/msgpool.h
+++ b/include/linux/ceph/msgpool.h
@@ -2,7 +2,6 @@
#define _FS_CEPH_MSGPOOL
#include <linux/mempool.h>
-#include <linux/ceph/messenger.h>
/*
* we use memory pools for preallocating messages we may receive, to
diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index 1b3b6e155392..858932304260 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -9,6 +9,7 @@
#include <linux/ceph/types.h>
#include <linux/ceph/osdmap.h>
#include <linux/ceph/messenger.h>
+#include <linux/ceph/msgpool.h>
#include <linux/ceph/auth.h>
#include <linux/ceph/pagelist.h>
diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h
index 9ccf4dbe55f8..9a9041784dcf 100644
--- a/include/linux/ceph/osdmap.h
+++ b/include/linux/ceph/osdmap.h
@@ -63,11 +63,13 @@ static inline bool ceph_can_shift_osds(struct ceph_pg_pool_info *pool)
struct ceph_object_locator {
s64 pool;
+ struct ceph_string *pool_ns;
};
static inline void ceph_oloc_init(struct ceph_object_locator *oloc)
{
oloc->pool = -1;
+ oloc->pool_ns = NULL;
}
static inline bool ceph_oloc_empty(const struct ceph_object_locator *oloc)
@@ -75,11 +77,9 @@ static inline bool ceph_oloc_empty(const struct ceph_object_locator *oloc)
return oloc->pool == -1;
}
-static inline void ceph_oloc_copy(struct ceph_object_locator *dest,
- const struct ceph_object_locator *src)
-{
- dest->pool = src->pool;
-}
+void ceph_oloc_copy(struct ceph_object_locator *dest,
+ const struct ceph_object_locator *src);
+void ceph_oloc_destroy(struct ceph_object_locator *oloc);
/*
* Maximum supported by kernel client object name length
@@ -115,6 +115,11 @@ static inline void ceph_oid_init(struct ceph_object_id *oid)
oid->name_len = 0;
}
+#define CEPH_OID_INIT_ONSTACK(oid) \
+ ({ ceph_oid_init(&oid); oid; })
+#define CEPH_DEFINE_OID_ONSTACK(oid) \
+ struct ceph_object_id oid = CEPH_OID_INIT_ONSTACK(oid)
+
static inline bool ceph_oid_empty(const struct ceph_object_id *oid)
{
return oid->name == oid->inline_name && !oid->name_len;
diff --git a/include/linux/ceph/string_table.h b/include/linux/ceph/string_table.h
new file mode 100644
index 000000000000..1b02c96daf75
--- /dev/null
+++ b/include/linux/ceph/string_table.h
@@ -0,0 +1,62 @@
+#ifndef _FS_CEPH_STRING_TABLE_H
+#define _FS_CEPH_STRING_TABLE_H
+
+#include <linux/types.h>
+#include <linux/kref.h>
+#include <linux/rbtree.h>
+#include <linux/rcupdate.h>
+
+struct ceph_string {
+ struct kref kref;
+ union {
+ struct rb_node node;
+ struct rcu_head rcu;
+ };
+ size_t len;
+ char str[];
+};
+
+extern void ceph_release_string(struct kref *ref);
+extern struct ceph_string *ceph_find_or_create_string(const char *str,
+ size_t len);
+extern bool ceph_strings_empty(void);
+
+static inline struct ceph_string *ceph_get_string(struct ceph_string *str)
+{
+ kref_get(&str->kref);
+ return str;
+}
+
+static inline void ceph_put_string(struct ceph_string *str)
+{
+ if (!str)
+ return;
+ kref_put(&str->kref, ceph_release_string);
+}
+
+static inline int ceph_compare_string(struct ceph_string *cs,
+ const char* str, size_t len)
+{
+ size_t cs_len = cs ? cs->len : 0;
+ if (cs_len != len)
+ return cs_len - len;
+ if (len == 0)
+ return 0;
+ return strncmp(cs->str, str, len);
+}
+
+#define ceph_try_get_string(x) \
+({ \
+ struct ceph_string *___str; \
+ rcu_read_lock(); \
+ for (;;) { \
+ ___str = rcu_dereference(x); \
+ if (!___str || \
+ kref_get_unless_zero(&___str->kref)) \
+ break; \
+ } \
+ rcu_read_unlock(); \
+ (___str); \
+})
+
+#endif