diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2017-02-28 15:36:09 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-02-28 15:36:09 -0800 |
commit | b2deee2dc06db7cdf99b84346e69bdb9db9baa85 (patch) | |
tree | ceb073fa12c1a9804761ec8ce8911a517b007ed6 /include | |
parent | d4f4cf77b37eaea58ef863a4cbc95dad3880b524 (diff) | |
parent | 54ea0046b6fe36ec18e82d282a29a18da6cdea0f (diff) | |
download | linux-b2deee2dc06db7cdf99b84346e69bdb9db9baa85.tar.bz2 |
Merge tag 'ceph-for-4.11-rc1' of git://github.com/ceph/ceph-client
Pull ceph updates from Ilya Dryomov:
"This time around we have:
- support for rbd data-pool feature, which enables rbd images on
erasure-coded pools (myself). CEPH_PG_MAX_SIZE has been bumped to
allow erasure-coded profiles with k+m up to 32.
- a patch for ceph_d_revalidate() performance regression introduced
in 4.9, along with some cleanups in the area (Jeff Layton)
- a set of fixes for unsafe ->d_parent accesses in CephFS (Jeff
Layton)
- buffered reads are now processed in rsize windows instead of rasize
windows (Andreas Gerstmayr). The new default for rsize mount option
is 64M.
- ack vs commit distinction is gone, greatly simplifying ->fsync()
and MOSDOpReply handling code (myself)
... also a few filesystem bug fixes from Zheng, a CRUSH sync up (CRUSH
computations are still serialized though) and several minor fixes and
cleanups all over"
* tag 'ceph-for-4.11-rc1' of git://github.com/ceph/ceph-client: (52 commits)
libceph, rbd, ceph: WRITE | ONDISK -> WRITE
libceph: get rid of ack vs commit
ceph: remove special ack vs commit behavior
ceph: tidy some white space in get_nonsnap_parent()
crush: fix dprintk compilation
crush: do is_out test only if we do not collide
ceph: remove req from unsafe list when unregistering it
rbd: constify device_type structure
rbd: kill obj_request->object_name and rbd_segment_name_cache
rbd: store and use obj_request->object_no
rbd: RBD_V{1,2}_DATA_FORMAT macros
rbd: factor out __rbd_osd_req_create()
rbd: set offset and length outside of rbd_obj_request_create()
rbd: support for data-pool feature
rbd: introduce rbd_init_layout()
rbd: use rbd_obj_bytes() more
rbd: remove now unused rbd_obj_request_wait() and helpers
rbd: switch rbd_obj_method_sync() to ceph_osdc_call()
libceph: pass reply buffer length through ceph_osdc_call()
rbd: do away with obj_request in rbd_obj_read_sync()
...
Diffstat (limited to 'include')
-rw-r--r-- | include/linux/ceph/osd_client.h | 6 | ||||
-rw-r--r-- | include/linux/ceph/osdmap.h | 13 | ||||
-rw-r--r-- | include/linux/ceph/rados.h | 2 | ||||
-rw-r--r-- | include/linux/crush/crush.h | 41 | ||||
-rw-r--r-- | include/linux/crush/mapper.h | 16 |
5 files changed, 54 insertions, 24 deletions
diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 03a6653d329a..2ea0c282f3dc 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -22,7 +22,6 @@ struct ceph_osd_client; * completion callback for async writepages */ typedef void (*ceph_osdc_callback_t)(struct ceph_osd_request *); -typedef void (*ceph_osdc_unsafe_callback_t)(struct ceph_osd_request *, bool); #define CEPH_HOMELESS_OSD -1 @@ -170,15 +169,12 @@ struct ceph_osd_request { unsigned int r_num_ops; int r_result; - bool r_got_reply; struct ceph_osd_client *r_osdc; struct kref r_kref; bool r_mempool; - struct completion r_completion; - struct completion r_done_completion; /* fsync waiter */ + struct completion r_completion; /* private to osd_client.c */ ceph_osdc_callback_t r_callback; - ceph_osdc_unsafe_callback_t r_unsafe_callback; struct list_head r_unsafe_item; struct inode *r_inode; /* for use by callbacks */ diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index 9a9041784dcf..938656f70807 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -57,7 +57,7 @@ static inline bool ceph_can_shift_osds(struct ceph_pg_pool_info *pool) case CEPH_POOL_TYPE_EC: return false; default: - BUG_ON(1); + BUG(); } } @@ -82,13 +82,6 @@ void ceph_oloc_copy(struct ceph_object_locator *dest, void ceph_oloc_destroy(struct ceph_object_locator *oloc); /* - * Maximum supported by kernel client object name length - * - * (probably outdated: must be >= RBD_MAX_MD_NAME_LEN -- currently 100) - */ -#define CEPH_MAX_OID_NAME_LEN 100 - -/* * 51-char inline_name is long enough for all cephfs and all but one * rbd requests: <imgname> in "<imgname>.rbd"/"rbd_id.<imgname>" can be * arbitrarily long (~PAGE_SIZE). It's done once during rbd map; all @@ -173,8 +166,8 @@ struct ceph_osdmap { * the list of osds that store+replicate them. */ struct crush_map *crush; - struct mutex crush_scratch_mutex; - int crush_scratch_ary[CEPH_PG_MAX_SIZE * 3]; + struct mutex crush_workspace_mutex; + void *crush_workspace; }; static inline bool ceph_osd_exists(struct ceph_osdmap *map, int osd) diff --git a/include/linux/ceph/rados.h b/include/linux/ceph/rados.h index 5c0da61cb763..5d0018782d50 100644 --- a/include/linux/ceph/rados.h +++ b/include/linux/ceph/rados.h @@ -50,7 +50,7 @@ struct ceph_timespec { #define CEPH_PG_LAYOUT_LINEAR 2 #define CEPH_PG_LAYOUT_HYBRID 3 -#define CEPH_PG_MAX_SIZE 16 /* max # osds in a single pg */ +#define CEPH_PG_MAX_SIZE 32 /* max # osds in a single pg */ /* * placement group. diff --git a/include/linux/crush/crush.h b/include/linux/crush/crush.h index be8f12b8f195..fbecbd089d75 100644 --- a/include/linux/crush/crush.h +++ b/include/linux/crush/crush.h @@ -135,13 +135,6 @@ struct crush_bucket { __u32 size; /* num items */ __s32 *items; - /* - * cached random permutation: used for uniform bucket and for - * the linear search fallback for the other bucket types. - */ - __u32 perm_x; /* @x for which *perm is defined */ - __u32 perm_n; /* num elements of *perm that are permuted/defined */ - __u32 *perm; }; struct crush_bucket_uniform { @@ -211,6 +204,21 @@ struct crush_map { * device fails. */ __u8 chooseleaf_stable; + /* + * This value is calculated after decode or construction by + * the builder. It is exposed here (rather than having a + * 'build CRUSH working space' function) so that callers can + * reserve a static buffer, allocate space on the stack, or + * otherwise avoid calling into the heap allocator if they + * want to. The size of the working space depends on the map, + * while the size of the scratch vector passed to the mapper + * depends on the size of the desired result set. + * + * Nothing stops the caller from allocating both in one swell + * foop and passing in two points, though. + */ + size_t working_size; + #ifndef __KERNEL__ /* * version 0 (original) of straw_calc has various flaws. version 1 @@ -248,4 +256,23 @@ static inline int crush_calc_tree_node(int i) return ((i+1) << 1)-1; } +/* + * These data structures are private to the CRUSH implementation. They + * are exposed in this header file because builder needs their + * definitions to calculate the total working size. + * + * Moving this out of the crush map allow us to treat the CRUSH map as + * immutable within the mapper and removes the requirement for a CRUSH + * map lock. + */ +struct crush_work_bucket { + __u32 perm_x; /* @x for which *perm is defined */ + __u32 perm_n; /* num elements of *perm that are permuted/defined */ + __u32 *perm; /* Permutation of the bucket's items */ +}; + +struct crush_work { + struct crush_work_bucket **work; /* Per-bucket working store */ +}; + #endif diff --git a/include/linux/crush/mapper.h b/include/linux/crush/mapper.h index 5dfd5b1125d2..c95e19e1ff11 100644 --- a/include/linux/crush/mapper.h +++ b/include/linux/crush/mapper.h @@ -15,6 +15,20 @@ extern int crush_do_rule(const struct crush_map *map, int ruleno, int x, int *result, int result_max, const __u32 *weights, int weight_max, - int *scratch); + void *cwin); + +/* + * Returns the exact amount of workspace that will need to be used + * for a given combination of crush_map and result_max. The caller can + * then allocate this much on its own, either on the stack, in a + * per-thread long-lived buffer, or however it likes. + */ +static inline size_t crush_work_size(const struct crush_map *map, + int result_max) +{ + return map->working_size + result_max * 3 * sizeof(__u32); +} + +void crush_init_workspace(const struct crush_map *map, void *v); #endif |