summaryrefslogtreecommitdiffstats
path: root/fs/ceph
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ceph')
-rw-r--r--fs/ceph/addr.c63
-rw-r--r--fs/ceph/auth.c1
-rw-r--r--fs/ceph/auth_none.c1
-rw-r--r--fs/ceph/auth_x.c1
-rw-r--r--fs/ceph/buffer.c3
-rw-r--r--fs/ceph/caps.c43
-rw-r--r--fs/ceph/crypto.c1
-rw-r--r--fs/ceph/debugfs.c1
-rw-r--r--fs/ceph/dir.c8
-rw-r--r--fs/ceph/export.c1
-rw-r--r--fs/ceph/file.c1
-rw-r--r--fs/ceph/inode.c10
-rw-r--r--fs/ceph/mds_client.c1
-rw-r--r--fs/ceph/messenger.c10
-rw-r--r--fs/ceph/mon_client.c1
-rw-r--r--fs/ceph/osdmap.c184
-rw-r--r--fs/ceph/osdmap.h1
-rw-r--r--fs/ceph/pagelist.c1
-rw-r--r--fs/ceph/rados.h6
-rw-r--r--fs/ceph/snap.c27
-rw-r--r--fs/ceph/super.c1
-rw-r--r--fs/ceph/super.h4
-rw-r--r--fs/ceph/xattr.c1
23 files changed, 236 insertions, 135 deletions
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index ce8ef6107727..412593703d1e 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -5,6 +5,7 @@
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/writeback.h> /* generic_writepages */
+#include <linux/slab.h>
#include <linux/pagevec.h>
#include <linux/task_io_accounting_ops.h>
@@ -336,16 +337,15 @@ out:
/*
* Get ref for the oldest snapc for an inode with dirty data... that is, the
* only snap context we are allowed to write back.
- *
- * Caller holds i_lock.
*/
-static struct ceph_snap_context *__get_oldest_context(struct inode *inode,
- u64 *snap_size)
+static struct ceph_snap_context *get_oldest_context(struct inode *inode,
+ u64 *snap_size)
{
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_snap_context *snapc = NULL;
struct ceph_cap_snap *capsnap = NULL;
+ spin_lock(&inode->i_lock);
list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) {
dout(" cap_snap %p snapc %p has %d dirty pages\n", capsnap,
capsnap->context, capsnap->dirty_pages);
@@ -356,21 +356,11 @@ static struct ceph_snap_context *__get_oldest_context(struct inode *inode,
break;
}
}
- if (!snapc && ci->i_snap_realm) {
- snapc = ceph_get_snap_context(ci->i_snap_realm->cached_context);
+ if (!snapc && ci->i_head_snapc) {
+ snapc = ceph_get_snap_context(ci->i_head_snapc);
dout(" head snapc %p has %d dirty pages\n",
snapc, ci->i_wrbuffer_ref_head);
}
- return snapc;
-}
-
-static struct ceph_snap_context *get_oldest_context(struct inode *inode,
- u64 *snap_size)
-{
- struct ceph_snap_context *snapc = NULL;
-
- spin_lock(&inode->i_lock);
- snapc = __get_oldest_context(inode, snap_size);
spin_unlock(&inode->i_lock);
return snapc;
}
@@ -391,7 +381,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
int len = PAGE_CACHE_SIZE;
loff_t i_size;
int err = 0;
- struct ceph_snap_context *snapc;
+ struct ceph_snap_context *snapc, *oldest;
u64 snap_size = 0;
long writeback_stat;
@@ -412,13 +402,16 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
dout("writepage %p page %p not dirty?\n", inode, page);
goto out;
}
- if (snapc != get_oldest_context(inode, &snap_size)) {
+ oldest = get_oldest_context(inode, &snap_size);
+ if (snapc->seq > oldest->seq) {
dout("writepage %p page %p snapc %p not writeable - noop\n",
inode, page, (void *)page->private);
/* we should only noop if called by kswapd */
WARN_ON((current->flags & PF_MEMALLOC) == 0);
+ ceph_put_snap_context(oldest);
goto out;
}
+ ceph_put_snap_context(oldest);
/* is this a partial page at end of file? */
if (snap_size)
@@ -457,7 +450,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
ClearPagePrivate(page);
end_page_writeback(page);
ceph_put_wrbuffer_cap_refs(ci, 1, snapc);
- ceph_put_snap_context(snapc);
+ ceph_put_snap_context(snapc); /* page's reference */
out:
return err;
}
@@ -557,9 +550,9 @@ static void writepages_finish(struct ceph_osd_request *req,
dout("inode %p skipping page %p\n", inode, page);
wbc->pages_skipped++;
}
+ ceph_put_snap_context((void *)page->private);
page->private = 0;
ClearPagePrivate(page);
- ceph_put_snap_context(snapc);
dout("unlocking %d %p\n", i, page);
end_page_writeback(page);
@@ -617,7 +610,7 @@ static int ceph_writepages_start(struct address_space *mapping,
int range_whole = 0;
int should_loop = 1;
pgoff_t max_pages = 0, max_pages_ever = 0;
- struct ceph_snap_context *snapc = NULL, *last_snapc = NULL;
+ struct ceph_snap_context *snapc = NULL, *last_snapc = NULL, *pgsnapc;
struct pagevec pvec;
int done = 0;
int rc = 0;
@@ -769,9 +762,10 @@ get_more_pages:
}
/* only if matching snap context */
- if (snapc != (void *)page->private) {
- dout("page snapc %p != oldest %p\n",
- (void *)page->private, snapc);
+ pgsnapc = (void *)page->private;
+ if (pgsnapc->seq > snapc->seq) {
+ dout("page snapc %p %lld > oldest %p %lld\n",
+ pgsnapc, pgsnapc->seq, snapc, snapc->seq);
unlock_page(page);
if (!locked_pages)
continue; /* keep looking for snap */
@@ -913,7 +907,10 @@ static int context_is_writeable_or_written(struct inode *inode,
struct ceph_snap_context *snapc)
{
struct ceph_snap_context *oldest = get_oldest_context(inode, NULL);
- return !oldest || snapc->seq <= oldest->seq;
+ int ret = !oldest || snapc->seq <= oldest->seq;
+
+ ceph_put_snap_context(oldest);
+ return ret;
}
/*
@@ -935,8 +932,8 @@ static int ceph_update_writeable_page(struct file *file,
int pos_in_page = pos & ~PAGE_CACHE_MASK;
int end_in_page = pos_in_page + len;
loff_t i_size;
- struct ceph_snap_context *snapc;
int r;
+ struct ceph_snap_context *snapc, *oldest;
retry_locked:
/* writepages currently holds page lock, but if we change that later, */
@@ -946,23 +943,24 @@ retry_locked:
BUG_ON(!ci->i_snap_realm);
down_read(&mdsc->snap_rwsem);
BUG_ON(!ci->i_snap_realm->cached_context);
- if (page->private &&
- (void *)page->private != ci->i_snap_realm->cached_context) {
+ snapc = (void *)page->private;
+ if (snapc && snapc != ci->i_head_snapc) {
/*
* this page is already dirty in another (older) snap
* context! is it writeable now?
*/
- snapc = get_oldest_context(inode, NULL);
+ oldest = get_oldest_context(inode, NULL);
up_read(&mdsc->snap_rwsem);
- if (snapc != (void *)page->private) {
+ if (snapc->seq > oldest->seq) {
+ ceph_put_snap_context(oldest);
dout(" page %p snapc %p not current or oldest\n",
- page, (void *)page->private);
+ page, snapc);
/*
* queue for writeback, and wait for snapc to
* be writeable or written
*/
- snapc = ceph_get_snap_context((void *)page->private);
+ snapc = ceph_get_snap_context(snapc);
unlock_page(page);
ceph_queue_writeback(inode);
r = wait_event_interruptible(ci->i_cap_wq,
@@ -972,6 +970,7 @@ retry_locked:
return r;
return -EAGAIN;
}
+ ceph_put_snap_context(oldest);
/* yay, writeable, do it now (without dropping page lock) */
dout(" page %p snapc %p not current, but oldest\n",
diff --git a/fs/ceph/auth.c b/fs/ceph/auth.c
index abb204fea6c7..f6394b94b866 100644
--- a/fs/ceph/auth.c
+++ b/fs/ceph/auth.c
@@ -1,6 +1,7 @@
#include "ceph_debug.h"
#include <linux/module.h>
+#include <linux/slab.h>
#include <linux/err.h>
#include "types.h"
diff --git a/fs/ceph/auth_none.c b/fs/ceph/auth_none.c
index b4ef6f0a6c85..8cd9e3af07f7 100644
--- a/fs/ceph/auth_none.c
+++ b/fs/ceph/auth_none.c
@@ -4,6 +4,7 @@
#include <linux/err.h>
#include <linux/module.h>
#include <linux/random.h>
+#include <linux/slab.h>
#include "auth_none.h"
#include "auth.h"
diff --git a/fs/ceph/auth_x.c b/fs/ceph/auth_x.c
index 8d8a84964763..d9001a4dc8cc 100644
--- a/fs/ceph/auth_x.c
+++ b/fs/ceph/auth_x.c
@@ -4,6 +4,7 @@
#include <linux/err.h>
#include <linux/module.h>
#include <linux/random.h>
+#include <linux/slab.h>
#include "auth_x.h"
#include "auth_x_protocol.h"
diff --git a/fs/ceph/buffer.c b/fs/ceph/buffer.c
index b98086c7aeba..c67535d70aa6 100644
--- a/fs/ceph/buffer.c
+++ b/fs/ceph/buffer.c
@@ -1,5 +1,8 @@
#include "ceph_debug.h"
+
+#include <linux/slab.h>
+
#include "buffer.h"
#include "decode.h"
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 7d0a0d0adc18..aa2239fa9a3b 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -3,6 +3,7 @@
#include <linux/fs.h>
#include <linux/kernel.h>
#include <linux/sched.h>
+#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/wait.h>
#include <linux/writeback.h>
@@ -1204,6 +1205,12 @@ retry:
if (capsnap->dirty_pages || capsnap->writing)
continue;
+ /*
+ * if cap writeback already occurred, we should have dropped
+ * the capsnap in ceph_put_wrbuffer_cap_refs.
+ */
+ BUG_ON(capsnap->dirty == 0);
+
/* pick mds, take s_mutex */
mds = __ceph_get_cap_mds(ci, &mseq);
if (session && session->s_mds != mds) {
@@ -2117,8 +2124,8 @@ void ceph_put_cap_refs(struct ceph_inode_info *ci, int had)
}
spin_unlock(&inode->i_lock);
- dout("put_cap_refs %p had %s %s\n", inode, ceph_cap_string(had),
- last ? "last" : "");
+ dout("put_cap_refs %p had %s%s%s\n", inode, ceph_cap_string(had),
+ last ? " last" : "", put ? " put" : "");
if (last && !flushsnaps)
ceph_check_caps(ci, 0, NULL);
@@ -2142,7 +2149,8 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
{
struct inode *inode = &ci->vfs_inode;
int last = 0;
- int last_snap = 0;
+ int complete_capsnap = 0;
+ int drop_capsnap = 0;
int found = 0;
struct ceph_cap_snap *capsnap = NULL;
@@ -2165,19 +2173,32 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) {
if (capsnap->context == snapc) {
found = 1;
- capsnap->dirty_pages -= nr;
- last_snap = !capsnap->dirty_pages;
break;
}
}
BUG_ON(!found);
+ capsnap->dirty_pages -= nr;
+ if (capsnap->dirty_pages == 0) {
+ complete_capsnap = 1;
+ if (capsnap->dirty == 0)
+ /* cap writeback completed before we created
+ * the cap_snap; no FLUSHSNAP is needed */
+ drop_capsnap = 1;
+ }
dout("put_wrbuffer_cap_refs on %p cap_snap %p "
- " snap %lld %d/%d -> %d/%d %s%s\n",
+ " snap %lld %d/%d -> %d/%d %s%s%s\n",
inode, capsnap, capsnap->context->seq,
ci->i_wrbuffer_ref+nr, capsnap->dirty_pages + nr,
ci->i_wrbuffer_ref, capsnap->dirty_pages,
last ? " (wrbuffer last)" : "",
- last_snap ? " (capsnap last)" : "");
+ complete_capsnap ? " (complete capsnap)" : "",
+ drop_capsnap ? " (drop capsnap)" : "");
+ if (drop_capsnap) {
+ ceph_put_snap_context(capsnap->context);
+ list_del(&capsnap->ci_item);
+ list_del(&capsnap->flushing_item);
+ ceph_put_cap_snap(capsnap);
+ }
}
spin_unlock(&inode->i_lock);
@@ -2185,10 +2206,12 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
if (last) {
ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL);
iput(inode);
- } else if (last_snap) {
+ } else if (complete_capsnap) {
ceph_flush_snaps(ci);
wake_up(&ci->i_cap_wq);
}
+ if (drop_capsnap)
+ iput(inode);
}
/*
@@ -2464,8 +2487,8 @@ static void handle_cap_flushsnap_ack(struct inode *inode, u64 flush_tid,
break;
}
WARN_ON(capsnap->dirty_pages || capsnap->writing);
- dout(" removing cap_snap %p follows %lld\n",
- capsnap, follows);
+ dout(" removing %p cap_snap %p follows %lld\n",
+ inode, capsnap, follows);
ceph_put_snap_context(capsnap->context);
list_del(&capsnap->ci_item);
list_del(&capsnap->flushing_item);
diff --git a/fs/ceph/crypto.c b/fs/ceph/crypto.c
index 291ac288e791..f704b3b62424 100644
--- a/fs/ceph/crypto.c
+++ b/fs/ceph/crypto.c
@@ -3,6 +3,7 @@
#include <linux/err.h>
#include <linux/scatterlist.h>
+#include <linux/slab.h>
#include <crypto/hash.h>
#include "crypto.h"
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
index e159f1415110..f7048da92acc 100644
--- a/fs/ceph/debugfs.c
+++ b/fs/ceph/debugfs.c
@@ -1,6 +1,7 @@
#include "ceph_debug.h"
#include <linux/device.h>
+#include <linux/slab.h>
#include <linux/module.h>
#include <linux/ctype.h>
#include <linux/debugfs.h>
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 8a9116e15b70..ea8ee2e526aa 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -3,6 +3,7 @@
#include <linux/spinlock.h>
#include <linux/fs_struct.h>
#include <linux/namei.h>
+#include <linux/slab.h>
#include <linux/sched.h>
#include "super.h"
@@ -170,11 +171,11 @@ more:
spin_lock(&inode->i_lock);
spin_lock(&dcache_lock);
+ last = dentry;
+
if (err < 0)
goto out_unlock;
- last = dentry;
-
p = p->prev;
filp->f_pos++;
@@ -311,7 +312,7 @@ more:
req->r_readdir_offset = fi->next_offset;
req->r_args.readdir.frag = cpu_to_le32(frag);
req->r_args.readdir.max_entries = cpu_to_le32(max_entries);
- req->r_num_caps = max_entries;
+ req->r_num_caps = max_entries + 1;
err = ceph_mdsc_do_request(mdsc, NULL, req);
if (err < 0) {
ceph_mdsc_put_request(req);
@@ -488,6 +489,7 @@ struct dentry *ceph_finish_lookup(struct ceph_mds_request *req,
struct inode *inode = ceph_get_snapdir(parent);
dout("ENOENT on snapdir %p '%.*s', linking to snapdir %p\n",
dentry, dentry->d_name.len, dentry->d_name.name, inode);
+ BUG_ON(!d_unhashed(dentry));
d_add(dentry, inode);
err = 0;
}
diff --git a/fs/ceph/export.c b/fs/ceph/export.c
index fc68e39cbad6..9d67572fb328 100644
--- a/fs/ceph/export.c
+++ b/fs/ceph/export.c
@@ -1,6 +1,7 @@
#include "ceph_debug.h"
#include <linux/exportfs.h>
+#include <linux/slab.h>
#include <asm/unaligned.h>
#include "super.h"
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 5d2af8464f6a..4add3d5da2c1 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -1,6 +1,7 @@
#include "ceph_debug.h"
#include <linux/sched.h>
+#include <linux/slab.h>
#include <linux/file.h>
#include <linux/namei.h>
#include <linux/writeback.h>
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index aca82d55cc53..26f883c275e8 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -886,6 +886,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
struct inode *in = NULL;
struct ceph_mds_reply_inode *ininfo;
struct ceph_vino vino;
+ struct ceph_client *client = ceph_sb_to_client(sb);
int i = 0;
int err = 0;
@@ -949,7 +950,14 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
return err;
}
- if (rinfo->head->is_dentry && !req->r_aborted) {
+ /*
+ * ignore null lease/binding on snapdir ENOENT, or else we
+ * will have trouble splicing in the virtual snapdir later
+ */
+ if (rinfo->head->is_dentry && !req->r_aborted &&
+ (rinfo->head->is_target || strncmp(req->r_dentry->d_name.name,
+ client->mount_args->snapdir_name,
+ req->r_dentry->d_name.len))) {
/*
* lookup link rename : null -> possibly existing inode
* mknod symlink mkdir : null -> new inode
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 5c7920be6420..60a9a4ae47be 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -1,6 +1,7 @@
#include "ceph_debug.h"
#include <linux/wait.h>
+#include <linux/slab.h>
#include <linux/sched.h>
#include "mds_client.h"
diff --git a/fs/ceph/messenger.c b/fs/ceph/messenger.c
index a32f0f896d9f..cdaaa131add3 100644
--- a/fs/ceph/messenger.c
+++ b/fs/ceph/messenger.c
@@ -6,6 +6,7 @@
#include <linux/inet.h>
#include <linux/kthread.h>
#include <linux/net.h>
+#include <linux/slab.h>
#include <linux/socket.h>
#include <linux/string.h>
#include <net/tcp.h>
@@ -29,6 +30,10 @@ static char tag_msg = CEPH_MSGR_TAG_MSG;
static char tag_ack = CEPH_MSGR_TAG_ACK;
static char tag_keepalive = CEPH_MSGR_TAG_KEEPALIVE;
+#ifdef CONFIG_LOCKDEP
+static struct lock_class_key socket_class;
+#endif
+
static void queue_con(struct ceph_connection *con);
static void con_work(struct work_struct *);
@@ -227,6 +232,10 @@ static struct socket *ceph_tcp_connect(struct ceph_connection *con)
con->sock = sock;
sock->sk->sk_allocation = GFP_NOFS;
+#ifdef CONFIG_LOCKDEP
+ lockdep_set_class(&sock->sk->sk_lock, &socket_class);
+#endif
+
set_sock_callbacks(sock, con);
dout("connect %s\n", pr_addr(&con->peer_addr.in_addr));
@@ -332,6 +341,7 @@ static void reset_connection(struct ceph_connection *con)
con->out_msg = NULL;
}
con->in_seq = 0;
+ con->in_seq_acked = 0;
}
/*
diff --git a/fs/ceph/mon_client.c b/fs/ceph/mon_client.c
index 890597c09d43..8fdc011ca956 100644
--- a/fs/ceph/mon_client.c
+++ b/fs/ceph/mon_client.c
@@ -1,6 +1,7 @@
#include "ceph_debug.h"
#include <linux/types.h>
+#include <linux/slab.h>
#include <linux/random.h>
#include <linux/sched.h>
diff --git a/fs/ceph/osdmap.c b/fs/ceph/osdmap.c
index d82fe87c2a6e..2e2c15eed82a 100644
--- a/fs/ceph/osdmap.c
+++ b/fs/ceph/osdmap.c
@@ -1,4 +1,7 @@
+#include "ceph_debug.h"
+
+#include <linux/slab.h>
#include <asm/div64.h>
#include "super.h"
@@ -6,7 +9,6 @@
#include "crush/hash.h"
#include "crush/mapper.h"
#include "decode.h"
-#include "ceph_debug.h"
char *ceph_osdmap_state_str(char *str, int len, int state)
{
@@ -312,71 +314,6 @@ bad:
return ERR_PTR(err);
}
-
-/*
- * osd map
- */
-void ceph_osdmap_destroy(struct ceph_osdmap *map)
-{
- dout("osdmap_destroy %p\n", map);
- if (map->crush)
- crush_destroy(map->crush);
- while (!RB_EMPTY_ROOT(&map->pg_temp)) {
- struct ceph_pg_mapping *pg =
- rb_entry(rb_first(&map->pg_temp),
- struct ceph_pg_mapping, node);
- rb_erase(&pg->node, &map->pg_temp);
- kfree(pg);
- }
- while (!RB_EMPTY_ROOT(&map->pg_pools)) {
- struct ceph_pg_pool_info *pi =
- rb_entry(rb_first(&map->pg_pools),
- struct ceph_pg_pool_info, node);
- rb_erase(&pi->node, &map->pg_pools);
- kfree(pi);
- }
- kfree(map->osd_state);
- kfree(map->osd_weight);
- kfree(map->osd_addr);
- kfree(map);
-}
-
-/*
- * adjust max osd value. reallocate arrays.
- */
-static int osdmap_set_max_osd(struct ceph_osdmap *map, int max)
-{
- u8 *state;
- struct ceph_entity_addr *addr;
- u32 *weight;
-
- state = kcalloc(max, sizeof(*state), GFP_NOFS);
- addr = kcalloc(max, sizeof(*addr), GFP_NOFS);
- weight = kcalloc(max, sizeof(*weight), GFP_NOFS);
- if (state == NULL || addr == NULL || weight == NULL) {
- kfree(state);
- kfree(addr);
- kfree(weight);
- return -ENOMEM;
- }
-
- /* copy old? */
- if (map->osd_state) {
- memcpy(state, map->osd_state, map->max_osd*sizeof(*state));
- memcpy(addr, map->osd_addr, map->max_osd*sizeof(*addr));
- memcpy(weight, map->osd_weight, map->max_osd*sizeof(*weight));
- kfree(map->osd_state);
- kfree(map->osd_addr);
- kfree(map->osd_weight);
- }
-
- map->osd_state = state;
- map->osd_weight = weight;
- map->osd_addr = addr;
- map->max_osd = max;
- return 0;
-}
-
/*
* rbtree of pg_mapping for handling pg_temp (explicit mapping of pgid
* to a set of osds)
@@ -480,6 +417,13 @@ static struct ceph_pg_pool_info *__lookup_pg_pool(struct rb_root *root, int id)
return NULL;
}
+static void __remove_pg_pool(struct rb_root *root, struct ceph_pg_pool_info *pi)
+{
+ rb_erase(&pi->node, root);
+ kfree(pi->name);
+ kfree(pi);
+}
+
void __decode_pool(void **p, struct ceph_pg_pool_info *pi)
{
ceph_decode_copy(p, &pi->v, sizeof(pi->v));
@@ -488,6 +432,98 @@ void __decode_pool(void **p, struct ceph_pg_pool_info *pi)
*p += le32_to_cpu(pi->v.num_removed_snap_intervals) * sizeof(u64) * 2;
}
+static int __decode_pool_names(void **p, void *end, struct ceph_osdmap *map)
+{
+ struct ceph_pg_pool_info *pi;
+ u32 num, len, pool;
+
+ ceph_decode_32_safe(p, end, num, bad);
+ dout(" %d pool names\n", num);
+ while (num--) {
+ ceph_decode_32_safe(p, end, pool, bad);
+ ceph_decode_32_safe(p, end, len, bad);
+ dout(" pool %d len %d\n", pool, len);
+ pi = __lookup_pg_pool(&map->pg_pools, pool);
+ if (pi) {
+ kfree(pi->name);
+ pi->name = kmalloc(len + 1, GFP_NOFS);
+ if (pi->name) {
+ memcpy(pi->name, *p, len);
+ pi->name[len] = '\0';
+ dout(" name is %s\n", pi->name);
+ }
+ }
+ *p += len;
+ }
+ return 0;
+
+bad:
+ return -EINVAL;
+}
+
+/*
+ * osd map
+ */
+void ceph_osdmap_destroy(struct ceph_osdmap *map)
+{
+ dout("osdmap_destroy %p\n", map);
+ if (map->crush)
+ crush_destroy(map->crush);
+ while (!RB_EMPTY_ROOT(&map->pg_temp)) {
+ struct ceph_pg_mapping *pg =
+ rb_entry(rb_first(&map->pg_temp),
+ struct ceph_pg_mapping, node);
+ rb_erase(&pg->node, &map->pg_temp);
+ kfree(pg);
+ }
+ while (!RB_EMPTY_ROOT(&map->pg_pools)) {
+ struct ceph_pg_pool_info *pi =
+ rb_entry(rb_first(&map->pg_pools),
+ struct ceph_pg_pool_info, node);
+ __remove_pg_pool(&map->pg_pools, pi);
+ }
+ kfree(map->osd_state);
+ kfree(map->osd_weight);
+ kfree(map->osd_addr);
+ kfree(map);
+}
+
+/*
+ * adjust max osd value. reallocate arrays.
+ */
+static int osdmap_set_max_osd(struct ceph_osdmap *map, int max)
+{
+ u8 *state;
+ struct ceph_entity_addr *addr;
+ u32 *weight;
+
+ state = kcalloc(max, sizeof(*state), GFP_NOFS);
+ addr = kcalloc(max, sizeof(*addr), GFP_NOFS);
+ weight = kcalloc(max, sizeof(*weight), GFP_NOFS);
+ if (state == NULL || addr == NULL || weight == NULL) {
+ kfree(state);
+ kfree(addr);
+ kfree(weight);
+ return -ENOMEM;
+ }
+
+ /* copy old? */
+ if (map->osd_state) {
+ memcpy(state, map->osd_state, map->max_osd*sizeof(*state));
+ memcpy(addr, map->osd_addr, map->max_osd*sizeof(*addr));
+ memcpy(weight, map->osd_weight, map->max_osd*sizeof(*weight));
+ kfree(map->osd_state);
+ kfree(map->osd_addr);
+ kfree(map->osd_weight);
+ }
+
+ map->osd_state = state;
+ map->osd_weight = weight;
+ map->osd_addr = addr;
+ map->max_osd = max;
+ return 0;
+}
+
/*
* decode a full map.
*/
@@ -524,7 +560,7 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end)
ceph_decode_32_safe(p, end, max, bad);
while (max--) {
ceph_decode_need(p, end, 4 + 1 + sizeof(pi->v), bad);
- pi = kmalloc(sizeof(*pi), GFP_NOFS);
+ pi = kzalloc(sizeof(*pi), GFP_NOFS);
if (!pi)
goto bad;
pi->id = ceph_decode_32(p);
@@ -537,6 +573,10 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end)
__decode_pool(p, pi);
__insert_pg_pool(&map->pg_pools, pi);
}
+
+ if (version >= 5 && __decode_pool_names(p, end, map) < 0)
+ goto bad;
+
ceph_decode_32_safe(p, end, map->pool_max, bad);
ceph_decode_32_safe(p, end, map->flags, bad);
@@ -710,7 +750,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
}
pi = __lookup_pg_pool(&map->pg_pools, pool);
if (!pi) {
- pi = kmalloc(sizeof(*pi), GFP_NOFS);
+ pi = kzalloc(sizeof(*pi), GFP_NOFS);
if (!pi) {
err = -ENOMEM;
goto bad;
@@ -720,6 +760,8 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
}
__decode_pool(p, pi);
}
+ if (version >= 5 && __decode_pool_names(p, end, map) < 0)
+ goto bad;
/* old_pool */
ceph_decode_32_safe(p, end, len, bad);
@@ -728,10 +770,8 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
ceph_decode_32_safe(p, end, pool, bad);
pi = __lookup_pg_pool(&map->pg_pools, pool);
- if (pi) {
- rb_erase(&pi->node, &map->pg_pools);
- kfree(pi);
- }
+ if (pi)
+ __remove_pg_pool(&map->pg_pools, pi);
}
/* new_up */
diff --git a/fs/ceph/osdmap.h b/fs/ceph/osdmap.h
index 1fb55afb2642..8bc9f1e4f562 100644
--- a/fs/ceph/osdmap.h
+++ b/fs/ceph/osdmap.h
@@ -23,6 +23,7 @@ struct ceph_pg_pool_info {
int id;
struct ceph_pg_pool v;
int pg_num_mask, pgp_num_mask, lpg_num_mask, lpgp_num_mask;
+ char *name;
};
struct ceph_pg_mapping {
diff --git a/fs/ceph/pagelist.c b/fs/ceph/pagelist.c
index 370e93695474..5f8dbf7c745a 100644
--- a/fs/ceph/pagelist.c
+++ b/fs/ceph/pagelist.c
@@ -1,4 +1,5 @@
+#include <linux/gfp.h>
#include <linux/pagemap.h>
#include <linux/highmem.h>
diff --git a/fs/ceph/rados.h b/fs/ceph/rados.h
index 26ac8b89a676..a1fc1d017b58 100644
--- a/fs/ceph/rados.h
+++ b/fs/ceph/rados.h
@@ -11,8 +11,10 @@
/*
* osdmap encoding versions
*/
-#define CEPH_OSDMAP_INC_VERSION 4
-#define CEPH_OSDMAP_VERSION 4
+#define CEPH_OSDMAP_INC_VERSION 5
+#define CEPH_OSDMAP_INC_VERSION_EXT 5
+#define CEPH_OSDMAP_VERSION 5
+#define CEPH_OSDMAP_VERSION_EXT 5
/*
* fs id
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index df04e210a055..2b881262ef67 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -1,6 +1,7 @@
#include "ceph_debug.h"
#include <linux/sort.h>
+#include <linux/slab.h>
#include "super.h"
#include "decode.h"
@@ -430,8 +431,7 @@ static int dup_array(u64 **dst, __le64 *src, int num)
* Caller must hold snap_rwsem for read (i.e., the realm topology won't
* change).
*/
-void ceph_queue_cap_snap(struct ceph_inode_info *ci,
- struct ceph_snap_context *snapc)
+void ceph_queue_cap_snap(struct ceph_inode_info *ci)
{
struct inode *inode = &ci->vfs_inode;
struct ceph_cap_snap *capsnap;
@@ -450,10 +450,11 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci,
as no new writes are allowed to start when pending, so any
writes in progress now were started before the previous
cap_snap. lucky us. */
- dout("queue_cap_snap %p snapc %p seq %llu used %d"
- " already pending\n", inode, snapc, snapc->seq, used);
+ dout("queue_cap_snap %p already pending\n", inode);
kfree(capsnap);
} else if (ci->i_wrbuffer_ref_head || (used & CEPH_CAP_FILE_WR)) {
+ struct ceph_snap_context *snapc = ci->i_head_snapc;
+
igrab(inode);
atomic_set(&capsnap->nref, 1);
@@ -462,7 +463,6 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci,
INIT_LIST_HEAD(&capsnap->flushing_item);
capsnap->follows = snapc->seq - 1;
- capsnap->context = ceph_get_snap_context(snapc);
capsnap->issued = __ceph_caps_issued(ci, NULL);
capsnap->dirty = __ceph_caps_dirty(ci);
@@ -479,7 +479,7 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci,
snapshot. */
capsnap->dirty_pages = ci->i_wrbuffer_ref_head;
ci->i_wrbuffer_ref_head = 0;
- ceph_put_snap_context(ci->i_head_snapc);
+ capsnap->context = snapc;
ci->i_head_snapc = NULL;
list_add_tail(&capsnap->ci_item, &ci->i_cap_snaps);
@@ -521,15 +521,17 @@ int __ceph_finish_cap_snap(struct ceph_inode_info *ci,
capsnap->ctime = inode->i_ctime;
capsnap->time_warp_seq = ci->i_time_warp_seq;
if (capsnap->dirty_pages) {
- dout("finish_cap_snap %p cap_snap %p snapc %p %llu s=%llu "
+ dout("finish_cap_snap %p cap_snap %p snapc %p %llu %s s=%llu "
"still has %d dirty pages\n", inode, capsnap,
capsnap->context, capsnap->context->seq,
- capsnap->size, capsnap->dirty_pages);
+ ceph_cap_string(capsnap->dirty), capsnap->size,
+ capsnap->dirty_pages);
return 0;
}
- dout("finish_cap_snap %p cap_snap %p snapc %p %llu s=%llu clean\n",
+ dout("finish_cap_snap %p cap_snap %p snapc %p %llu %s s=%llu\n",
inode, capsnap, capsnap->context,
- capsnap->context->seq, capsnap->size);
+ capsnap->context->seq, ceph_cap_string(capsnap->dirty),
+ capsnap->size);
spin_lock(&mdsc->snap_flush_lock);
list_add_tail(&ci->i_snap_flush_item, &mdsc->snap_flush_list);
@@ -601,7 +603,7 @@ more:
if (lastinode)
iput(lastinode);
lastinode = inode;
- ceph_queue_cap_snap(ci, realm->cached_context);
+ ceph_queue_cap_snap(ci);
spin_lock(&realm->inodes_with_caps_lock);
}
spin_unlock(&realm->inodes_with_caps_lock);
@@ -823,8 +825,7 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc,
spin_unlock(&realm->inodes_with_caps_lock);
spin_unlock(&inode->i_lock);
- ceph_queue_cap_snap(ci,
- ci->i_snap_realm->cached_context);
+ ceph_queue_cap_snap(ci);
iput(inode);
continue;
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 4290a6e860b0..75d02eaa1279 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -11,6 +11,7 @@
#include <linux/rwsem.h>
#include <linux/sched.h>
#include <linux/seq_file.h>
+#include <linux/slab.h>
#include <linux/statfs.h>
#include <linux/string.h>
#include <linux/version.h>
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 65d12036b670..e30dfbb056c3 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -12,6 +12,7 @@
#include <linux/pagemap.h>
#include <linux/wait.h>
#include <linux/writeback.h>
+#include <linux/slab.h>
#include "types.h"
#include "messenger.h"
@@ -714,8 +715,7 @@ extern int ceph_update_snap_trace(struct ceph_mds_client *m,
extern void ceph_handle_snap(struct ceph_mds_client *mdsc,
struct ceph_mds_session *session,
struct ceph_msg *msg);
-extern void ceph_queue_cap_snap(struct ceph_inode_info *ci,
- struct ceph_snap_context *snapc);
+extern void ceph_queue_cap_snap(struct ceph_inode_info *ci);
extern int __ceph_finish_cap_snap(struct ceph_inode_info *ci,
struct ceph_cap_snap *capsnap);
extern void ceph_cleanup_empty_realms(struct ceph_mds_client *mdsc);
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index 37d6ce645691..2845422907fc 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -3,6 +3,7 @@
#include "decode.h"
#include <linux/xattr.h>
+#include <linux/slab.h>
static bool ceph_is_valid_xattr(const char *name)
{