summary | refs | log | tree | commit | diff | stats
path: root/fs/ceph
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ceph')
-rw-r--r-- fs/ceph/addr.c | 21
-rw-r--r-- fs/ceph/cache.c | 92
-rw-r--r-- fs/ceph/caps.c | 40
-rw-r--r-- fs/ceph/file.c | 2
-rw-r--r-- fs/ceph/inode.c | 18
-rw-r--r-- fs/ceph/locks.c | 25
-rw-r--r-- fs/ceph/mds_client.c | 4
-rw-r--r-- fs/ceph/super.c | 47
-rw-r--r-- fs/ceph/super.h | 4
-rw-r--r-- fs/ceph/xattr.c | 3
10 files changed, 201 insertions, 55 deletions
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 1e71e6ca5ddf..50836280a6f8 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -530,14 +530,10 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
long writeback_stat;
u64 truncate_size;
u32 truncate_seq;
- int err = 0, len = PAGE_SIZE;
+ int err, len = PAGE_SIZE;
dout("writepage %p idx %lu\n", page, page->index);
- if (!page->mapping || !page->mapping->host) {
- dout("writepage %p - no mapping\n", page);
- return -EFAULT;
- }
inode = page->mapping->host;
ci = ceph_inode(inode);
fsc = ceph_inode_to_client(inode);
@@ -547,7 +543,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
snapc = page_snap_context(page);
if (snapc == NULL) {
dout("writepage %p page %p not dirty?\n", inode, page);
- goto out;
+ return 0;
}
oldest = get_oldest_context(inode, &snap_size,
&truncate_size, &truncate_seq);
@@ -555,9 +551,10 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
dout("writepage %p page %p snapc %p not writeable - noop\n",
inode, page, snapc);
/* we should only noop if called by kswapd */
- WARN_ON((current->flags & PF_MEMALLOC) == 0);
+ WARN_ON(!(current->flags & PF_MEMALLOC));
ceph_put_snap_context(oldest);
- goto out;
+ redirty_page_for_writepage(wbc, page);
+ return 0;
}
ceph_put_snap_context(oldest);
@@ -567,8 +564,9 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
/* is this a partial page at end of file? */
if (page_off >= snap_size) {
dout("%p page eof %llu\n", page, snap_size);
- goto out;
+ return 0;
}
+
if (snap_size < page_off + len)
len = snap_size - page_off;
@@ -595,7 +593,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
dout("writepage interrupted page %p\n", page);
redirty_page_for_writepage(wbc, page);
end_page_writeback(page);
- goto out;
+ return err;
}
dout("writepage setting page/mapping error %d %p\n",
err, page);
@@ -611,7 +609,6 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
end_page_writeback(page);
ceph_put_wrbuffer_cap_refs(ci, 1, snapc);
ceph_put_snap_context(snapc); /* page's reference */
-out:
return err;
}
@@ -1318,7 +1315,7 @@ static int ceph_write_end(struct file *file, struct address_space *mapping,
struct page *page, void *fsdata)
{
struct inode *inode = file_inode(file);
- int check_cap = 0;
+ bool check_cap = false;
dout("write_end file %p inode %p page %p %d~%d (%d)\n", file,
inode, page, (int)pos, (int)copied, (int)len);
diff --git a/fs/ceph/cache.c b/fs/ceph/cache.c
index 4e7421caf380..fd1172823f86 100644
--- a/fs/ceph/cache.c
+++ b/fs/ceph/cache.c
@@ -35,18 +35,34 @@ struct fscache_netfs ceph_cache_netfs = {
.version = 0,
};
+static DEFINE_MUTEX(ceph_fscache_lock);
+static LIST_HEAD(ceph_fscache_list);
+
+struct ceph_fscache_entry {
+ struct list_head list;
+ struct fscache_cookie *fscache;
+ struct ceph_fsid fsid;
+ size_t uniq_len;
+ char uniquifier[0];
+};
+
static uint16_t ceph_fscache_session_get_key(const void *cookie_netfs_data,
void *buffer, uint16_t maxbuf)
{
const struct ceph_fs_client* fsc = cookie_netfs_data;
- uint16_t klen;
+ const char *fscache_uniq = fsc->mount_options->fscache_uniq;
+ uint16_t fsid_len, uniq_len;
- klen = sizeof(fsc->client->fsid);
- if (klen > maxbuf)
+ fsid_len = sizeof(fsc->client->fsid);
+ uniq_len = fscache_uniq ? strlen(fscache_uniq) : 0;
+ if (fsid_len + uniq_len > maxbuf)
return 0;
- memcpy(buffer, &fsc->client->fsid, klen);
- return klen;
+ memcpy(buffer, &fsc->client->fsid, fsid_len);
+ if (uniq_len)
+ memcpy(buffer + fsid_len, fscache_uniq, uniq_len);
+
+ return fsid_len + uniq_len;
}
static const struct fscache_cookie_def ceph_fscache_fsid_object_def = {
@@ -67,13 +83,54 @@ void ceph_fscache_unregister(void)
int ceph_fscache_register_fs(struct ceph_fs_client* fsc)
{
+ const struct ceph_fsid *fsid = &fsc->client->fsid;
+ const char *fscache_uniq = fsc->mount_options->fscache_uniq;
+ size_t uniq_len = fscache_uniq ? strlen(fscache_uniq) : 0;
+ struct ceph_fscache_entry *ent;
+ int err = 0;
+
+ mutex_lock(&ceph_fscache_lock);
+ list_for_each_entry(ent, &ceph_fscache_list, list) {
+ if (memcmp(&ent->fsid, fsid, sizeof(*fsid)))
+ continue;
+ if (ent->uniq_len != uniq_len)
+ continue;
+ if (uniq_len && memcmp(ent->uniquifier, fscache_uniq, uniq_len))
+ continue;
+
+ pr_err("fscache cookie already registered for fsid %pU\n", fsid);
+ pr_err(" use fsc=%%s mount option to specify a uniquifier\n");
+ err = -EBUSY;
+ goto out_unlock;
+ }
+
+ ent = kzalloc(sizeof(*ent) + uniq_len, GFP_KERNEL);
+ if (!ent) {
+ err = -ENOMEM;
+ goto out_unlock;
+ }
+
fsc->fscache = fscache_acquire_cookie(ceph_cache_netfs.primary_index,
&ceph_fscache_fsid_object_def,
fsc, true);
- if (!fsc->fscache)
- pr_err("Unable to register fsid: %p fscache cookie\n", fsc);
- return 0;
+ if (fsc->fscache) {
+ memcpy(&ent->fsid, fsid, sizeof(*fsid));
+ if (uniq_len > 0) {
+ memcpy(&ent->uniquifier, fscache_uniq, uniq_len);
+ ent->uniq_len = uniq_len;
+ }
+ ent->fscache = fsc->fscache;
+ list_add_tail(&ent->list, &ceph_fscache_list);
+ } else {
+ kfree(ent);
+ pr_err("unable to register fscache cookie for fsid %pU\n",
+ fsid);
+ /* all other fs ignore this error */
+ }
+out_unlock:
+ mutex_unlock(&ceph_fscache_lock);
+ return err;
}
static uint16_t ceph_fscache_inode_get_key(const void *cookie_netfs_data,
@@ -349,7 +406,24 @@ void ceph_invalidate_fscache_page(struct inode* inode, struct page *page)
void ceph_fscache_unregister_fs(struct ceph_fs_client* fsc)
{
- fscache_relinquish_cookie(fsc->fscache, 0);
+ if (fscache_cookie_valid(fsc->fscache)) {
+ struct ceph_fscache_entry *ent;
+ bool found = false;
+
+ mutex_lock(&ceph_fscache_lock);
+ list_for_each_entry(ent, &ceph_fscache_list, list) {
+ if (ent->fscache == fsc->fscache) {
+ list_del(&ent->list);
+ kfree(ent);
+ found = true;
+ break;
+ }
+ }
+ WARN_ON_ONCE(!found);
+ mutex_unlock(&ceph_fscache_lock);
+
+ __fscache_relinquish_cookie(fsc->fscache, 0);
+ }
fsc->fscache = NULL;
}
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index a3ebb632294e..7007ae2a5ad2 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -1653,6 +1653,21 @@ static int try_nonblocking_invalidate(struct inode *inode)
return -1;
}
+bool __ceph_should_report_size(struct ceph_inode_info *ci)
+{
+ loff_t size = ci->vfs_inode.i_size;
+ /* mds will adjust max size according to the reported size */
+ if (ci->i_flushing_caps & CEPH_CAP_FILE_WR)
+ return false;
+ if (size >= ci->i_max_size)
+ return true;
+ /* half of previous max_size increment has been used */
+ if (ci->i_max_size > ci->i_reported_size &&
+ (size << 1) >= ci->i_max_size + ci->i_reported_size)
+ return true;
+ return false;
+}
+
/*
* Swiss army knife function to examine currently used and wanted
* versus held caps. Release, flush, ack revoked caps to mds as
@@ -1806,8 +1821,7 @@ retry_locked:
}
/* approaching file_max? */
- if ((inode->i_size << 1) >= ci->i_max_size &&
- (ci->i_reported_size << 1) < ci->i_max_size) {
+ if (__ceph_should_report_size(ci)) {
dout("i_size approaching max_size\n");
goto ack;
}
@@ -3027,8 +3041,10 @@ static void handle_cap_grant(struct ceph_mds_client *mdsc,
le32_to_cpu(grant->truncate_seq),
le64_to_cpu(grant->truncate_size),
size);
- /* max size increase? */
- if (ci->i_auth_cap == cap && max_size != ci->i_max_size) {
+ }
+
+ if (ci->i_auth_cap == cap && (newcaps & CEPH_CAP_ANY_FILE_WR)) {
+ if (max_size != ci->i_max_size) {
dout("max_size %lld -> %llu\n",
ci->i_max_size, max_size);
ci->i_max_size = max_size;
@@ -3037,6 +3053,10 @@ static void handle_cap_grant(struct ceph_mds_client *mdsc,
ci->i_requested_max_size = 0;
}
wake = true;
+ } else if (ci->i_wanted_max_size > ci->i_max_size &&
+ ci->i_wanted_max_size > ci->i_requested_max_size) {
+ /* CEPH_CAP_OP_IMPORT */
+ wake = true;
}
}
@@ -3554,7 +3574,6 @@ retry:
}
/* make sure we re-request max_size, if necessary */
- ci->i_wanted_max_size = 0;
ci->i_requested_max_size = 0;
*old_issued = issued;
@@ -3790,6 +3809,7 @@ bad:
*/
void ceph_check_delayed_caps(struct ceph_mds_client *mdsc)
{
+ struct inode *inode;
struct ceph_inode_info *ci;
int flags = CHECK_CAPS_NODELAY;
@@ -3805,9 +3825,15 @@ void ceph_check_delayed_caps(struct ceph_mds_client *mdsc)
time_before(jiffies, ci->i_hold_caps_max))
break;
list_del_init(&ci->i_cap_delay_list);
+
+ inode = igrab(&ci->vfs_inode);
spin_unlock(&mdsc->cap_delay_lock);
- dout("check_delayed_caps on %p\n", &ci->vfs_inode);
- ceph_check_caps(ci, flags, NULL);
+
+ if (inode) {
+ dout("check_delayed_caps on %p\n", inode);
+ ceph_check_caps(ci, flags, NULL);
+ iput(inode);
+ }
}
spin_unlock(&mdsc->cap_delay_lock);
}
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 29308a80d66f..3d48c415f3cb 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -1040,8 +1040,8 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos,
int num_pages;
int written = 0;
int flags;
- int check_caps = 0;
int ret;
+ bool check_caps = false;
struct timespec mtime = current_time(inode);
size_t count = iov_iter_count(from);
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 4de6cdddf059..220dfd87cbfa 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -1016,6 +1016,7 @@ static void update_dentry_lease(struct dentry *dentry,
long unsigned ttl = from_time + (duration * HZ) / 1000;
long unsigned half_ttl = from_time + (duration * HZ / 2) / 1000;
struct inode *dir;
+ struct ceph_mds_session *old_lease_session = NULL;
/*
* Make sure dentry's inode matches tgt_vino. NULL tgt_vino means that
@@ -1051,8 +1052,10 @@ static void update_dentry_lease(struct dentry *dentry,
time_before(ttl, di->time))
goto out_unlock; /* we already have a newer lease. */
- if (di->lease_session && di->lease_session != session)
- goto out_unlock;
+ if (di->lease_session && di->lease_session != session) {
+ old_lease_session = di->lease_session;
+ di->lease_session = NULL;
+ }
ceph_dentry_lru_touch(dentry);
@@ -1065,6 +1068,8 @@ static void update_dentry_lease(struct dentry *dentry,
di->time = ttl;
out_unlock:
spin_unlock(&dentry->d_lock);
+ if (old_lease_session)
+ ceph_put_mds_session(old_lease_session);
return;
}
@@ -1653,20 +1658,17 @@ out:
return err;
}
-int ceph_inode_set_size(struct inode *inode, loff_t size)
+bool ceph_inode_set_size(struct inode *inode, loff_t size)
{
struct ceph_inode_info *ci = ceph_inode(inode);
- int ret = 0;
+ bool ret;
spin_lock(&ci->i_ceph_lock);
dout("set_size %p %llu -> %llu\n", inode, inode->i_size, size);
i_size_write(inode, size);
inode->i_blocks = calc_inode_blocks(size);
- /* tell the MDS if we are approaching max_size */
- if ((size << 1) >= ci->i_max_size &&
- (ci->i_reported_size << 1) < ci->i_max_size)
- ret = 1;
+ ret = __ceph_should_report_size(ci);
spin_unlock(&ci->i_ceph_lock);
return ret;
diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c
index 6806dbeaee19..64ae74472046 100644
--- a/fs/ceph/locks.c
+++ b/fs/ceph/locks.c
@@ -127,6 +127,29 @@ static int ceph_lock_wait_for_completion(struct ceph_mds_client *mdsc,
dout("ceph_lock_wait_for_completion: request %llu was interrupted\n",
req->r_tid);
+ mutex_lock(&mdsc->mutex);
+ if (test_bit(CEPH_MDS_R_GOT_RESULT, &req->r_req_flags)) {
+ err = 0;
+ } else {
+ /*
+ * ensure we aren't running concurrently with
+ * ceph_fill_trace or ceph_readdir_prepopulate, which
+ * rely on locks (dir mutex) held by our caller.
+ */
+ mutex_lock(&req->r_fill_mutex);
+ req->r_err = err;
+ set_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags);
+ mutex_unlock(&req->r_fill_mutex);
+
+ if (!req->r_session) {
+ // haven't sent the request
+ err = 0;
+ }
+ }
+ mutex_unlock(&mdsc->mutex);
+ if (!err)
+ return 0;
+
intr_req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SETFILELOCK,
USE_AUTH_MDS);
if (IS_ERR(intr_req))
@@ -146,7 +169,7 @@ static int ceph_lock_wait_for_completion(struct ceph_mds_client *mdsc,
if (err && err != -ERESTARTSYS)
return err;
- wait_for_completion(&req->r_completion);
+ wait_for_completion_killable(&req->r_safe_completion);
return 0;
}
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 0c05df44cc6c..666a9f274832 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -3769,13 +3769,13 @@ static void ceph_mdsc_stop(struct ceph_mds_client *mdsc)
void ceph_mdsc_destroy(struct ceph_fs_client *fsc)
{
struct ceph_mds_client *mdsc = fsc->mdsc;
-
dout("mdsc_destroy %p\n", mdsc);
- ceph_mdsc_stop(mdsc);
/* flush out any connection work with references to us */
ceph_msgr_flush();
+ ceph_mdsc_stop(mdsc);
+
fsc->mdsc = NULL;
kfree(mdsc);
dout("mdsc_destroy %p done\n", mdsc);
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 8d7918ce694a..aa06a8c24792 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -121,6 +121,7 @@ enum {
/* int args above */
Opt_snapdirname,
Opt_mds_namespace,
+ Opt_fscache_uniq,
Opt_last_string,
/* string args above */
Opt_dirstat,
@@ -158,6 +159,7 @@ static match_table_t fsopt_tokens = {
/* int args above */
{Opt_snapdirname, "snapdirname=%s"},
{Opt_mds_namespace, "mds_namespace=%s"},
+ {Opt_fscache_uniq, "fsc=%s"},
/* string args above */
{Opt_dirstat, "dirstat"},
{Opt_nodirstat, "nodirstat"},
@@ -223,6 +225,14 @@ static int parse_fsopt_token(char *c, void *private)
if (!fsopt->mds_namespace)
return -ENOMEM;
break;
+ case Opt_fscache_uniq:
+ fsopt->fscache_uniq = kstrndup(argstr[0].from,
+ argstr[0].to-argstr[0].from,
+ GFP_KERNEL);
+ if (!fsopt->fscache_uniq)
+ return -ENOMEM;
+ fsopt->flags |= CEPH_MOUNT_OPT_FSCACHE;
+ break;
/* misc */
case Opt_wsize:
fsopt->wsize = intval;
@@ -317,6 +327,7 @@ static void destroy_mount_options(struct ceph_mount_options *args)
kfree(args->snapdir_name);
kfree(args->mds_namespace);
kfree(args->server_path);
+ kfree(args->fscache_uniq);
kfree(args);
}
@@ -350,10 +361,12 @@ static int compare_mount_options(struct ceph_mount_options *new_fsopt,
ret = strcmp_null(fsopt1->mds_namespace, fsopt2->mds_namespace);
if (ret)
return ret;
-
ret = strcmp_null(fsopt1->server_path, fsopt2->server_path);
if (ret)
return ret;
+ ret = strcmp_null(fsopt1->fscache_uniq, fsopt2->fscache_uniq);
+ if (ret)
+ return ret;
return ceph_compare_options(new_opt, fsc->client);
}
@@ -475,8 +488,12 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root)
seq_puts(m, ",noasyncreaddir");
if ((fsopt->flags & CEPH_MOUNT_OPT_DCACHE) == 0)
seq_puts(m, ",nodcache");
- if (fsopt->flags & CEPH_MOUNT_OPT_FSCACHE)
- seq_puts(m, ",fsc");
+ if (fsopt->flags & CEPH_MOUNT_OPT_FSCACHE) {
+ if (fsopt->fscache_uniq)
+ seq_printf(m, ",fsc=%s", fsopt->fscache_uniq);
+ else
+ seq_puts(m, ",fsc");
+ }
if (fsopt->flags & CEPH_MOUNT_OPT_NOPOOLPERM)
seq_puts(m, ",nopoolperm");
@@ -597,18 +614,11 @@ static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt,
if (!fsc->wb_pagevec_pool)
goto fail_trunc_wq;
- /* setup fscache */
- if ((fsopt->flags & CEPH_MOUNT_OPT_FSCACHE) &&
- (ceph_fscache_register_fs(fsc) != 0))
- goto fail_fscache;
-
/* caps */
fsc->min_caps = fsopt->max_readdir;
return fsc;
-fail_fscache:
- ceph_fscache_unregister_fs(fsc);
fail_trunc_wq:
destroy_workqueue(fsc->trunc_wq);
fail_pg_inv_wq:
@@ -626,8 +636,6 @@ static void destroy_fs_client(struct ceph_fs_client *fsc)
{
dout("destroy_fs_client %p\n", fsc);
- ceph_fscache_unregister_fs(fsc);
-
destroy_workqueue(fsc->wb_wq);
destroy_workqueue(fsc->pg_inv_wq);
destroy_workqueue(fsc->trunc_wq);
@@ -636,8 +644,6 @@ static void destroy_fs_client(struct ceph_fs_client *fsc)
destroy_mount_options(fsc->mount_options);
- ceph_fs_debugfs_cleanup(fsc);
-
ceph_destroy_client(fsc->client);
kfree(fsc);
@@ -822,6 +828,13 @@ static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc)
if (err < 0)
goto out;
+ /* setup fscache */
+ if (fsc->mount_options->flags & CEPH_MOUNT_OPT_FSCACHE) {
+ err = ceph_fscache_register_fs(fsc);
+ if (err < 0)
+ goto out;
+ }
+
if (!fsc->mount_options->server_path) {
path = "";
dout("mount opening path \\t\n");
@@ -1040,6 +1053,12 @@ static void ceph_kill_sb(struct super_block *s)
ceph_mdsc_pre_umount(fsc->mdsc);
generic_shutdown_super(s);
+
+ fsc->client->extra_mon_dispatch = NULL;
+ ceph_fs_debugfs_cleanup(fsc);
+
+ ceph_fscache_unregister_fs(fsc);
+
ceph_mdsc_destroy(fsc);
destroy_fs_client(fsc);
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index a973acd8beaf..f02a2225fe42 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -73,6 +73,7 @@ struct ceph_mount_options {
char *snapdir_name; /* default ".snap" */
char *mds_namespace; /* default NULL */
char *server_path; /* default "/" */
+ char *fscache_uniq; /* default NULL */
};
struct ceph_fs_client {
@@ -793,7 +794,7 @@ extern int ceph_readdir_prepopulate(struct ceph_mds_request *req,
extern int ceph_inode_holds_cap(struct inode *inode, int mask);
-extern int ceph_inode_set_size(struct inode *inode, loff_t size);
+extern bool ceph_inode_set_size(struct inode *inode, loff_t size);
extern void __ceph_do_pending_vmtruncate(struct inode *inode);
extern void ceph_queue_vmtruncate(struct inode *inode);
@@ -918,6 +919,7 @@ extern void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
struct ceph_snap_context *snapc);
extern void ceph_flush_snaps(struct ceph_inode_info *ci,
struct ceph_mds_session **psession);
+extern bool __ceph_should_report_size(struct ceph_inode_info *ci);
extern void ceph_check_caps(struct ceph_inode_info *ci, int flags,
struct ceph_mds_session *session);
extern void ceph_check_delayed_caps(struct ceph_mds_client *mdsc);
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index 75267cdd5dfd..11263f102e4c 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -756,6 +756,9 @@ ssize_t __ceph_getxattr(struct inode *inode, const char *name, void *value,
/* let's see if a virtual xattr was requested */
vxattr = ceph_match_vxattr(inode, name);
if (vxattr) {
+ err = ceph_do_getattr(inode, 0, true);
+ if (err)
+ return err;
err = -ENODATA;
if (!(vxattr->exists_cb && !vxattr->exists_cb(ci)))
err = vxattr->getxattr_cb(ci, value, size);