diff options
Diffstat (limited to 'fs/ceph')
-rw-r--r-- | fs/ceph/addr.c | 21 | ||||
-rw-r--r-- | fs/ceph/cache.c | 92 | ||||
-rw-r--r-- | fs/ceph/caps.c | 40 | ||||
-rw-r--r-- | fs/ceph/file.c | 2 | ||||
-rw-r--r-- | fs/ceph/inode.c | 18 | ||||
-rw-r--r-- | fs/ceph/locks.c | 25 | ||||
-rw-r--r-- | fs/ceph/mds_client.c | 4 | ||||
-rw-r--r-- | fs/ceph/super.c | 47 | ||||
-rw-r--r-- | fs/ceph/super.h | 4 | ||||
-rw-r--r-- | fs/ceph/xattr.c | 3 |
10 files changed, 201 insertions, 55 deletions
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 1e71e6ca5ddf..50836280a6f8 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -530,14 +530,10 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) long writeback_stat; u64 truncate_size; u32 truncate_seq; - int err = 0, len = PAGE_SIZE; + int err, len = PAGE_SIZE; dout("writepage %p idx %lu\n", page, page->index); - if (!page->mapping || !page->mapping->host) { - dout("writepage %p - no mapping\n", page); - return -EFAULT; - } inode = page->mapping->host; ci = ceph_inode(inode); fsc = ceph_inode_to_client(inode); @@ -547,7 +543,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) snapc = page_snap_context(page); if (snapc == NULL) { dout("writepage %p page %p not dirty?\n", inode, page); - goto out; + return 0; } oldest = get_oldest_context(inode, &snap_size, &truncate_size, &truncate_seq); @@ -555,9 +551,10 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) dout("writepage %p page %p snapc %p not writeable - noop\n", inode, page, snapc); /* we should only noop if called by kswapd */ - WARN_ON((current->flags & PF_MEMALLOC) == 0); + WARN_ON(!(current->flags & PF_MEMALLOC)); ceph_put_snap_context(oldest); - goto out; + redirty_page_for_writepage(wbc, page); + return 0; } ceph_put_snap_context(oldest); @@ -567,8 +564,9 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) /* is this a partial page at end of file? */ if (page_off >= snap_size) { dout("%p page eof %llu\n", page, snap_size); - goto out; + return 0; } + if (snap_size < page_off + len) len = snap_size - page_off; @@ -595,7 +593,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) dout("writepage interrupted page %p\n", page); redirty_page_for_writepage(wbc, page); end_page_writeback(page); - goto out; + return err; } dout("writepage setting page/mapping error %d %p\n", err, page); @@ -611,7 +609,6 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) end_page_writeback(page); ceph_put_wrbuffer_cap_refs(ci, 1, snapc); ceph_put_snap_context(snapc); /* page's reference */ -out: return err; } @@ -1318,7 +1315,7 @@ static int ceph_write_end(struct file *file, struct address_space *mapping, struct page *page, void *fsdata) { struct inode *inode = file_inode(file); - int check_cap = 0; + bool check_cap = false; dout("write_end file %p inode %p page %p %d~%d (%d)\n", file, inode, page, (int)pos, (int)copied, (int)len); diff --git a/fs/ceph/cache.c b/fs/ceph/cache.c index 4e7421caf380..fd1172823f86 100644 --- a/fs/ceph/cache.c +++ b/fs/ceph/cache.c @@ -35,18 +35,34 @@ struct fscache_netfs ceph_cache_netfs = { .version = 0, }; +static DEFINE_MUTEX(ceph_fscache_lock); +static LIST_HEAD(ceph_fscache_list); + +struct ceph_fscache_entry { + struct list_head list; + struct fscache_cookie *fscache; + struct ceph_fsid fsid; + size_t uniq_len; + char uniquifier[0]; +}; + static uint16_t ceph_fscache_session_get_key(const void *cookie_netfs_data, void *buffer, uint16_t maxbuf) { const struct ceph_fs_client* fsc = cookie_netfs_data; - uint16_t klen; + const char *fscache_uniq = fsc->mount_options->fscache_uniq; + uint16_t fsid_len, uniq_len; - klen = sizeof(fsc->client->fsid); - if (klen > maxbuf) + fsid_len = sizeof(fsc->client->fsid); + uniq_len = fscache_uniq ? strlen(fscache_uniq) : 0; + if (fsid_len + uniq_len > maxbuf) return 0; - memcpy(buffer, &fsc->client->fsid, klen); - return klen; + memcpy(buffer, &fsc->client->fsid, fsid_len); + if (uniq_len) + memcpy(buffer + fsid_len, fscache_uniq, uniq_len); + + return fsid_len + uniq_len; } static const struct fscache_cookie_def ceph_fscache_fsid_object_def = { @@ -67,13 +83,54 @@ void ceph_fscache_unregister(void) int ceph_fscache_register_fs(struct ceph_fs_client* fsc) { + const struct ceph_fsid *fsid = &fsc->client->fsid; + const char *fscache_uniq = fsc->mount_options->fscache_uniq; + size_t uniq_len = fscache_uniq ? strlen(fscache_uniq) : 0; + struct ceph_fscache_entry *ent; + int err = 0; + + mutex_lock(&ceph_fscache_lock); + list_for_each_entry(ent, &ceph_fscache_list, list) { + if (memcmp(&ent->fsid, fsid, sizeof(*fsid))) + continue; + if (ent->uniq_len != uniq_len) + continue; + if (uniq_len && memcmp(ent->uniquifier, fscache_uniq, uniq_len)) + continue; + + pr_err("fscache cookie already registered for fsid %pU\n", fsid); + pr_err(" use fsc=%%s mount option to specify a uniquifier\n"); + err = -EBUSY; + goto out_unlock; + } + + ent = kzalloc(sizeof(*ent) + uniq_len, GFP_KERNEL); + if (!ent) { + err = -ENOMEM; + goto out_unlock; + } + fsc->fscache = fscache_acquire_cookie(ceph_cache_netfs.primary_index, &ceph_fscache_fsid_object_def, fsc, true); - if (!fsc->fscache) - pr_err("Unable to register fsid: %p fscache cookie\n", fsc); - return 0; + if (fsc->fscache) { + memcpy(&ent->fsid, fsid, sizeof(*fsid)); + if (uniq_len > 0) { + memcpy(&ent->uniquifier, fscache_uniq, uniq_len); + ent->uniq_len = uniq_len; + } + ent->fscache = fsc->fscache; + list_add_tail(&ent->list, &ceph_fscache_list); + } else { + kfree(ent); + pr_err("unable to register fscache cookie for fsid %pU\n", + fsid); + /* all other fs ignore this error */ + } +out_unlock: + mutex_unlock(&ceph_fscache_lock); + return err; } static uint16_t ceph_fscache_inode_get_key(const void *cookie_netfs_data, @@ -349,7 +406,24 @@ void ceph_invalidate_fscache_page(struct inode* inode, struct page *page) void ceph_fscache_unregister_fs(struct ceph_fs_client* fsc) { - fscache_relinquish_cookie(fsc->fscache, 0); + if (fscache_cookie_valid(fsc->fscache)) { + struct ceph_fscache_entry *ent; + bool found = false; + + mutex_lock(&ceph_fscache_lock); + list_for_each_entry(ent, &ceph_fscache_list, list) { + if (ent->fscache == fsc->fscache) { + list_del(&ent->list); + kfree(ent); + found = true; + break; + } + } + WARN_ON_ONCE(!found); + mutex_unlock(&ceph_fscache_lock); + + __fscache_relinquish_cookie(fsc->fscache, 0); + } fsc->fscache = NULL; } diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index a3ebb632294e..7007ae2a5ad2 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -1653,6 +1653,21 @@ static int try_nonblocking_invalidate(struct inode *inode) return -1; } +bool __ceph_should_report_size(struct ceph_inode_info *ci) +{ + loff_t size = ci->vfs_inode.i_size; + /* mds will adjust max size according to the reported size */ + if (ci->i_flushing_caps & CEPH_CAP_FILE_WR) + return false; + if (size >= ci->i_max_size) + return true; + /* half of previous max_size increment has been used */ + if (ci->i_max_size > ci->i_reported_size && + (size << 1) >= ci->i_max_size + ci->i_reported_size) + return true; + return false; +} + /* * Swiss army knife function to examine currently used and wanted * versus held caps. Release, flush, ack revoked caps to mds as @@ -1806,8 +1821,7 @@ retry_locked: } /* approaching file_max? */ - if ((inode->i_size << 1) >= ci->i_max_size && - (ci->i_reported_size << 1) < ci->i_max_size) { + if (__ceph_should_report_size(ci)) { dout("i_size approaching max_size\n"); goto ack; } @@ -3027,8 +3041,10 @@ static void handle_cap_grant(struct ceph_mds_client *mdsc, le32_to_cpu(grant->truncate_seq), le64_to_cpu(grant->truncate_size), size); - /* max size increase? */ - if (ci->i_auth_cap == cap && max_size != ci->i_max_size) { + } + + if (ci->i_auth_cap == cap && (newcaps & CEPH_CAP_ANY_FILE_WR)) { + if (max_size != ci->i_max_size) { dout("max_size %lld -> %llu\n", ci->i_max_size, max_size); ci->i_max_size = max_size; @@ -3037,6 +3053,10 @@ static void handle_cap_grant(struct ceph_mds_client *mdsc, ci->i_requested_max_size = 0; } wake = true; + } else if (ci->i_wanted_max_size > ci->i_max_size && + ci->i_wanted_max_size > ci->i_requested_max_size) { + /* CEPH_CAP_OP_IMPORT */ + wake = true; } } @@ -3554,7 +3574,6 @@ retry: } /* make sure we re-request max_size, if necessary */ - ci->i_wanted_max_size = 0; ci->i_requested_max_size = 0; *old_issued = issued; @@ -3790,6 +3809,7 @@ bad: */ void ceph_check_delayed_caps(struct ceph_mds_client *mdsc) { + struct inode *inode; struct ceph_inode_info *ci; int flags = CHECK_CAPS_NODELAY; @@ -3805,9 +3825,15 @@ void ceph_check_delayed_caps(struct ceph_mds_client *mdsc) time_before(jiffies, ci->i_hold_caps_max)) break; list_del_init(&ci->i_cap_delay_list); + + inode = igrab(&ci->vfs_inode); spin_unlock(&mdsc->cap_delay_lock); - dout("check_delayed_caps on %p\n", &ci->vfs_inode); - ceph_check_caps(ci, flags, NULL); + + if (inode) { + dout("check_delayed_caps on %p\n", inode); + ceph_check_caps(ci, flags, NULL); + iput(inode); + } } spin_unlock(&mdsc->cap_delay_lock); } diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 29308a80d66f..3d48c415f3cb 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -1040,8 +1040,8 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos, int num_pages; int written = 0; int flags; - int check_caps = 0; int ret; + bool check_caps = false; struct timespec mtime = current_time(inode); size_t count = iov_iter_count(from); diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 4de6cdddf059..220dfd87cbfa 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -1016,6 +1016,7 @@ static void update_dentry_lease(struct dentry *dentry, long unsigned ttl = from_time + (duration * HZ) / 1000; long unsigned half_ttl = from_time + (duration * HZ / 2) / 1000; struct inode *dir; + struct ceph_mds_session *old_lease_session = NULL; /* * Make sure dentry's inode matches tgt_vino. NULL tgt_vino means that @@ -1051,8 +1052,10 @@ static void update_dentry_lease(struct dentry *dentry, time_before(ttl, di->time)) goto out_unlock; /* we already have a newer lease. */ - if (di->lease_session && di->lease_session != session) - goto out_unlock; + if (di->lease_session && di->lease_session != session) { + old_lease_session = di->lease_session; + di->lease_session = NULL; + } ceph_dentry_lru_touch(dentry); @@ -1065,6 +1068,8 @@ static void update_dentry_lease(struct dentry *dentry, di->time = ttl; out_unlock: spin_unlock(&dentry->d_lock); + if (old_lease_session) + ceph_put_mds_session(old_lease_session); return; } @@ -1653,20 +1658,17 @@ out: return err; } -int ceph_inode_set_size(struct inode *inode, loff_t size) +bool ceph_inode_set_size(struct inode *inode, loff_t size) { struct ceph_inode_info *ci = ceph_inode(inode); - int ret = 0; + bool ret; spin_lock(&ci->i_ceph_lock); dout("set_size %p %llu -> %llu\n", inode, inode->i_size, size); i_size_write(inode, size); inode->i_blocks = calc_inode_blocks(size); - /* tell the MDS if we are approaching max_size */ - if ((size << 1) >= ci->i_max_size && - (ci->i_reported_size << 1) < ci->i_max_size) - ret = 1; + ret = __ceph_should_report_size(ci); spin_unlock(&ci->i_ceph_lock); return ret; diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c index 6806dbeaee19..64ae74472046 100644 --- a/fs/ceph/locks.c +++ b/fs/ceph/locks.c @@ -127,6 +127,29 @@ static int ceph_lock_wait_for_completion(struct ceph_mds_client *mdsc, dout("ceph_lock_wait_for_completion: request %llu was interrupted\n", req->r_tid); + mutex_lock(&mdsc->mutex); + if (test_bit(CEPH_MDS_R_GOT_RESULT, &req->r_req_flags)) { + err = 0; + } else { + /* + * ensure we aren't running concurrently with + * ceph_fill_trace or ceph_readdir_prepopulate, which + * rely on locks (dir mutex) held by our caller. + */ + mutex_lock(&req->r_fill_mutex); + req->r_err = err; + set_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags); + mutex_unlock(&req->r_fill_mutex); + + if (!req->r_session) { + // haven't sent the request + err = 0; + } + } + mutex_unlock(&mdsc->mutex); + if (!err) + return 0; + intr_req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SETFILELOCK, USE_AUTH_MDS); if (IS_ERR(intr_req)) @@ -146,7 +169,7 @@ static int ceph_lock_wait_for_completion(struct ceph_mds_client *mdsc, if (err && err != -ERESTARTSYS) return err; - wait_for_completion(&req->r_completion); + wait_for_completion_killable(&req->r_safe_completion); return 0; } diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 0c05df44cc6c..666a9f274832 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -3769,13 +3769,13 @@ static void ceph_mdsc_stop(struct ceph_mds_client *mdsc) void ceph_mdsc_destroy(struct ceph_fs_client *fsc) { struct ceph_mds_client *mdsc = fsc->mdsc; - dout("mdsc_destroy %p\n", mdsc); - ceph_mdsc_stop(mdsc); /* flush out any connection work with references to us */ ceph_msgr_flush(); + ceph_mdsc_stop(mdsc); + fsc->mdsc = NULL; kfree(mdsc); dout("mdsc_destroy %p done\n", mdsc); diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 8d7918ce694a..aa06a8c24792 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -121,6 +121,7 @@ enum { /* int args above */ Opt_snapdirname, Opt_mds_namespace, + Opt_fscache_uniq, Opt_last_string, /* string args above */ Opt_dirstat, @@ -158,6 +159,7 @@ static match_table_t fsopt_tokens = { /* int args above */ {Opt_snapdirname, "snapdirname=%s"}, {Opt_mds_namespace, "mds_namespace=%s"}, + {Opt_fscache_uniq, "fsc=%s"}, /* string args above */ {Opt_dirstat, "dirstat"}, {Opt_nodirstat, "nodirstat"}, @@ -223,6 +225,14 @@ static int parse_fsopt_token(char *c, void *private) if (!fsopt->mds_namespace) return -ENOMEM; break; + case Opt_fscache_uniq: + fsopt->fscache_uniq = kstrndup(argstr[0].from, + argstr[0].to-argstr[0].from, + GFP_KERNEL); + if (!fsopt->fscache_uniq) + return -ENOMEM; + fsopt->flags |= CEPH_MOUNT_OPT_FSCACHE; + break; /* misc */ case Opt_wsize: fsopt->wsize = intval; @@ -317,6 +327,7 @@ static void destroy_mount_options(struct ceph_mount_options *args) kfree(args->snapdir_name); kfree(args->mds_namespace); kfree(args->server_path); + kfree(args->fscache_uniq); kfree(args); } @@ -350,10 +361,12 @@ static int compare_mount_options(struct ceph_mount_options *new_fsopt, ret = strcmp_null(fsopt1->mds_namespace, fsopt2->mds_namespace); if (ret) return ret; - ret = strcmp_null(fsopt1->server_path, fsopt2->server_path); if (ret) return ret; + ret = strcmp_null(fsopt1->fscache_uniq, fsopt2->fscache_uniq); + if (ret) + return ret; return ceph_compare_options(new_opt, fsc->client); } @@ -475,8 +488,12 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root) seq_puts(m, ",noasyncreaddir"); if ((fsopt->flags & CEPH_MOUNT_OPT_DCACHE) == 0) seq_puts(m, ",nodcache"); - if (fsopt->flags & CEPH_MOUNT_OPT_FSCACHE) - seq_puts(m, ",fsc"); + if (fsopt->flags & CEPH_MOUNT_OPT_FSCACHE) { + if (fsopt->fscache_uniq) + seq_printf(m, ",fsc=%s", fsopt->fscache_uniq); + else + seq_puts(m, ",fsc"); + } if (fsopt->flags & CEPH_MOUNT_OPT_NOPOOLPERM) seq_puts(m, ",nopoolperm"); @@ -597,18 +614,11 @@ static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt, if (!fsc->wb_pagevec_pool) goto fail_trunc_wq; - /* setup fscache */ - if ((fsopt->flags & CEPH_MOUNT_OPT_FSCACHE) && - (ceph_fscache_register_fs(fsc) != 0)) - goto fail_fscache; - /* caps */ fsc->min_caps = fsopt->max_readdir; return fsc; -fail_fscache: - ceph_fscache_unregister_fs(fsc); fail_trunc_wq: destroy_workqueue(fsc->trunc_wq); fail_pg_inv_wq: @@ -626,8 +636,6 @@ static void destroy_fs_client(struct ceph_fs_client *fsc) { dout("destroy_fs_client %p\n", fsc); - ceph_fscache_unregister_fs(fsc); - destroy_workqueue(fsc->wb_wq); destroy_workqueue(fsc->pg_inv_wq); destroy_workqueue(fsc->trunc_wq); @@ -636,8 +644,6 @@ static void destroy_fs_client(struct ceph_fs_client *fsc) destroy_mount_options(fsc->mount_options); - ceph_fs_debugfs_cleanup(fsc); - ceph_destroy_client(fsc->client); kfree(fsc); @@ -822,6 +828,13 @@ static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc) if (err < 0) goto out; + /* setup fscache */ + if (fsc->mount_options->flags & CEPH_MOUNT_OPT_FSCACHE) { + err = ceph_fscache_register_fs(fsc); + if (err < 0) + goto out; + } + if (!fsc->mount_options->server_path) { path = ""; dout("mount opening path \\t\n"); @@ -1040,6 +1053,12 @@ static void ceph_kill_sb(struct super_block *s) ceph_mdsc_pre_umount(fsc->mdsc); generic_shutdown_super(s); + + fsc->client->extra_mon_dispatch = NULL; + ceph_fs_debugfs_cleanup(fsc); + + ceph_fscache_unregister_fs(fsc); + ceph_mdsc_destroy(fsc); destroy_fs_client(fsc); diff --git a/fs/ceph/super.h b/fs/ceph/super.h index a973acd8beaf..f02a2225fe42 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -73,6 +73,7 @@ struct ceph_mount_options { char *snapdir_name; /* default ".snap" */ char *mds_namespace; /* default NULL */ char *server_path; /* default "/" */ + char *fscache_uniq; /* default NULL */ }; struct ceph_fs_client { @@ -793,7 +794,7 @@ extern int ceph_readdir_prepopulate(struct ceph_mds_request *req, extern int ceph_inode_holds_cap(struct inode *inode, int mask); -extern int ceph_inode_set_size(struct inode *inode, loff_t size); +extern bool ceph_inode_set_size(struct inode *inode, loff_t size); extern void __ceph_do_pending_vmtruncate(struct inode *inode); extern void ceph_queue_vmtruncate(struct inode *inode); @@ -918,6 +919,7 @@ extern void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr, struct ceph_snap_context *snapc); extern void ceph_flush_snaps(struct ceph_inode_info *ci, struct ceph_mds_session **psession); +extern bool __ceph_should_report_size(struct ceph_inode_info *ci); extern void ceph_check_caps(struct ceph_inode_info *ci, int flags, struct ceph_mds_session *session); extern void ceph_check_delayed_caps(struct ceph_mds_client *mdsc); diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index 75267cdd5dfd..11263f102e4c 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c @@ -756,6 +756,9 @@ ssize_t __ceph_getxattr(struct inode *inode, const char *name, void *value, /* let's see if a virtual xattr was requested */ vxattr = ceph_match_vxattr(inode, name); if (vxattr) { + err = ceph_do_getattr(inode, 0, true); + if (err) + return err; err = -ENODATA; if (!(vxattr->exists_cb && !vxattr->exists_cb(ci))) err = vxattr->getxattr_cb(ci, value, size); |