summaryrefslogtreecommitdiffstats
path: root/fs/afs/inode.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2017-11-16 11:41:22 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2017-11-16 11:41:22 -0800
commit487e2c9f44c4b5ea23bfe87bb34679f7297a0bce (patch)
treee9dcf16175078ae2bed9a2fc120e6bd0b28f48e9 /fs/afs/inode.c
parentb630a23a731a436f9edbd9fa00739aaa3e174c15 (diff)
parent98bf40cd99fcfed0705812b6cbdbb3b441a42970 (diff)
downloadlinux-487e2c9f44c4b5ea23bfe87bb34679f7297a0bce.tar.bz2
Merge tag 'afs-next-20171113' of git://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs
Pull AFS updates from David Howells: "kAFS filesystem driver overhaul. The major points of the overhaul are: (1) Preliminary groundwork is laid for supporting network-namespacing of kAFS. The remainder of the namespacing work requires some way to pass namespace information to submounts triggered by an automount. This requires something like the mount overhaul that's in progress. (2) sockaddr_rxrpc is used in preference to in_addr for holding addresses internally and add support for talking to the YFS VL server. With this, kAFS can do everything over IPv6 as well as IPv4 if it's talking to servers that support it. (3) Callback handling is overhauled to be generally passive rather than active. 'Callbacks' are promises by the server to tell us about data and metadata changes. Callbacks are now checked when we next touch an inode rather than actively going and looking for it where possible. (4) File access permit caching is overhauled to store the caching information per-inode rather than per-directory, shared over subordinate files. Whilst older AFS servers only allow ACLs on directories (shared to the files in that directory), newer AFS servers break that restriction. To improve memory usage and to make it easier to do mass-key removal, permit combinations are cached and shared. (5) Cell database management is overhauled to allow lighter locks to be used and to make cell records autonomous state machines that look after getting their own DNS records and cleaning themselves up, in particular preventing races in acquiring and relinquishing the fscache token for the cell. (6) Volume caching is overhauled. The afs_vlocation record is got rid of to simplify things and the superblock is now keyed on the cell and the numeric volume ID only. The volume record is tied to a superblock and normal superblock management is used to mediate the lifetime of the volume fscache token. (7) File server record caching is overhauled to make server records independent of cells and volumes. A server can be in multiple cells (in such a case, the administrator must make sure that the VL services for all cells correctly reflect the volumes shared between those cells). Server records are now indexed using the UUID of the server rather than the address since a server can have multiple addresses. (8) File server rotation is overhauled to handle VMOVED, VBUSY (and similar), VOFFLINE and VNOVOL indications and to handle rotation both of servers and addresses of those servers. The rotation will also wait and retry if the server says it is busy. (9) Data writeback is overhauled. Each inode no longer stores a list of modified sections tagged with the key that authorised it in favour of noting the modified region of a page in page->private and storing a list of keys that made modifications in the inode. This simplifies things and allows other keys to be used to actually write to the server if a key that made a modification becomes useless. (10) Writable mmap() is implemented. This allows a kernel to be build entirely on AFS. Note that Pre AFS-3.4 servers are no longer supported, though this can be added back if necessary (AFS-3.4 was released in 1998)" * tag 'afs-next-20171113' of git://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs: (35 commits) afs: Protect call->state changes against signals afs: Trace page dirty/clean afs: Implement shared-writeable mmap afs: Get rid of the afs_writeback record afs: Introduce a file-private data record afs: Use a dynamic port if 7001 is in use afs: Fix directory read/modify race afs: Trace the sending of pages afs: Trace the initiation and completion of client calls afs: Fix documentation on # vs % prefix in mount source specification afs: Fix total-length calculation for multiple-page send afs: Only progress call state at end of Tx phase from rxrpc callback afs: Make use of the YFS service upgrade to fully support IPv6 afs: Overhaul volume and server record caching and fileserver rotation afs: Move server rotation code into its own file afs: Add an address list concept afs: Overhaul cell database management afs: Overhaul permit caching afs: Overhaul the callback handling afs: Rename struct afs_call server member to cm_server ...
Diffstat (limited to 'fs/afs/inode.c')
-rw-r--r--fs/afs/inode.c177
1 files changed, 113 insertions, 64 deletions
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index 342316a9e3e0..3415eb7484f6 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -23,11 +23,6 @@
#include <linux/namei.h>
#include "internal.h"
-struct afs_iget_data {
- struct afs_fid fid;
- struct afs_volume *volume; /* volume on which resides */
-};
-
static const struct inode_operations afs_symlink_inode_operations = {
.get_link = page_get_link,
.listxattr = afs_listxattr,
@@ -39,6 +34,7 @@ static const struct inode_operations afs_symlink_inode_operations = {
static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key)
{
struct inode *inode = AFS_VNODE_TO_I(vnode);
+ bool changed;
_debug("FS: ft=%d lk=%d sz=%llu ver=%Lu mod=%hu",
vnode->status.type,
@@ -47,6 +43,8 @@ static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key)
vnode->status.data_version,
vnode->status.mode);
+ read_seqlock_excl(&vnode->cb_lock);
+
switch (vnode->status.type) {
case AFS_FTYPE_FILE:
inode->i_mode = S_IFREG | vnode->status.mode;
@@ -63,9 +61,7 @@ static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key)
if ((vnode->status.mode & 0777) == 0644) {
inode->i_flags |= S_AUTOMOUNT;
- spin_lock(&vnode->lock);
set_bit(AFS_VNODE_MOUNTPOINT, &vnode->flags);
- spin_unlock(&vnode->lock);
inode->i_mode = S_IFDIR | 0555;
inode->i_op = &afs_mntpt_inode_operations;
@@ -78,13 +74,11 @@ static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key)
break;
default:
printk("kAFS: AFS vnode with undefined type\n");
+ read_sequnlock_excl(&vnode->cb_lock);
return -EBADMSG;
}
-#ifdef CONFIG_AFS_FSCACHE
- if (vnode->status.size != inode->i_size)
- fscache_attr_changed(vnode->cache);
-#endif
+ changed = (vnode->status.size != inode->i_size);
set_nlink(inode, vnode->status.nlink);
inode->i_uid = vnode->status.owner;
@@ -97,13 +91,49 @@ static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key)
inode->i_generation = vnode->fid.unique;
inode->i_version = vnode->status.data_version;
inode->i_mapping->a_ops = &afs_fs_aops;
+
+ read_sequnlock_excl(&vnode->cb_lock);
+
+#ifdef CONFIG_AFS_FSCACHE
+ if (changed)
+ fscache_attr_changed(vnode->cache);
+#endif
return 0;
}
/*
+ * Fetch file status from the volume.
+ */
+int afs_fetch_status(struct afs_vnode *vnode, struct key *key)
+{
+ struct afs_fs_cursor fc;
+ int ret;
+
+ _enter("%s,{%x:%u.%u,S=%lx}",
+ vnode->volume->name,
+ vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique,
+ vnode->flags);
+
+ ret = -ERESTARTSYS;
+ if (afs_begin_vnode_operation(&fc, vnode, key)) {
+ while (afs_select_fileserver(&fc)) {
+ fc.cb_break = vnode->cb_break + vnode->cb_s_break;
+ afs_fs_fetch_file_status(&fc, NULL);
+ }
+
+ afs_check_for_remote_deletion(&fc, fc.vnode);
+ afs_vnode_commit_status(&fc, vnode, fc.cb_break);
+ ret = afs_end_vnode_operation(&fc);
+ }
+
+ _leave(" = %d", ret);
+ return ret;
+}
+
+/*
* iget5() comparator
*/
-static int afs_iget5_test(struct inode *inode, void *opaque)
+int afs_iget5_test(struct inode *inode, void *opaque)
{
struct afs_iget_data *data = opaque;
@@ -204,7 +234,7 @@ struct inode *afs_iget_autocell(struct inode *dir, const char *dev_name,
*/
struct inode *afs_iget(struct super_block *sb, struct key *key,
struct afs_fid *fid, struct afs_file_status *status,
- struct afs_callback *cb)
+ struct afs_callback *cb, struct afs_cb_interest *cbi)
{
struct afs_iget_data data = { .fid = *fid };
struct afs_super_info *as;
@@ -237,8 +267,7 @@ struct inode *afs_iget(struct super_block *sb, struct key *key,
if (!status) {
/* it's a remotely extant inode */
- set_bit(AFS_VNODE_CB_BROKEN, &vnode->flags);
- ret = afs_vnode_fetch_status(vnode, NULL, key);
+ ret = afs_fetch_status(vnode, key);
if (ret < 0)
goto bad_inode;
} else {
@@ -249,16 +278,17 @@ struct inode *afs_iget(struct super_block *sb, struct key *key,
/* it's a symlink we just created (the fileserver
* didn't give us a callback) */
vnode->cb_version = 0;
- vnode->cb_expiry = 0;
vnode->cb_type = 0;
- vnode->cb_expires = ktime_get_real_seconds();
+ vnode->cb_expires_at = 0;
} else {
vnode->cb_version = cb->version;
- vnode->cb_expiry = cb->expiry;
vnode->cb_type = cb->type;
- vnode->cb_expires = vnode->cb_expiry +
- ktime_get_real_seconds();
+ vnode->cb_expires_at = cb->expiry;
+ vnode->cb_interest = afs_get_cb_interest(cbi);
+ set_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
}
+
+ vnode->cb_expires_at += ktime_get_real_seconds();
}
/* set up caching before mapping the status, as map-status reads the
@@ -320,25 +350,34 @@ void afs_zap_data(struct afs_vnode *vnode)
*/
int afs_validate(struct afs_vnode *vnode, struct key *key)
{
+ time64_t now = ktime_get_real_seconds();
+ bool valid = false;
int ret;
_enter("{v={%x:%u} fl=%lx},%x",
vnode->fid.vid, vnode->fid.vnode, vnode->flags,
key_serial(key));
- if (vnode->cb_promised &&
- !test_bit(AFS_VNODE_CB_BROKEN, &vnode->flags) &&
- !test_bit(AFS_VNODE_MODIFIED, &vnode->flags) &&
- !test_bit(AFS_VNODE_ZAP_DATA, &vnode->flags)) {
- if (vnode->cb_expires < ktime_get_real_seconds() + 10) {
- _debug("callback expired");
- set_bit(AFS_VNODE_CB_BROKEN, &vnode->flags);
- } else {
- goto valid;
+ /* Quickly check the callback state. Ideally, we'd use read_seqbegin
+ * here, but we have no way to pass the net namespace to the RCU
+ * cleanup for the server record.
+ */
+ read_seqlock_excl(&vnode->cb_lock);
+
+ if (test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) {
+ if (vnode->cb_s_break != vnode->cb_interest->server->cb_s_break) {
+ vnode->cb_s_break = vnode->cb_interest->server->cb_s_break;
+ } else if (!test_bit(AFS_VNODE_DIR_MODIFIED, &vnode->flags) &&
+ !test_bit(AFS_VNODE_ZAP_DATA, &vnode->flags) &&
+ vnode->cb_expires_at - 10 > now) {
+ valid = true;
}
+ } else if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) {
+ valid = true;
}
- if (test_bit(AFS_VNODE_DELETED, &vnode->flags))
+ read_sequnlock_excl(&vnode->cb_lock);
+ if (valid)
goto valid;
mutex_lock(&vnode->validate_lock);
@@ -347,12 +386,16 @@ int afs_validate(struct afs_vnode *vnode, struct key *key)
* a new promise - note that if the (parent) directory's metadata was
* changed then the security may be different and we may no longer have
* access */
- if (!vnode->cb_promised ||
- test_bit(AFS_VNODE_CB_BROKEN, &vnode->flags)) {
+ if (!test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) {
_debug("not promised");
- ret = afs_vnode_fetch_status(vnode, NULL, key);
- if (ret < 0)
+ ret = afs_fetch_status(vnode, key);
+ if (ret < 0) {
+ if (ret == -ENOENT) {
+ set_bit(AFS_VNODE_DELETED, &vnode->flags);
+ ret = -ESTALE;
+ }
goto error_unlock;
+ }
_debug("new promise [fl=%lx]", vnode->flags);
}
@@ -367,7 +410,7 @@ int afs_validate(struct afs_vnode *vnode, struct key *key)
if (test_and_clear_bit(AFS_VNODE_ZAP_DATA, &vnode->flags))
afs_zap_data(vnode);
- clear_bit(AFS_VNODE_MODIFIED, &vnode->flags);
+ clear_bit(AFS_VNODE_DIR_MODIFIED, &vnode->flags);
mutex_unlock(&vnode->validate_lock);
valid:
_leave(" = 0");
@@ -386,10 +429,17 @@ int afs_getattr(const struct path *path, struct kstat *stat,
u32 request_mask, unsigned int query_flags)
{
struct inode *inode = d_inode(path->dentry);
+ struct afs_vnode *vnode = AFS_FS_I(inode);
+ int seq = 0;
_enter("{ ino=%lu v=%u }", inode->i_ino, inode->i_generation);
- generic_fillattr(inode, stat);
+ do {
+ read_seqbegin_or_lock(&vnode->cb_lock, &seq);
+ generic_fillattr(inode, stat);
+ } while (need_seqretry(&vnode->cb_lock, seq));
+
+ done_seqretry(&vnode->cb_lock, seq);
return 0;
}
@@ -411,18 +461,14 @@ int afs_drop_inode(struct inode *inode)
*/
void afs_evict_inode(struct inode *inode)
{
- struct afs_permits *permits;
struct afs_vnode *vnode;
vnode = AFS_FS_I(inode);
- _enter("{%x:%u.%d} v=%u x=%u t=%u }",
+ _enter("{%x:%u.%d}",
vnode->fid.vid,
vnode->fid.vnode,
- vnode->fid.unique,
- vnode->cb_version,
- vnode->cb_expiry,
- vnode->cb_type);
+ vnode->fid.unique);
_debug("CLEAR INODE %p", inode);
@@ -431,31 +477,24 @@ void afs_evict_inode(struct inode *inode)
truncate_inode_pages_final(&inode->i_data);
clear_inode(inode);
- afs_give_up_callback(vnode);
-
- if (vnode->server) {
- spin_lock(&vnode->server->fs_lock);
- rb_erase(&vnode->server_rb, &vnode->server->fs_vnodes);
- spin_unlock(&vnode->server->fs_lock);
- afs_put_server(vnode->server);
- vnode->server = NULL;
+ if (vnode->cb_interest) {
+ afs_put_cb_interest(afs_i2net(inode), vnode->cb_interest);
+ vnode->cb_interest = NULL;
}
- ASSERT(list_empty(&vnode->writebacks));
- ASSERT(!vnode->cb_promised);
+ while (!list_empty(&vnode->wb_keys)) {
+ struct afs_wb_key *wbk = list_entry(vnode->wb_keys.next,
+ struct afs_wb_key, vnode_link);
+ list_del(&wbk->vnode_link);
+ afs_put_wb_key(wbk);
+ }
#ifdef CONFIG_AFS_FSCACHE
fscache_relinquish_cookie(vnode->cache, 0);
vnode->cache = NULL;
#endif
- mutex_lock(&vnode->permits_lock);
- permits = vnode->permits;
- RCU_INIT_POINTER(vnode->permits, NULL);
- mutex_unlock(&vnode->permits_lock);
- if (permits)
- call_rcu(&permits->rcu, afs_zap_permits);
-
+ afs_put_permits(vnode->permit_cache);
_leave("");
}
@@ -464,6 +503,7 @@ void afs_evict_inode(struct inode *inode)
*/
int afs_setattr(struct dentry *dentry, struct iattr *attr)
{
+ struct afs_fs_cursor fc;
struct afs_vnode *vnode = AFS_FS_I(d_inode(dentry));
struct key *key;
int ret;
@@ -479,13 +519,11 @@ int afs_setattr(struct dentry *dentry, struct iattr *attr)
}
/* flush any dirty data outstanding on a regular file */
- if (S_ISREG(vnode->vfs_inode.i_mode)) {
+ if (S_ISREG(vnode->vfs_inode.i_mode))
filemap_write_and_wait(vnode->vfs_inode.i_mapping);
- afs_writeback_all(vnode);
- }
if (attr->ia_valid & ATTR_FILE) {
- key = attr->ia_file->private_data;
+ key = afs_file_key(attr->ia_file);
} else {
key = afs_request_key(vnode->volume->cell);
if (IS_ERR(key)) {
@@ -494,7 +532,18 @@ int afs_setattr(struct dentry *dentry, struct iattr *attr)
}
}
- ret = afs_vnode_setattr(vnode, key, attr);
+ ret = -ERESTARTSYS;
+ if (afs_begin_vnode_operation(&fc, vnode, key)) {
+ while (afs_select_fileserver(&fc)) {
+ fc.cb_break = vnode->cb_break + vnode->cb_s_break;
+ afs_fs_setattr(&fc, attr);
+ }
+
+ afs_check_for_remote_deletion(&fc, fc.vnode);
+ afs_vnode_commit_status(&fc, vnode, fc.cb_break);
+ ret = afs_end_vnode_operation(&fc);
+ }
+
if (!(attr->ia_valid & ATTR_FILE))
key_put(key);