summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTrond Myklebust <trond.myklebust@hammerspace.com>2020-07-17 10:35:48 -0400
committerTrond Myklebust <trond.myklebust@hammerspace.com>2020-07-17 10:35:48 -0400
commit57f80c0eda4d153cd4329228518db75523d0f06c (patch)
treeef6aef03fb94d0bf6a8b3039cb9214c50d66a9f8
parent18eb87f4443a0a23757e8a1ce3b3fa03714702fd (diff)
parent95ad37f90c338e3fd4abf61cecfe02b6f3e080f0 (diff)
downloadlinux-57f80c0eda4d153cd4329228518db75523d0f06c.tar.bz2
Merge branch 'xattr-devel'
-rw-r--r--fs/nfs/Makefile2
-rw-r--r--fs/nfs/client.c22
-rw-r--r--fs/nfs/dir.c24
-rw-r--r--fs/nfs/inode.c16
-rw-r--r--fs/nfs/nfs42.h24
-rw-r--r--fs/nfs/nfs42proc.c248
-rw-r--r--fs/nfs/nfs42xattr.c1083
-rw-r--r--fs/nfs/nfs42xdr.c438
-rw-r--r--fs/nfs/nfs4_fs.h35
-rw-r--r--fs/nfs/nfs4client.c31
-rw-r--r--fs/nfs/nfs4proc.c237
-rw-r--r--fs/nfs/nfs4super.c10
-rw-r--r--fs/nfs/nfs4xdr.c31
-rw-r--r--fs/nfs/nfstrace.h3
-rw-r--r--include/linux/nfs4.h25
-rw-r--r--include/linux/nfs_fs.h12
-rw-r--r--include/linux/nfs_fs_sb.h6
-rw-r--r--include/linux/nfs_xdr.h60
-rw-r--r--include/uapi/linux/nfs4.h3
-rw-r--r--include/uapi/linux/nfs_fs.h1
20 files changed, 2269 insertions, 42 deletions
diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile
index 2433c3e03cfa..22d11fdc6deb 100644
--- a/fs/nfs/Makefile
+++ b/fs/nfs/Makefile
@@ -30,7 +30,7 @@ nfsv4-y := nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o nfs4super.o nfs4file.o
nfsv4-$(CONFIG_NFS_USE_LEGACY_DNS) += cache_lib.o
nfsv4-$(CONFIG_SYSCTL) += nfs4sysctl.o
nfsv4-$(CONFIG_NFS_V4_1) += pnfs.o pnfs_dev.o pnfs_nfs.o
-nfsv4-$(CONFIG_NFS_V4_2) += nfs42proc.o
+nfsv4-$(CONFIG_NFS_V4_2) += nfs42proc.o nfs42xattr.o
obj-$(CONFIG_PNFS_FILE_LAYOUT) += filelayout/
obj-$(CONFIG_PNFS_BLOCK) += blocklayout/
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index f1ff3076e4a4..4b8cc93913f7 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -50,6 +50,7 @@
#include "nfs.h"
#include "netns.h"
#include "sysfs.h"
+#include "nfs42.h"
#define NFSDBG_FACILITY NFSDBG_CLIENT
@@ -749,7 +750,7 @@ error:
static void nfs_server_set_fsinfo(struct nfs_server *server,
struct nfs_fsinfo *fsinfo)
{
- unsigned long max_rpc_payload;
+ unsigned long max_rpc_payload, raw_max_rpc_payload;
/* Work out a lot of parameters */
if (server->rsize == 0)
@@ -762,7 +763,9 @@ static void nfs_server_set_fsinfo(struct nfs_server *server,
if (fsinfo->wtmax >= 512 && server->wsize > fsinfo->wtmax)
server->wsize = nfs_block_size(fsinfo->wtmax, NULL);
- max_rpc_payload = nfs_block_size(rpc_max_payload(server->client), NULL);
+ raw_max_rpc_payload = rpc_max_payload(server->client);
+ max_rpc_payload = nfs_block_size(raw_max_rpc_payload, NULL);
+
if (server->rsize > max_rpc_payload)
server->rsize = max_rpc_payload;
if (server->rsize > NFS_MAX_FILE_IO_SIZE)
@@ -795,6 +798,21 @@ static void nfs_server_set_fsinfo(struct nfs_server *server,
server->clone_blksize = fsinfo->clone_blksize;
/* We're airborne Set socket buffersize */
rpc_setbufsize(server->client, server->wsize + 100, server->rsize + 100);
+
+#ifdef CONFIG_NFS_V4_2
+ /*
+ * Defaults until limited by the session parameters.
+ */
+ server->gxasize = min_t(unsigned int, raw_max_rpc_payload,
+ XATTR_SIZE_MAX);
+ server->sxasize = min_t(unsigned int, raw_max_rpc_payload,
+ XATTR_SIZE_MAX);
+ server->lxasize = min_t(unsigned int, raw_max_rpc_payload,
+ nfs42_listxattr_xdrsize(XATTR_LIST_MAX));
+
+ if (fsinfo->xattr_support)
+ server->caps |= NFS_CAP_XATTR;
+#endif
}
/*
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 5a331da5f55a..a12f42e7d8c7 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -2460,7 +2460,7 @@ static struct nfs_access_entry *nfs_access_search_rbtree(struct inode *inode, co
return NULL;
}
-static int nfs_access_get_cached(struct inode *inode, const struct cred *cred, struct nfs_access_entry *res, bool may_block)
+static int nfs_access_get_cached_locked(struct inode *inode, const struct cred *cred, struct nfs_access_entry *res, bool may_block)
{
struct nfs_inode *nfsi = NFS_I(inode);
struct nfs_access_entry *cache;
@@ -2533,6 +2533,20 @@ out:
return err;
}
+int nfs_access_get_cached(struct inode *inode, const struct cred *cred, struct
+nfs_access_entry *res, bool may_block)
+{
+ int status;
+
+ status = nfs_access_get_cached_rcu(inode, cred, res);
+ if (status != 0)
+ status = nfs_access_get_cached_locked(inode, cred, res,
+ may_block);
+
+ return status;
+}
+EXPORT_SYMBOL_GPL(nfs_access_get_cached);
+
static void nfs_access_add_rbtree(struct inode *inode, struct nfs_access_entry *set)
{
struct nfs_inode *nfsi = NFS_I(inode);
@@ -2647,9 +2661,7 @@ static int nfs_do_access(struct inode *inode, const struct cred *cred, int mask)
trace_nfs_access_enter(inode);
- status = nfs_access_get_cached_rcu(inode, cred, &cache);
- if (status != 0)
- status = nfs_access_get_cached(inode, cred, &cache, may_block);
+ status = nfs_access_get_cached(inode, cred, &cache, may_block);
if (status == 0)
goto out_cached;
@@ -2661,6 +2673,10 @@ static int nfs_do_access(struct inode *inode, const struct cred *cred, int mask)
* Determine which access bits we want to ask for...
*/
cache.mask = NFS_ACCESS_READ | NFS_ACCESS_MODIFY | NFS_ACCESS_EXTEND;
+ if (nfs_server_capable(inode, NFS_CAP_XATTR)) {
+ cache.mask |= NFS_ACCESS_XAREAD | NFS_ACCESS_XAWRITE |
+ NFS_ACCESS_XALIST;
+ }
if (S_ISDIR(inode->i_mode))
cache.mask |= NFS_ACCESS_DELETE | NFS_ACCESS_LOOKUP;
else
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 90f77fd0d1ca..aa6493905bbe 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -193,6 +193,7 @@ bool nfs_check_cache_invalid(struct inode *inode, unsigned long flags)
return nfs_check_cache_invalid_not_delegated(inode, flags);
}
+EXPORT_SYMBOL_GPL(nfs_check_cache_invalid);
static void nfs_set_cache_invalid(struct inode *inode, unsigned long flags)
{
@@ -204,7 +205,8 @@ static void nfs_set_cache_invalid(struct inode *inode, unsigned long flags)
flags &= ~NFS_INO_INVALID_OTHER;
flags &= ~(NFS_INO_INVALID_CHANGE
| NFS_INO_INVALID_SIZE
- | NFS_INO_REVAL_PAGECACHE);
+ | NFS_INO_REVAL_PAGECACHE
+ | NFS_INO_INVALID_XATTR);
}
if (inode->i_mapping->nrpages == 0)
@@ -233,11 +235,13 @@ static void nfs_zap_caches_locked(struct inode *inode)
| NFS_INO_INVALID_DATA
| NFS_INO_INVALID_ACCESS
| NFS_INO_INVALID_ACL
+ | NFS_INO_INVALID_XATTR
| NFS_INO_REVAL_PAGECACHE);
} else
nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR
| NFS_INO_INVALID_ACCESS
| NFS_INO_INVALID_ACL
+ | NFS_INO_INVALID_XATTR
| NFS_INO_REVAL_PAGECACHE);
nfs_zap_label_cache_locked(nfsi);
}
@@ -542,6 +546,8 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, st
inode->i_gid = fattr->gid;
else if (nfs_server_capable(inode, NFS_CAP_OWNER_GROUP))
nfs_set_cache_invalid(inode, NFS_INO_INVALID_OTHER);
+ if (nfs_server_capable(inode, NFS_CAP_XATTR))
+ nfs_set_cache_invalid(inode, NFS_INO_INVALID_XATTR);
if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED)
inode->i_blocks = fattr->du.nfs2.blocks;
if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) {
@@ -1377,6 +1383,8 @@ static void nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr)
inode_set_iversion_raw(inode, fattr->change_attr);
if (S_ISDIR(inode->i_mode))
nfs_set_cache_invalid(inode, NFS_INO_INVALID_DATA);
+ else if (nfs_server_capable(inode, NFS_CAP_XATTR))
+ nfs_set_cache_invalid(inode, NFS_INO_INVALID_XATTR);
}
/* If we have atomic WCC data, we may update some attributes */
ts = inode->i_ctime;
@@ -1894,7 +1902,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
if (!(have_writers || have_delegation)) {
invalid |= NFS_INO_INVALID_DATA
| NFS_INO_INVALID_ACCESS
- | NFS_INO_INVALID_ACL;
+ | NFS_INO_INVALID_ACL
+ | NFS_INO_INVALID_XATTR;
/* Force revalidate of all attributes */
save_cache_validity |= NFS_INO_INVALID_CTIME
| NFS_INO_INVALID_MTIME
@@ -2097,6 +2106,9 @@ struct inode *nfs_alloc_inode(struct super_block *sb)
#if IS_ENABLED(CONFIG_NFS_V4)
nfsi->nfs4_acl = NULL;
#endif /* CONFIG_NFS_V4 */
+#ifdef CONFIG_NFS_V4_2
+ nfsi->xattr_cache = NULL;
+#endif
return &nfsi->vfs_inode;
}
EXPORT_SYMBOL_GPL(nfs_alloc_inode);
diff --git a/fs/nfs/nfs42.h b/fs/nfs/nfs42.h
index c891af949886..0fe5aacbcfdf 100644
--- a/fs/nfs/nfs42.h
+++ b/fs/nfs/nfs42.h
@@ -6,6 +6,8 @@
#ifndef __LINUX_FS_NFS_NFS4_2_H
#define __LINUX_FS_NFS_NFS4_2_H
+#include <linux/xattr.h>
+
/*
* FIXME: four LAYOUTSTATS calls per compound at most! Do we need to support
* more? Need to consider not to pre-alloc too much for a compound.
@@ -36,5 +38,27 @@ static inline bool nfs42_files_from_same_server(struct file *in,
return nfs4_check_serverowner_major_id(c_in->cl_serverowner,
c_out->cl_serverowner);
}
+
+ssize_t nfs42_proc_getxattr(struct inode *inode, const char *name,
+ void *buf, size_t buflen);
+int nfs42_proc_setxattr(struct inode *inode, const char *name,
+ const void *buf, size_t buflen, int flags);
+ssize_t nfs42_proc_listxattrs(struct inode *inode, void *buf,
+ size_t buflen, u64 *cookiep, bool *eofp);
+int nfs42_proc_removexattr(struct inode *inode, const char *name);
+
+/*
+ * Maximum XDR buffer size needed for a listxattr buffer of buflen size.
+ *
+ * The upper boundary is a buffer with all 1-byte sized attribute names.
+ * They would be 7 bytes long in the eventual buffer ("user.x\0"), and
+ * 8 bytes long XDR-encoded.
+ *
+ * Include the trailing eof word as well.
+ */
+static inline u32 nfs42_listxattr_xdrsize(u32 buflen)
+{
+ return ((buflen / (XATTR_USER_PREFIX_LEN + 2)) * 8) + 4;
+}
#endif /* CONFIG_NFS_V4_2 */
#endif /* __LINUX_FS_NFS_NFS4_2_H */
diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
index e2ae54b35dfe..e200522469af 100644
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
@@ -1088,3 +1088,251 @@ out_put_src_lock:
nfs_put_lock_context(src_lock);
return err;
}
+
+#define NFS4XATTR_MAXPAGES DIV_ROUND_UP(XATTR_SIZE_MAX, PAGE_SIZE)
+
+static int _nfs42_proc_removexattr(struct inode *inode, const char *name)
+{
+ struct nfs_server *server = NFS_SERVER(inode);
+ struct nfs42_removexattrargs args = {
+ .fh = NFS_FH(inode),
+ .xattr_name = name,
+ };
+ struct nfs42_removexattrres res;
+ struct rpc_message msg = {
+ .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_REMOVEXATTR],
+ .rpc_argp = &args,
+ .rpc_resp = &res,
+ };
+ int ret;
+ unsigned long timestamp = jiffies;
+
+ ret = nfs4_call_sync(server->client, server, &msg, &args.seq_args,
+ &res.seq_res, 1);
+ if (!ret)
+ nfs4_update_changeattr(inode, &res.cinfo, timestamp, 0);
+
+ return ret;
+}
+
+static int _nfs42_proc_setxattr(struct inode *inode, const char *name,
+ const void *buf, size_t buflen, int flags)
+{
+ struct nfs_server *server = NFS_SERVER(inode);
+ struct page *pages[NFS4XATTR_MAXPAGES];
+ struct nfs42_setxattrargs arg = {
+ .fh = NFS_FH(inode),
+ .xattr_pages = pages,
+ .xattr_len = buflen,
+ .xattr_name = name,
+ .xattr_flags = flags,
+ };
+ struct nfs42_setxattrres res;
+ struct rpc_message msg = {
+ .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETXATTR],
+ .rpc_argp = &arg,
+ .rpc_resp = &res,
+ };
+ int ret, np;
+ unsigned long timestamp = jiffies;
+
+ if (buflen > server->sxasize)
+ return -ERANGE;
+
+ if (buflen > 0) {
+ np = nfs4_buf_to_pages_noslab(buf, buflen, arg.xattr_pages);
+ if (np < 0)
+ return np;
+ } else
+ np = 0;
+
+ ret = nfs4_call_sync(server->client, server, &msg, &arg.seq_args,
+ &res.seq_res, 1);
+
+ for (; np > 0; np--)
+ put_page(pages[np - 1]);
+
+ if (!ret)
+ nfs4_update_changeattr(inode, &res.cinfo, timestamp, 0);
+
+ return ret;
+}
+
+static ssize_t _nfs42_proc_getxattr(struct inode *inode, const char *name,
+ void *buf, size_t buflen)
+{
+ struct nfs_server *server = NFS_SERVER(inode);
+ struct page *pages[NFS4XATTR_MAXPAGES] = {};
+ struct nfs42_getxattrargs arg = {
+ .fh = NFS_FH(inode),
+ .xattr_pages = pages,
+ .xattr_len = buflen,
+ .xattr_name = name,
+ };
+ struct nfs42_getxattrres res;
+ struct rpc_message msg = {
+ .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_GETXATTR],
+ .rpc_argp = &arg,
+ .rpc_resp = &res,
+ };
+ int ret, np;
+
+ ret = nfs4_call_sync(server->client, server, &msg, &arg.seq_args,
+ &res.seq_res, 0);
+ if (ret < 0)
+ return ret;
+
+ /*
+ * Normally, the caching is done one layer up, but for successful
+ * RPCS, always cache the result here, even if the caller was
+ * just querying the length, or if the reply was too big for
+ * the caller. This avoids a second RPC in the case of the
+ * common query-alloc-retrieve cycle for xattrs.
+ *
+ * Note that xattr_len is always capped to XATTR_SIZE_MAX.
+ */
+
+ nfs4_xattr_cache_add(inode, name, NULL, pages, res.xattr_len);
+
+ if (buflen) {
+ if (res.xattr_len > buflen)
+ return -ERANGE;
+ _copy_from_pages(buf, pages, 0, res.xattr_len);
+ }
+
+ np = DIV_ROUND_UP(res.xattr_len, PAGE_SIZE);
+ while (--np >= 0)
+ __free_page(pages[np]);
+
+ return res.xattr_len;
+}
+
+static ssize_t _nfs42_proc_listxattrs(struct inode *inode, void *buf,
+ size_t buflen, u64 *cookiep, bool *eofp)
+{
+ struct nfs_server *server = NFS_SERVER(inode);
+ struct page **pages;
+ struct nfs42_listxattrsargs arg = {
+ .fh = NFS_FH(inode),
+ .cookie = *cookiep,
+ };
+ struct nfs42_listxattrsres res = {
+ .eof = false,
+ .xattr_buf = buf,
+ .xattr_len = buflen,
+ };
+ struct rpc_message msg = {
+ .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LISTXATTRS],
+ .rpc_argp = &arg,
+ .rpc_resp = &res,
+ };
+ u32 xdrlen;
+ int ret, np;
+
+
+ res.scratch = alloc_page(GFP_KERNEL);
+ if (!res.scratch)
+ return -ENOMEM;
+
+ xdrlen = nfs42_listxattr_xdrsize(buflen);
+ if (xdrlen > server->lxasize)
+ xdrlen = server->lxasize;
+ np = xdrlen / PAGE_SIZE + 1;
+
+ pages = kcalloc(np, sizeof(struct page *), GFP_KERNEL);
+ if (pages == NULL) {
+ __free_page(res.scratch);
+ return -ENOMEM;
+ }
+
+ arg.xattr_pages = pages;
+ arg.count = xdrlen;
+
+ ret = nfs4_call_sync(server->client, server, &msg, &arg.seq_args,
+ &res.seq_res, 0);
+
+ if (ret >= 0) {
+ ret = res.copied;
+ *cookiep = res.cookie;
+ *eofp = res.eof;
+ }
+
+ while (--np >= 0) {
+ if (pages[np])
+ __free_page(pages[np]);
+ }
+
+ __free_page(res.scratch);
+ kfree(pages);
+
+ return ret;
+
+}
+
+ssize_t nfs42_proc_getxattr(struct inode *inode, const char *name,
+ void *buf, size_t buflen)
+{
+ struct nfs4_exception exception = { };
+ ssize_t err;
+
+ do {
+ err = _nfs42_proc_getxattr(inode, name, buf, buflen);
+ if (err >= 0)
+ break;
+ err = nfs4_handle_exception(NFS_SERVER(inode), err,
+ &exception);
+ } while (exception.retry);
+
+ return err;
+}
+
+int nfs42_proc_setxattr(struct inode *inode, const char *name,
+ const void *buf, size_t buflen, int flags)
+{
+ struct nfs4_exception exception = { };
+ int err;
+
+ do {
+ err = _nfs42_proc_setxattr(inode, name, buf, buflen, flags);
+ if (!err)
+ break;
+ err = nfs4_handle_exception(NFS_SERVER(inode), err,
+ &exception);
+ } while (exception.retry);
+
+ return err;
+}
+
+ssize_t nfs42_proc_listxattrs(struct inode *inode, void *buf,
+ size_t buflen, u64 *cookiep, bool *eofp)
+{
+ struct nfs4_exception exception = { };
+ ssize_t err;
+
+ do {
+ err = _nfs42_proc_listxattrs(inode, buf, buflen,
+ cookiep, eofp);
+ if (err >= 0)
+ break;
+ err = nfs4_handle_exception(NFS_SERVER(inode), err,
+ &exception);
+ } while (exception.retry);
+
+ return err;
+}
+
+int nfs42_proc_removexattr(struct inode *inode, const char *name)
+{
+ struct nfs4_exception exception = { };
+ int err;
+
+ do {
+ err = _nfs42_proc_removexattr(inode, name);
+ if (!err)
+ break;
+ err = nfs4_handle_exception(NFS_SERVER(inode), err,
+ &exception);
+ } while (exception.retry);
+
+ return err;
+}
diff --git a/fs/nfs/nfs42xattr.c b/fs/nfs/nfs42xattr.c
new file mode 100644
index 000000000000..23fdab977a2a
--- /dev/null
+++ b/fs/nfs/nfs42xattr.c
@@ -0,0 +1,1083 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2019, 2020 Amazon.com, Inc. or its affiliates. All rights reserved.
+ *
+ * User extended attribute client side cache functions.
+ *
+ * Author: Frank van der Linden <fllinden@amazon.com>
+ */
+#include <linux/errno.h>
+#include <linux/nfs_fs.h>
+#include <linux/hashtable.h>
+#include <linux/refcount.h>
+#include <uapi/linux/xattr.h>
+
+#include "nfs4_fs.h"
+#include "internal.h"
+
+/*
+ * User extended attributes client side caching is implemented by having
+ * a cache structure attached to NFS inodes. This structure is allocated
+ * when needed, and freed when the cache is zapped.
+ *
+ * The cache structure contains as hash table of entries, and a pointer
+ * to a special-cased entry for the listxattr cache.
+ *
+ * Accessing and allocating / freeing the caches is done via reference
+ * counting. The cache entries use a similar refcounting scheme.
+ *
+ * This makes freeing a cache, both from the shrinker and from the
+ * zap cache path, easy. It also means that, in current use cases,
+ * the large majority of inodes will not waste any memory, as they
+ * will never have any user extended attributes assigned to them.
+ *
+ * Attribute entries are hashed in to a simple hash table. They are
+ * also part of an LRU.
+ *
+ * There are three shrinkers.
+ *
+ * Two shrinkers deal with the cache entries themselves: one for
+ * large entries (> PAGE_SIZE), and one for smaller entries. The
+ * shrinker for the larger entries works more aggressively than
+ * those for the smaller entries.
+ *
+ * The other shrinker frees the cache structures themselves.
+ */
+
+/*
+ * 64 buckets is a good default. There is likely no reasonable
+ * workload that uses more than even 64 user extended attributes.
+ * You can certainly add a lot more - but you get what you ask for
+ * in those circumstances.
+ */
+#define NFS4_XATTR_HASH_SIZE 64
+
+#define NFSDBG_FACILITY NFSDBG_XATTRCACHE
+
+struct nfs4_xattr_cache;
+struct nfs4_xattr_entry;
+
+struct nfs4_xattr_bucket {
+ spinlock_t lock;
+ struct hlist_head hlist;
+ struct nfs4_xattr_cache *cache;
+ bool draining;
+};
+
+struct nfs4_xattr_cache {
+ struct kref ref;
+ spinlock_t hash_lock; /* protects hashtable and lru */
+ struct nfs4_xattr_bucket buckets[NFS4_XATTR_HASH_SIZE];
+ struct list_head lru;
+ struct list_head dispose;
+ atomic_long_t nent;
+ spinlock_t listxattr_lock;
+ struct inode *inode;
+ struct nfs4_xattr_entry *listxattr;
+ struct work_struct work;
+};
+
+struct nfs4_xattr_entry {
+ struct kref ref;
+ struct hlist_node hnode;
+ struct list_head lru;
+ struct list_head dispose;
+ char *xattr_name;
+ void *xattr_value;
+ size_t xattr_size;
+ struct nfs4_xattr_bucket *bucket;
+ uint32_t flags;
+};
+
+#define NFS4_XATTR_ENTRY_EXTVAL 0x0001
+
+/*
+ * LRU list of NFS inodes that have xattr caches.
+ */
+static struct list_lru nfs4_xattr_cache_lru;
+static struct list_lru nfs4_xattr_entry_lru;
+static struct list_lru nfs4_xattr_large_entry_lru;
+
+static struct kmem_cache *nfs4_xattr_cache_cachep;
+
+static struct workqueue_struct *nfs4_xattr_cache_wq;
+
+/*
+ * Hashing helper functions.
+ */
+static void
+nfs4_xattr_hash_init(struct nfs4_xattr_cache *cache)
+{
+ unsigned int i;
+
+ for (i = 0; i < NFS4_XATTR_HASH_SIZE; i++) {
+ INIT_HLIST_HEAD(&cache->buckets[i].hlist);
+ spin_lock_init(&cache->buckets[i].lock);
+ cache->buckets[i].cache = cache;
+ cache->buckets[i].draining = false;
+ }
+}
+
+/*
+ * Locking order:
+ * 1. inode i_lock or bucket lock
+ * 2. list_lru lock (taken by list_lru_* functions)
+ */
+
+/*
+ * Wrapper functions to add a cache entry to the right LRU.
+ */
+static bool
+nfs4_xattr_entry_lru_add(struct nfs4_xattr_entry *entry)
+{
+ struct list_lru *lru;
+
+ lru = (entry->flags & NFS4_XATTR_ENTRY_EXTVAL) ?
+ &nfs4_xattr_large_entry_lru : &nfs4_xattr_entry_lru;
+
+ return list_lru_add(lru, &entry->lru);
+}
+
+static bool
+nfs4_xattr_entry_lru_del(struct nfs4_xattr_entry *entry)
+{
+ struct list_lru *lru;
+
+ lru = (entry->flags & NFS4_XATTR_ENTRY_EXTVAL) ?
+ &nfs4_xattr_large_entry_lru : &nfs4_xattr_entry_lru;
+
+ return list_lru_del(lru, &entry->lru);
+}
+
+/*
+ * This function allocates cache entries. They are the normal
+ * extended attribute name/value pairs, but may also be a listxattr
+ * cache. Those allocations use the same entry so that they can be
+ * treated as one by the memory shrinker.
+ *
+ * xattr cache entries are allocated together with names. If the
+ * value fits in to one page with the entry structure and the name,
+ * it will also be part of the same allocation (kmalloc). This is
+ * expected to be the vast majority of cases. Larger allocations
+ * have a value pointer that is allocated separately by kvmalloc.
+ *
+ * Parameters:
+ *
+ * @name: Name of the extended attribute. NULL for listxattr cache
+ * entry.
+ * @value: Value of attribute, or listxattr cache. NULL if the
+ * value is to be copied from pages instead.
+ * @pages: Pages to copy the value from, if not NULL. Passed in to
+ * make it easier to copy the value after an RPC, even if
+ * the value will not be passed up to application (e.g.
+ * for a 'query' getxattr with NULL buffer).
+ * @len: Length of the value. Can be 0 for zero-length attribues.
+ * @value and @pages will be NULL if @len is 0.
+ */
+static struct nfs4_xattr_entry *
+nfs4_xattr_alloc_entry(const char *name, const void *value,
+ struct page **pages, size_t len)
+{
+ struct nfs4_xattr_entry *entry;
+ void *valp;
+ char *namep;
+ size_t alloclen, slen;
+ char *buf;
+ uint32_t flags;
+
+ BUILD_BUG_ON(sizeof(struct nfs4_xattr_entry) +
+ XATTR_NAME_MAX + 1 > PAGE_SIZE);
+
+ alloclen = sizeof(struct nfs4_xattr_entry);
+ if (name != NULL) {
+ slen = strlen(name) + 1;
+ alloclen += slen;
+ } else
+ slen = 0;
+
+ if (alloclen + len <= PAGE_SIZE) {
+ alloclen += len;
+ flags = 0;
+ } else {
+ flags = NFS4_XATTR_ENTRY_EXTVAL;
+ }
+
+ buf = kmalloc(alloclen, GFP_KERNEL_ACCOUNT | GFP_NOFS);
+ if (buf == NULL)
+ return NULL;
+ entry = (struct nfs4_xattr_entry *)buf;
+
+ if (name != NULL) {
+ namep = buf + sizeof(struct nfs4_xattr_entry);
+ memcpy(namep, name, slen);
+ } else {
+ namep = NULL;
+ }
+
+
+ if (flags & NFS4_XATTR_ENTRY_EXTVAL) {
+ valp = kvmalloc(len, GFP_KERNEL_ACCOUNT | GFP_NOFS);
+ if (valp == NULL) {
+ kfree(buf);
+ return NULL;
+ }
+ } else if (len != 0) {
+ valp = buf + sizeof(struct nfs4_xattr_entry) + slen;
+ } else
+ valp = NULL;
+
+ if (valp != NULL) {
+ if (value != NULL)
+ memcpy(valp, value, len);
+ else
+ _copy_from_pages(valp, pages, 0, len);
+ }
+
+ entry->flags = flags;
+ entry->xattr_value = valp;
+ kref_init(&entry->ref);
+ entry->xattr_name = namep;
+ entry->xattr_size = len;
+ entry->bucket = NULL;
+ INIT_LIST_HEAD(&entry->lru);
+ INIT_LIST_HEAD(&entry->dispose);
+ INIT_HLIST_NODE(&entry->hnode);
+
+ return entry;
+}
+
+static void
+nfs4_xattr_free_entry(struct nfs4_xattr_entry *entry)
+{
+ if (entry->flags & NFS4_XATTR_ENTRY_EXTVAL)
+ kvfree(entry->xattr_value);
+ kfree(entry);
+}
+
+static void
+nfs4_xattr_free_entry_cb(struct kref *kref)
+{
+ struct nfs4_xattr_entry *entry;
+
+ entry = container_of(kref, struct nfs4_xattr_entry, ref);
+
+ if (WARN_ON(!list_empty(&entry->lru)))
+ return;
+
+ nfs4_xattr_free_entry(entry);
+}
+
+static void
+nfs4_xattr_free_cache_cb(struct kref *kref)
+{
+ struct nfs4_xattr_cache *cache;
+ int i;
+
+ cache = container_of(kref, struct nfs4_xattr_cache, ref);
+
+ for (i = 0; i < NFS4_XATTR_HASH_SIZE; i++) {
+ if (WARN_ON(!hlist_empty(&cache->buckets[i].hlist)))
+ return;
+ cache->buckets[i].draining = false;
+ }
+
+ cache->listxattr = NULL;
+
+ kmem_cache_free(nfs4_xattr_cache_cachep, cache);
+
+}
+
+static struct nfs4_xattr_cache *
+nfs4_xattr_alloc_cache(void)
+{
+ struct nfs4_xattr_cache *cache;
+
+ cache = kmem_cache_alloc(nfs4_xattr_cache_cachep,
+ GFP_KERNEL_ACCOUNT | GFP_NOFS);
+ if (cache == NULL)
+ return NULL;
+
+ kref_init(&cache->ref);
+ atomic_long_set(&cache->nent, 0);
+
+ return cache;
+}
+
+/*
+ * Set the listxattr cache, which is a special-cased cache entry.
+ * The special value ERR_PTR(-ESTALE) is used to indicate that
+ * the cache is being drained - this prevents a new listxattr
+ * cache from being added to what is now a stale cache.
+ */
+static int
+nfs4_xattr_set_listcache(struct nfs4_xattr_cache *cache,
+ struct nfs4_xattr_entry *new)
+{
+ struct nfs4_xattr_entry *old;
+ int ret = 1;
+
+ spin_lock(&cache->listxattr_lock);
+
+ old = cache->listxattr;
+
+ if (old == ERR_PTR(-ESTALE)) {
+ ret = 0;
+ goto out;
+ }
+
+ cache->listxattr = new;
+ if (new != NULL && new != ERR_PTR(-ESTALE))
+ nfs4_xattr_entry_lru_add(new);
+
+ if (old != NULL) {
+ nfs4_xattr_entry_lru_del(old);
+ kref_put(&old->ref, nfs4_xattr_free_entry_cb);
+ }
+out:
+ spin_unlock(&cache->listxattr_lock);
+
+ return ret;
+}
+
+/*
+ * Unlink a cache from its parent inode, clearing out an invalid
+ * cache. Must be called with i_lock held.
+ */
+static struct nfs4_xattr_cache *
+nfs4_xattr_cache_unlink(struct inode *inode)
+{
+ struct nfs_inode *nfsi;
+ struct nfs4_xattr_cache *oldcache;
+
+ nfsi = NFS_I(inode);
+
+ oldcache = nfsi->xattr_cache;
+ if (oldcache != NULL) {
+ list_lru_del(&nfs4_xattr_cache_lru, &oldcache->lru);
+ oldcache->inode = NULL;
+ }
+ nfsi->xattr_cache = NULL;
+ nfsi->cache_validity &= ~NFS_INO_INVALID_XATTR;
+
+ return oldcache;
+
+}
+
+/*
+ * Discard a cache. Usually called by a worker, since walking all
+ * the entries can take up some cycles that we don't want to waste
+ * in the I/O path. Can also be called from the shrinker callback.
+ *
+ * The cache is dead, it has already been unlinked from its inode,
+ * and no longer appears on the cache LRU list.
+ *
+ * Mark all buckets as draining, so that no new entries are added. This
+ * could still happen in the unlikely, but possible case that another
+ * thread had grabbed a reference before it was unlinked from the inode,
+ * and is still holding it for an add operation.
+ *
+ * Remove all entries from the LRU lists, so that there is no longer
+ * any way to 'find' this cache. Then, remove the entries from the hash
+ * table.
+ *
+ * At that point, the cache will remain empty and can be freed when the final
+ * reference drops, which is very likely the kref_put at the end of
+ * this function, or the one called immediately afterwards in the
+ * shrinker callback.
+ */
+static void
+nfs4_xattr_discard_cache(struct nfs4_xattr_cache *cache)
+{
+ unsigned int i;
+ struct nfs4_xattr_entry *entry;
+ struct nfs4_xattr_bucket *bucket;
+ struct hlist_node *n;
+
+ nfs4_xattr_set_listcache(cache, ERR_PTR(-ESTALE));
+
+ for (i = 0; i < NFS4_XATTR_HASH_SIZE; i++) {
+ bucket = &cache->buckets[i];
+
+ spin_lock(&bucket->lock);
+ bucket->draining = true;
+ hlist_for_each_entry_safe(entry, n, &bucket->hlist, hnode) {
+ nfs4_xattr_entry_lru_del(entry);
+ hlist_del_init(&entry->hnode);
+ kref_put(&entry->ref, nfs4_xattr_free_entry_cb);
+ }
+ spin_unlock(&bucket->lock);
+ }
+
+ atomic_long_set(&cache->nent, 0);
+
+ kref_put(&cache->ref, nfs4_xattr_free_cache_cb);
+}
+
+static void
+nfs4_xattr_discard_cache_worker(struct work_struct *work)
+{
+ struct nfs4_xattr_cache *cache = container_of(work,
+ struct nfs4_xattr_cache, work);
+
+ nfs4_xattr_discard_cache(cache);
+}
+
+static void
+nfs4_xattr_reap_cache(struct nfs4_xattr_cache *cache)
+{
+ queue_work(nfs4_xattr_cache_wq, &cache->work);
+}
+
+/*
+ * Get a referenced copy of the cache structure. Avoid doing allocs
+ * while holding i_lock. Which means that we do some optimistic allocation,
+ * and might have to free the result in rare cases.
+ *
+ * This function only checks the NFS_INO_INVALID_XATTR cache validity bit
+ * and acts accordingly, replacing the cache when needed. For the read case
+ * (!add), this means that the caller must make sure that the cache
+ * is valid before caling this function. getxattr and listxattr call
+ * revalidate_inode to do this. The attribute cache timeout (for the
+ * non-delegated case) is expected to be dealt with in the revalidate
+ * call.
+ */
+
+static struct nfs4_xattr_cache *
+nfs4_xattr_get_cache(struct inode *inode, int add)
+{
+ struct nfs_inode *nfsi;
+ struct nfs4_xattr_cache *cache, *oldcache, *newcache;
+
+ nfsi = NFS_I(inode);
+
+ cache = oldcache = NULL;
+
+ spin_lock(&inode->i_lock);
+
+ if (nfsi->cache_validity & NFS_INO_INVALID_XATTR)
+ oldcache = nfs4_xattr_cache_unlink(inode);
+ else
+ cache = nfsi->xattr_cache;
+
+ if (cache != NULL)
+ kref_get(&cache->ref);
+
+ spin_unlock(&inode->i_lock);
+
+ if (add && cache == NULL) {
+ newcache = NULL;
+
+ cache = nfs4_xattr_alloc_cache();
+ if (cache == NULL)
+ goto out;
+
+ spin_lock(&inode->i_lock);
+ if (nfsi->cache_validity & NFS_INO_INVALID_XATTR) {
+ /*
+ * The cache was invalidated again. Give up,
+ * since what we want to enter is now likely
+ * outdated anyway.
+ */
+ spin_unlock(&inode->i_lock);
+ kref_put(&cache->ref, nfs4_xattr_free_cache_cb);
+ cache = NULL;
+ goto out;
+ }
+
+ /*
+ * Check if someone beat us to it.
+ */
+ if (nfsi->xattr_cache != NULL) {
+ newcache = nfsi->xattr_cache;
+ kref_get(&newcache->ref);
+ } else {
+ kref_get(&cache->ref);
+ nfsi->xattr_cache = cache;
+ cache->inode = inode;
+ list_lru_add(&nfs4_xattr_cache_lru, &cache->lru);
+ }
+
+ spin_unlock(&inode->i_lock);
+
+ /*
+ * If there was a race, throw away the cache we just
+ * allocated, and use the new one allocated by someone
+ * else.
+ */
+ if (newcache != NULL) {
+ kref_put(&cache->ref, nfs4_xattr_free_cache_cb);
+ cache = newcache;
+ }
+ }
+
+out:
+ /*
+ * Discarding an old cache is done via a workqueue.
+ */
+ if (oldcache != NULL)
+ nfs4_xattr_reap_cache(oldcache);
+
+ return cache;
+}
+
+static inline struct nfs4_xattr_bucket *
+nfs4_xattr_hash_bucket(struct nfs4_xattr_cache *cache, const char *name)
+{
+ return &cache->buckets[jhash(name, strlen(name), 0) &
+ (ARRAY_SIZE(cache->buckets) - 1)];
+}
+
+static struct nfs4_xattr_entry *
+nfs4_xattr_get_entry(struct nfs4_xattr_bucket *bucket, const char *name)
+{
+ struct nfs4_xattr_entry *entry;
+
+ entry = NULL;
+
+ hlist_for_each_entry(entry, &bucket->hlist, hnode) {
+ if (!strcmp(entry->xattr_name, name))
+ break;
+ }
+
+ return entry;
+}
+
+static int
+nfs4_xattr_hash_add(struct nfs4_xattr_cache *cache,
+ struct nfs4_xattr_entry *entry)
+{
+ struct nfs4_xattr_bucket *bucket;
+ struct nfs4_xattr_entry *oldentry = NULL;
+ int ret = 1;
+
+ bucket = nfs4_xattr_hash_bucket(cache, entry->xattr_name);
+ entry->bucket = bucket;
+
+ spin_lock(&bucket->lock);
+
+ if (bucket->draining) {
+ ret = 0;
+ goto out;
+ }
+
+ oldentry = nfs4_xattr_get_entry(bucket, entry->xattr_name);
+ if (oldentry != NULL) {
+ hlist_del_init(&oldentry->hnode);
+ nfs4_xattr_entry_lru_del(oldentry);
+ } else {
+ atomic_long_inc(&cache->nent);
+ }
+
+ hlist_add_head(&entry->hnode, &bucket->hlist);
+ nfs4_xattr_entry_lru_add(entry);
+
+out:
+ spin_unlock(&bucket->lock);
+
+ if (oldentry != NULL)
+ kref_put(&oldentry->ref, nfs4_xattr_free_entry_cb);
+
+ return ret;
+}
+
+static void
+nfs4_xattr_hash_remove(struct nfs4_xattr_cache *cache, const char *name)
+{
+ struct nfs4_xattr_bucket *bucket;
+ struct nfs4_xattr_entry *entry;
+
+ bucket = nfs4_xattr_hash_bucket(cache, name);
+
+ spin_lock(&bucket->lock);
+
+ entry = nfs4_xattr_get_entry(bucket, name);
+ if (entry != NULL) {
+ hlist_del_init(&entry->hnode);
+ nfs4_xattr_entry_lru_del(entry);
+ atomic_long_dec(&cache->nent);
+ }
+
+ spin_unlock(&bucket->lock);
+
+ if (entry != NULL)
+ kref_put(&entry->ref, nfs4_xattr_free_entry_cb);
+}
+
+static struct nfs4_xattr_entry *
+nfs4_xattr_hash_find(struct nfs4_xattr_cache *cache, const char *name)
+{
+ struct nfs4_xattr_bucket *bucket;
+ struct nfs4_xattr_entry *entry;
+
+ bucket = nfs4_xattr_hash_bucket(cache, name);
+
+ spin_lock(&bucket->lock);
+
+ entry = nfs4_xattr_get_entry(bucket, name);
+ if (entry != NULL)
+ kref_get(&entry->ref);
+
+ spin_unlock(&bucket->lock);
+
+ return entry;
+}
+
+/*
+ * Entry point to retrieve an entry from the cache.
+ */
+ssize_t nfs4_xattr_cache_get(struct inode *inode, const char *name, char *buf,
+ ssize_t buflen)
+{
+ struct nfs4_xattr_cache *cache;
+ struct nfs4_xattr_entry *entry;
+ ssize_t ret;
+
+ cache = nfs4_xattr_get_cache(inode, 0);
+ if (cache == NULL)
+ return -ENOENT;
+
+ ret = 0;
+ entry = nfs4_xattr_hash_find(cache, name);
+
+ if (entry != NULL) {
+ dprintk("%s: cache hit '%s', len %lu\n", __func__,
+ entry->xattr_name, (unsigned long)entry->xattr_size);
+ if (buflen == 0) {
+ /* Length probe only */
+ ret = entry->xattr_size;
+ } else if (buflen < entry->xattr_size)
+ ret = -ERANGE;
+ else {
+ memcpy(buf, entry->xattr_value, entry->xattr_size);
+ ret = entry->xattr_size;
+ }
+ kref_put(&entry->ref, nfs4_xattr_free_entry_cb);
+ } else {
+ dprintk("%s: cache miss '%s'\n", __func__, name);
+ ret = -ENOENT;
+ }
+
+ kref_put(&cache->ref, nfs4_xattr_free_cache_cb);
+
+ return ret;
+}
+
+/*
+ * Retrieve a cached list of xattrs from the cache.
+ */
+ssize_t nfs4_xattr_cache_list(struct inode *inode, char *buf, ssize_t buflen)
+{
+ struct nfs4_xattr_cache *cache;
+ struct nfs4_xattr_entry *entry;
+ ssize_t ret;
+
+ cache = nfs4_xattr_get_cache(inode, 0);
+ if (cache == NULL)
+ return -ENOENT;
+
+ spin_lock(&cache->listxattr_lock);
+
+ entry = cache->listxattr;
+
+ if (entry != NULL && entry != ERR_PTR(-ESTALE)) {
+ if (buflen == 0) {
+ /* Length probe only */
+ ret = entry->xattr_size;
+ } else if (entry->xattr_size > buflen)
+ ret = -ERANGE;
+ else {
+ memcpy(buf, entry->xattr_value, entry->xattr_size);
+ ret = entry->xattr_size;
+ }
+ } else {
+ ret = -ENOENT;
+ }
+
+ spin_unlock(&cache->listxattr_lock);
+
+ kref_put(&cache->ref, nfs4_xattr_free_cache_cb);
+
+ return ret;
+}
+
+/*
+ * Add an xattr to the cache.
+ *
+ * This also invalidates the xattr list cache.
+ */
+void nfs4_xattr_cache_add(struct inode *inode, const char *name,
+ const char *buf, struct page **pages, ssize_t buflen)
+{
+ struct nfs4_xattr_cache *cache;
+ struct nfs4_xattr_entry *entry;
+
+ dprintk("%s: add '%s' len %lu\n", __func__,
+ name, (unsigned long)buflen);
+
+ cache = nfs4_xattr_get_cache(inode, 1);
+ if (cache == NULL)
+ return;
+
+ entry = nfs4_xattr_alloc_entry(name, buf, pages, buflen);
+ if (entry == NULL)
+ goto out;
+
+ (void)nfs4_xattr_set_listcache(cache, NULL);
+
+ if (!nfs4_xattr_hash_add(cache, entry))
+ kref_put(&entry->ref, nfs4_xattr_free_entry_cb);
+
+out:
+ kref_put(&cache->ref, nfs4_xattr_free_cache_cb);
+}
+
+
+/*
+ * Remove an xattr from the cache.
+ *
+ * This also invalidates the xattr list cache.
+ */
+void nfs4_xattr_cache_remove(struct inode *inode, const char *name)
+{
+ struct nfs4_xattr_cache *cache;
+
+ dprintk("%s: remove '%s'\n", __func__, name);
+
+ cache = nfs4_xattr_get_cache(inode, 0);
+ if (cache == NULL)
+ return;
+
+ (void)nfs4_xattr_set_listcache(cache, NULL);
+ nfs4_xattr_hash_remove(cache, name);
+
+ kref_put(&cache->ref, nfs4_xattr_free_cache_cb);
+}
+
+/*
+ * Cache listxattr output, replacing any possible old one.
+ */
+void nfs4_xattr_cache_set_list(struct inode *inode, const char *buf,
+ ssize_t buflen)
+{
+ struct nfs4_xattr_cache *cache;
+ struct nfs4_xattr_entry *entry;
+
+ cache = nfs4_xattr_get_cache(inode, 1);
+ if (cache == NULL)
+ return;
+
+ entry = nfs4_xattr_alloc_entry(NULL, buf, NULL, buflen);
+ if (entry == NULL)
+ goto out;
+
+ /*
+ * This is just there to be able to get to bucket->cache,
+ * which is obviously the same for all buckets, so just
+ * use bucket 0.
+ */
+ entry->bucket = &cache->buckets[0];
+
+ if (!nfs4_xattr_set_listcache(cache, entry))
+ kref_put(&entry->ref, nfs4_xattr_free_entry_cb);
+
+out:
+ kref_put(&cache->ref, nfs4_xattr_free_cache_cb);
+}
+
+/*
+ * Zap the entire cache. Called when an inode is evicted.
+ */
+void nfs4_xattr_cache_zap(struct inode *inode)
+{
+ struct nfs4_xattr_cache *oldcache;
+
+ spin_lock(&inode->i_lock);
+ oldcache = nfs4_xattr_cache_unlink(inode);
+ spin_unlock(&inode->i_lock);
+
+ if (oldcache)
+ nfs4_xattr_discard_cache(oldcache);
+}
+
+/*
+ * The entry LRU is shrunk more aggressively than the cache LRU,
+ * by settings @seeks to 1.
+ *
+ * Cache structures are freed only when they've become empty, after
+ * pruning all but one entry.
+ */
+
+static unsigned long nfs4_xattr_cache_count(struct shrinker *shrink,
+ struct shrink_control *sc);
+static unsigned long nfs4_xattr_entry_count(struct shrinker *shrink,
+ struct shrink_control *sc);
+static unsigned long nfs4_xattr_cache_scan(struct shrinker *shrink,
+ struct shrink_control *sc);
+static unsigned long nfs4_xattr_entry_scan(struct shrinker *shrink,
+ struct shrink_control *sc);
+
+static struct shrinker nfs4_xattr_cache_shrinker = {
+ .count_objects = nfs4_xattr_cache_count,
+ .scan_objects = nfs4_xattr_cache_scan,
+ .seeks = DEFAULT_SEEKS,
+ .flags = SHRINKER_MEMCG_AWARE,
+};
+
+static struct shrinker nfs4_xattr_entry_shrinker = {
+ .count_objects = nfs4_xattr_entry_count,
+ .scan_objects = nfs4_xattr_entry_scan,
+ .seeks = DEFAULT_SEEKS,
+ .batch = 512,
+ .flags = SHRINKER_MEMCG_AWARE,
+};
+
+static struct shrinker nfs4_xattr_large_entry_shrinker = {
+ .count_objects = nfs4_xattr_entry_count,
+ .scan_objects = nfs4_xattr_entry_scan,
+ .seeks = 1,
+ .batch = 512,
+ .flags = SHRINKER_MEMCG_AWARE,
+};
+
+static enum lru_status
+cache_lru_isolate(struct list_head *item,
+ struct list_lru_one *lru, spinlock_t *lru_lock, void *arg)
+{
+ struct list_head *dispose = arg;
+ struct inode *inode;
+ struct nfs4_xattr_cache *cache = container_of(item,
+ struct nfs4_xattr_cache, lru);
+
+ if (atomic_long_read(&cache->nent) > 1)
+ return LRU_SKIP;
+
+ /*
+ * If a cache structure is on the LRU list, we know that
+ * its inode is valid. Try to lock it to break the link.
+ * Since we're inverting the lock order here, only try.
+ */
+ inode = cache->inode;
+
+ if (!spin_trylock(&inode->i_lock))
+ return LRU_SKIP;
+
+ kref_get(&cache->ref);
+
+ cache->inode = NULL;
+ NFS_I(inode)->xattr_cache = NULL;
+ NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_XATTR;
+ list_lru_isolate(lru, &cache->lru);
+
+ spin_unlock(&inode->i_lock);
+
+ list_add_tail(&cache->dispose, dispose);
+ return LRU_REMOVED;
+}
+
+static unsigned long
+nfs4_xattr_cache_scan(struct shrinker *shrink, struct shrink_control *sc)
+{
+ LIST_HEAD(dispose);
+ unsigned long freed;
+ struct nfs4_xattr_cache *cache;
+
+ freed = list_lru_shrink_walk(&nfs4_xattr_cache_lru, sc,
+ cache_lru_isolate, &dispose);
+ while (!list_empty(&dispose)) {
+ cache = list_first_entry(&dispose, struct nfs4_xattr_cache,
+ dispose);
+ list_del_init(&cache->dispose);
+ nfs4_xattr_discard_cache(cache);
+ kref_put(&cache->ref, nfs4_xattr_free_cache_cb);
+ }
+
+ return freed;
+}
+
+
+static unsigned long
+nfs4_xattr_cache_count(struct shrinker *shrink, struct shrink_control *sc)
+{
+ unsigned long count;
+
+ count = list_lru_count(&nfs4_xattr_cache_lru);
+ return vfs_pressure_ratio(count);
+}
+
+static enum lru_status
+entry_lru_isolate(struct list_head *item,
+ struct list_lru_one *lru, spinlock_t *lru_lock, void *arg)
+{
+ struct list_head *dispose = arg;
+ struct nfs4_xattr_bucket *bucket;
+ struct nfs4_xattr_cache *cache;
+ struct nfs4_xattr_entry *entry = container_of(item,
+ struct nfs4_xattr_entry, lru);
+
+ bucket = entry->bucket;
+ cache = bucket->cache;
+
+ /*
+ * Unhook the entry from its parent (either a cache bucket
+ * or a cache structure if it's a listxattr buf), so that
+ * it's no longer found. Then add it to the isolate list,
+ * to be freed later.
+ *
+ * In both cases, we're reverting lock order, so use
+ * trylock and skip the entry if we can't get the lock.
+ */
+ if (entry->xattr_name != NULL) {
+ /* Regular cache entry */
+ if (!spin_trylock(&bucket->lock))
+ return LRU_SKIP;
+
+ kref_get(&entry->ref);
+
+ hlist_del_init(&entry->hnode);
+ atomic_long_dec(&cache->nent);
+ list_lru_isolate(lru, &entry->lru);
+
+ spin_unlock(&bucket->lock);
+ } else {
+ /* Listxattr cache entry */
+ if (!spin_trylock(&cache->listxattr_lock))
+ return LRU_SKIP;
+
+ kref_get(&entry->ref);
+
+ cache->listxattr = NULL;
+ list_lru_isolate(lru, &entry->lru);
+
+ spin_unlock(&cache->listxattr_lock);
+ }
+
+ list_add_tail(&entry->dispose, dispose);
+ return LRU_REMOVED;
+}
+
+static unsigned long
+nfs4_xattr_entry_scan(struct shrinker *shrink, struct shrink_control *sc)
+{
+ LIST_HEAD(dispose);
+ unsigned long freed;
+ struct nfs4_xattr_entry *entry;
+ struct list_lru *lru;
+
+ lru = (shrink == &nfs4_xattr_large_entry_shrinker) ?
+ &nfs4_xattr_large_entry_lru : &nfs4_xattr_entry_lru;
+
+ freed = list_lru_shrink_walk(lru, sc, entry_lru_isolate, &dispose);
+
+ while (!list_empty(&dispose)) {
+ entry = list_first_entry(&dispose, struct nfs4_xattr_entry,
+ dispose);
+ list_del_init(&entry->dispose);
+
+ /*
+ * Drop two references: the one that we just grabbed
+ * in entry_lru_isolate, and the one that was set
+ * when the entry was first allocated.
+ */
+ kref_put(&entry->ref, nfs4_xattr_free_entry_cb);
+ kref_put(&entry->ref, nfs4_xattr_free_entry_cb);
+ }
+
+ return freed;
+}
+
+static unsigned long
+nfs4_xattr_entry_count(struct shrinker *shrink, struct shrink_control *sc)
+{
+ unsigned long count;
+ struct list_lru *lru;
+
+ lru = (shrink == &nfs4_xattr_large_entry_shrinker) ?
+ &nfs4_xattr_large_entry_lru : &nfs4_xattr_entry_lru;
+
+ count = list_lru_count(lru);
+ return vfs_pressure_ratio(count);
+}
+
+
+static void nfs4_xattr_cache_init_once(void *p)
+{
+ struct nfs4_xattr_cache *cache = (struct nfs4_xattr_cache *)p;
+
+ spin_lock_init(&cache->listxattr_lock);
+ atomic_long_set(&cache->nent, 0);
+ nfs4_xattr_hash_init(cache);
+ cache->listxattr = NULL;
+ INIT_WORK(&cache->work, nfs4_xattr_discard_cache_worker);
+ INIT_LIST_HEAD(&cache->lru);
+ INIT_LIST_HEAD(&cache->dispose);
+}
+
+int __init nfs4_xattr_cache_init(void)
+{
+ int ret = 0;
+
+ nfs4_xattr_cache_cachep = kmem_cache_create("nfs4_xattr_cache_cache",
+ sizeof(struct nfs4_xattr_cache), 0,
+ (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD|SLAB_ACCOUNT),
+ nfs4_xattr_cache_init_once);
+ if (nfs4_xattr_cache_cachep == NULL)
+ return -ENOMEM;
+
+ ret = list_lru_init_memcg(&nfs4_xattr_large_entry_lru,
+ &nfs4_xattr_large_entry_shrinker);
+ if (ret)
+ goto out4;
+
+ ret = list_lru_init_memcg(&nfs4_xattr_entry_lru,
+ &nfs4_xattr_entry_shrinker);
+ if (ret)
+ goto out3;
+
+ ret = list_lru_init_memcg(&nfs4_xattr_cache_lru,
+ &nfs4_xattr_cache_shrinker);
+ if (ret)
+ goto out2;
+
+ nfs4_xattr_cache_wq = alloc_workqueue("nfs4_xattr", WQ_MEM_RECLAIM, 0);
+ if (nfs4_xattr_cache_wq == NULL)
+ goto out1;
+
+ ret = register_shrinker(&nfs4_xattr_cache_shrinker);
+ if (ret)
+ goto out0;
+
+ ret = register_shrinker(&nfs4_xattr_entry_shrinker);
+ if (ret)
+ goto out;
+
+ ret = register_shrinker(&nfs4_xattr_large_entry_shrinker);
+ if (!ret)
+ return 0;
+
+ unregister_shrinker(&nfs4_xattr_entry_shrinker);
+out:
+ unregister_shrinker(&nfs4_xattr_cache_shrinker);
+out0:
+ destroy_workqueue(nfs4_xattr_cache_wq);
+out1:
+ list_lru_destroy(&nfs4_xattr_cache_lru);
+out2:
+ list_lru_destroy(&nfs4_xattr_entry_lru);
+out3:
+ list_lru_destroy(&nfs4_xattr_large_entry_lru);
+out4:
+ kmem_cache_destroy(nfs4_xattr_cache_cachep);
+
+ return ret;
+}
+
+void nfs4_xattr_cache_exit(void)
+{
+ unregister_shrinker(&nfs4_xattr_entry_shrinker);
+ unregister_shrinker(&nfs4_xattr_cache_shrinker);
+ list_lru_destroy(&nfs4_xattr_entry_lru);
+ list_lru_destroy(&nfs4_xattr_cache_lru);
+ kmem_cache_destroy(nfs4_xattr_cache_cachep);
+ destroy_workqueue(nfs4_xattr_cache_wq);
+}
diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c
index c03f3246d6c5..cc50085e151c 100644
--- a/fs/nfs/nfs42xdr.c
+++ b/fs/nfs/nfs42xdr.c
@@ -169,6 +169,78 @@
decode_clone_maxsz + \
decode_getattr_maxsz)
+/* Not limited by NFS itself, limited by the generic xattr code */
+#define nfs4_xattr_name_maxsz XDR_QUADLEN(XATTR_NAME_MAX)
+
+#define encode_getxattr_maxsz (op_encode_hdr_maxsz + 1 + \
+ nfs4_xattr_name_maxsz)
+#define decode_getxattr_maxsz (op_decode_hdr_maxsz + 1 + 1)
+#define encode_setxattr_maxsz (op_encode_hdr_maxsz + \
+ 1 + nfs4_xattr_name_maxsz + 1)
+#define decode_setxattr_maxsz (op_decode_hdr_maxsz + decode_change_info_maxsz)
+#define encode_listxattrs_maxsz (op_encode_hdr_maxsz + 2 + 1)
+#define decode_listxattrs_maxsz (op_decode_hdr_maxsz + 2 + 1 + 1)
+#define encode_removexattr_maxsz (op_encode_hdr_maxsz + 1 + \
+ nfs4_xattr_name_maxsz)
+#define decode_removexattr_maxsz (op_decode_hdr_maxsz + \
+ decode_change_info_maxsz)
+
+#define NFS4_enc_getxattr_sz (compound_encode_hdr_maxsz + \
+ encode_sequence_maxsz + \
+ encode_putfh_maxsz + \
+ encode_getxattr_maxsz)
+#define NFS4_dec_getxattr_sz (compound_decode_hdr_maxsz + \
+ decode_sequence_maxsz + \
+ decode_putfh_maxsz + \
+ decode_getxattr_maxsz)
+#define NFS4_enc_setxattr_sz (compound_encode_hdr_maxsz + \
+ encode_sequence_maxsz + \
+ encode_putfh_maxsz + \
+ encode_setxattr_maxsz)
+#define NFS4_dec_setxattr_sz (compound_decode_hdr_maxsz + \
+ decode_sequence_maxsz + \
+ decode_putfh_maxsz + \
+ decode_setxattr_maxsz)
+#define NFS4_enc_listxattrs_sz (compound_encode_hdr_maxsz + \
+ encode_sequence_maxsz + \
+ encode_putfh_maxsz + \
+ encode_listxattrs_maxsz)
+#define NFS4_dec_listxattrs_sz (compound_decode_hdr_maxsz + \
+ decode_sequence_maxsz + \
+ decode_putfh_maxsz + \
+ decode_listxattrs_maxsz)
+#define NFS4_enc_removexattr_sz (compound_encode_hdr_maxsz + \
+ encode_sequence_maxsz + \
+ encode_putfh_maxsz + \
+ encode_removexattr_maxsz)
+#define NFS4_dec_removexattr_sz (compound_decode_hdr_maxsz + \
+ decode_sequence_maxsz + \
+ decode_putfh_maxsz + \
+ decode_removexattr_maxsz)
+
+/*
+ * These values specify the maximum amount of data that is not
+ * associated with the extended attribute name or extended
+ * attribute list in the SETXATTR, GETXATTR and LISTXATTR
+ * respectively.
+ */
+const u32 nfs42_maxsetxattr_overhead = ((RPC_MAX_HEADER_WITH_AUTH +
+ compound_encode_hdr_maxsz +
+ encode_sequence_maxsz +
+ encode_putfh_maxsz + 1 +
+ nfs4_xattr_name_maxsz)
+ * XDR_UNIT);
+
+const u32 nfs42_maxgetxattr_overhead = ((RPC_MAX_HEADER_WITH_AUTH +
+ compound_decode_hdr_maxsz +
+ decode_sequence_maxsz +
+ decode_putfh_maxsz + 1) * XDR_UNIT);
+
+const u32 nfs42_maxlistxattrs_overhead = ((RPC_MAX_HEADER_WITH_AUTH +
+ compound_decode_hdr_maxsz +
+ decode_sequence_maxsz +
+ decode_putfh_maxsz + 3) * XDR_UNIT);
+
static void encode_fallocate(struct xdr_stream *xdr,
const struct nfs42_falloc_args *args)
{
@@ -333,6 +405,210 @@ static void encode_layouterror(struct xdr_stream *xdr,
encode_device_error(xdr, &args->errors[0]);
}
+static void encode_setxattr(struct xdr_stream *xdr,
+ const struct nfs42_setxattrargs *arg,
+ struct compound_hdr *hdr)
+{
+ __be32 *p;
+
+ BUILD_BUG_ON(XATTR_CREATE != SETXATTR4_CREATE);
+ BUILD_BUG_ON(XATTR_REPLACE != SETXATTR4_REPLACE);
+
+ encode_op_hdr(xdr, OP_SETXATTR, decode_setxattr_maxsz, hdr);
+ p = reserve_space(xdr, 4);
+ *p = cpu_to_be32(arg->xattr_flags);
+ encode_string(xdr, strlen(arg->xattr_name), arg->xattr_name);
+ p = reserve_space(xdr, 4);
+ *p = cpu_to_be32(arg->xattr_len);
+ if (arg->xattr_len)
+ xdr_write_pages(xdr, arg->xattr_pages, 0, arg->xattr_len);
+}
+
+static int decode_setxattr(struct xdr_stream *xdr,
+ struct nfs4_change_info *cinfo)
+{
+ int status;
+
+ status = decode_op_hdr(xdr, OP_SETXATTR);
+ if (status)
+ goto out;
+ status = decode_change_info(xdr, cinfo);
+out:
+ return status;
+}
+
+
+static void encode_getxattr(struct xdr_stream *xdr, const char *name,
+ struct compound_hdr *hdr)
+{
+ encode_op_hdr(xdr, OP_GETXATTR, decode_getxattr_maxsz, hdr);
+ encode_string(xdr, strlen(name), name);
+}
+
+static int decode_getxattr(struct xdr_stream *xdr,
+ struct nfs42_getxattrres *res,
+ struct rpc_rqst *req)
+{
+ int status;
+ __be32 *p;
+ u32 len, rdlen;
+
+ status = decode_op_hdr(xdr, OP_GETXATTR);
+ if (status)
+ return status;
+
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(!p))
+ return -EIO;
+
+ len = be32_to_cpup(p);
+ if (len > req->rq_rcv_buf.page_len)
+ return -ERANGE;
+
+ res->xattr_len = len;
+
+ if (len > 0) {
+ rdlen = xdr_read_pages(xdr, len);
+ if (rdlen < len)
+ return -EIO;
+ }
+
+ return 0;
+}
+
+static void encode_removexattr(struct xdr_stream *xdr, const char *name,
+ struct compound_hdr *hdr)
+{
+ encode_op_hdr(xdr, OP_REMOVEXATTR, decode_removexattr_maxsz, hdr);
+ encode_string(xdr, strlen(name), name);
+}
+
+
+static int decode_removexattr(struct xdr_stream *xdr,
+ struct nfs4_change_info *cinfo)
+{
+ int status;
+
+ status = decode_op_hdr(xdr, OP_REMOVEXATTR);
+ if (status)
+ goto out;
+
+ status = decode_change_info(xdr, cinfo);
+out:
+ return status;
+}
+
+static void encode_listxattrs(struct xdr_stream *xdr,
+ const struct nfs42_listxattrsargs *arg,
+ struct compound_hdr *hdr)
+{
+ __be32 *p;
+
+ encode_op_hdr(xdr, OP_LISTXATTRS, decode_listxattrs_maxsz + 1, hdr);
+
+ p = reserve_space(xdr, 12);
+ if (unlikely(!p))
+ return;
+
+ p = xdr_encode_hyper(p, arg->cookie);
+ /*
+ * RFC 8276 says to specify the full max length of the LISTXATTRS
+ * XDR reply. Count is set to the XDR length of the names array
+ * plus the EOF marker. So, add the cookie and the names count.
+ */
+ *p = cpu_to_be32(arg->count + 8 + 4);
+}
+
+static int decode_listxattrs(struct xdr_stream *xdr,
+ struct nfs42_listxattrsres *res)
+{
+ int status;
+ __be32 *p;
+ u32 count, len, ulen;
+ size_t left, copied;
+ char *buf;
+
+ status = decode_op_hdr(xdr, OP_LISTXATTRS);
+ if (status) {
+ /*
+ * Special case: for LISTXATTRS, NFS4ERR_TOOSMALL
+ * should be translated to ERANGE.
+ */
+ if (status == -ETOOSMALL)
+ status = -ERANGE;
+ goto out;
+ }
+
+ p = xdr_inline_decode(xdr, 8);
+ if (unlikely(!p))
+ return -EIO;
+
+ xdr_decode_hyper(p, &res->cookie);
+
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(!p))
+ return -EIO;
+
+ left = res->xattr_len;
+ buf = res->xattr_buf;
+
+ count = be32_to_cpup(p);
+ copied = 0;
+
+ /*
+ * We have asked for enough room to encode the maximum number
+ * of possible attribute names, so everything should fit.
+ *
+ * But, don't rely on that assumption. Just decode entries
+ * until they don't fit anymore, just in case the server did
+ * something odd.
+ */
+ while (count--) {
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(!p))
+ return -EIO;
+
+ len = be32_to_cpup(p);
+ if (len > (XATTR_NAME_MAX - XATTR_USER_PREFIX_LEN)) {
+ status = -ERANGE;
+ goto out;
+ }
+
+ p = xdr_inline_decode(xdr, len);
+ if (unlikely(!p))
+ return -EIO;
+
+ ulen = len + XATTR_USER_PREFIX_LEN + 1;
+ if (buf) {
+ if (ulen > left) {
+ status = -ERANGE;
+ goto out;
+ }
+
+ memcpy(buf, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN);
+ memcpy(buf + XATTR_USER_PREFIX_LEN, p, len);
+
+ buf[ulen - 1] = 0;
+ buf += ulen;
+ left -= ulen;
+ }
+ copied += ulen;
+ }
+
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(!p))
+ return -EIO;
+
+ res->eof = be32_to_cpup(p);
+ res->copied = copied;
+
+out:
+ if (status == -ERANGE && res->xattr_len == XATTR_LIST_MAX)
+ status = -E2BIG;
+
+ return status;
+}
+
/*
* Encode ALLOCATE request
*/
@@ -988,4 +1264,166 @@ out:
return status;
}
+#ifdef CONFIG_NFS_V4_2
+static void nfs4_xdr_enc_setxattr(struct rpc_rqst *req, struct xdr_stream *xdr,
+ const void *data)
+{
+ const struct nfs42_setxattrargs *args = data;
+ struct compound_hdr hdr = {
+ .minorversion = nfs4_xdr_minorversion(&args->seq_args),
+ };
+
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_sequence(xdr, &args->seq_args, &hdr);
+ encode_putfh(xdr, args->fh, &hdr);
+ encode_setxattr(xdr, args, &hdr);
+ encode_nops(&hdr);
+}
+
+static int nfs4_xdr_dec_setxattr(struct rpc_rqst *req, struct xdr_stream *xdr,
+ void *data)
+{
+ struct nfs42_setxattrres *res = data;
+ struct compound_hdr hdr;
+ int status;
+
+ status = decode_compound_hdr(xdr, &hdr);
+ if (status)
+ goto out;
+ status = decode_sequence(xdr, &res->seq_res, req);
+ if (status)
+ goto out;
+ status = decode_putfh(xdr);
+ if (status)
+ goto out;
+
+ status = decode_setxattr(xdr, &res->cinfo);
+out:
+ return status;
+}
+
+static void nfs4_xdr_enc_getxattr(struct rpc_rqst *req, struct xdr_stream *xdr,
+ const void *data)
+{
+ const struct nfs42_getxattrargs *args = data;
+ struct compound_hdr hdr = {
+ .minorversion = nfs4_xdr_minorversion(&args->seq_args),
+ };
+ size_t plen;
+
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_sequence(xdr, &args->seq_args, &hdr);
+ encode_putfh(xdr, args->fh, &hdr);
+ encode_getxattr(xdr, args->xattr_name, &hdr);
+
+ plen = args->xattr_len ? args->xattr_len : XATTR_SIZE_MAX;
+
+ rpc_prepare_reply_pages(req, args->xattr_pages, 0, plen,
+ hdr.replen);
+ req->rq_rcv_buf.flags |= XDRBUF_SPARSE_PAGES;
+
+ encode_nops(&hdr);
+}
+
+static int nfs4_xdr_dec_getxattr(struct rpc_rqst *rqstp,
+ struct xdr_stream *xdr, void *data)
+{
+ struct nfs42_getxattrres *res = data;
+ struct compound_hdr hdr;
+ int status;
+
+ status = decode_compound_hdr(xdr, &hdr);
+ if (status)
+ goto out;
+ status = decode_sequence(xdr, &res->seq_res, rqstp);
+ if (status)
+ goto out;
+ status = decode_putfh(xdr);
+ if (status)
+ goto out;
+ status = decode_getxattr(xdr, res, rqstp);
+out:
+ return status;
+}
+
+static void nfs4_xdr_enc_listxattrs(struct rpc_rqst *req,
+ struct xdr_stream *xdr, const void *data)
+{
+ const struct nfs42_listxattrsargs *args = data;
+ struct compound_hdr hdr = {
+ .minorversion = nfs4_xdr_minorversion(&args->seq_args),
+ };
+
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_sequence(xdr, &args->seq_args, &hdr);
+ encode_putfh(xdr, args->fh, &hdr);
+ encode_listxattrs(xdr, args, &hdr);
+
+ rpc_prepare_reply_pages(req, args->xattr_pages, 0, args->count,
+ hdr.replen);
+ req->rq_rcv_buf.flags |= XDRBUF_SPARSE_PAGES;
+
+ encode_nops(&hdr);
+}
+
+static int nfs4_xdr_dec_listxattrs(struct rpc_rqst *rqstp,
+ struct xdr_stream *xdr, void *data)
+{
+ struct nfs42_listxattrsres *res = data;
+ struct compound_hdr hdr;
+ int status;
+
+ xdr_set_scratch_buffer(xdr, page_address(res->scratch), PAGE_SIZE);
+
+ status = decode_compound_hdr(xdr, &hdr);
+ if (status)
+ goto out;
+ status = decode_sequence(xdr, &res->seq_res, rqstp);
+ if (status)
+ goto out;
+ status = decode_putfh(xdr);
+ if (status)
+ goto out;
+ status = decode_listxattrs(xdr, res);
+out:
+ return status;
+}
+
+static void nfs4_xdr_enc_removexattr(struct rpc_rqst *req,
+ struct xdr_stream *xdr, const void *data)
+{
+ const struct nfs42_removexattrargs *args = data;
+ struct compound_hdr hdr = {
+ .minorversion = nfs4_xdr_minorversion(&args->seq_args),
+ };
+
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_sequence(xdr, &args->seq_args, &hdr);
+ encode_putfh(xdr, args->fh, &hdr);
+ encode_removexattr(xdr, args->xattr_name, &hdr);
+ encode_nops(&hdr);
+}
+
+static int nfs4_xdr_dec_removexattr(struct rpc_rqst *req,
+ struct xdr_stream *xdr, void *data)
+{
+ struct nfs42_removexattrres *res = data;
+ struct compound_hdr hdr;
+ int status;
+
+ status = decode_compound_hdr(xdr, &hdr);
+ if (status)
+ goto out;
+ status = decode_sequence(xdr, &res->seq_res, req);
+ if (status)
+ goto out;
+ status = decode_putfh(xdr);
+ if (status)
+ goto out;
+
+ status = decode_removexattr(xdr, &res->cinfo);
+out:
+ return status;
+}
+#endif
#endif /* __LINUX_FS_NFS_NFS4_2XDR_H */
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 2b7f6dcd2eb8..7e16a586f3fc 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -324,6 +324,13 @@ extern int update_open_stateid(struct nfs4_state *state,
extern int nfs4_proc_get_lease_time(struct nfs_client *clp,
struct nfs_fsinfo *fsinfo);
+extern void nfs4_update_changeattr(struct inode *dir,
+ struct nfs4_change_info *cinfo,
+ unsigned long timestamp,
+ unsigned long cache_validity);
+extern int nfs4_buf_to_pages_noslab(const void *buf, size_t buflen,
+ struct page **pages);
+
#if defined(CONFIG_NFS_V4_1)
extern int nfs41_sequence_done(struct rpc_task *, struct nfs4_sequence_res *);
extern int nfs4_proc_create_session(struct nfs_client *, const struct cred *);
@@ -557,6 +564,12 @@ static inline void nfs4_unregister_sysctl(void)
/* nfs4xdr.c */
extern const struct rpc_procinfo nfs4_procedures[];
+#ifdef CONFIG_NFS_V4_2
+extern const u32 nfs42_maxsetxattr_overhead;
+extern const u32 nfs42_maxgetxattr_overhead;
+extern const u32 nfs42_maxlistxattrs_overhead;
+#endif
+
struct nfs4_mount_data;
/* callback_xdr.c */
@@ -613,12 +626,34 @@ static inline bool nfs4_state_match_open_stateid_other(const struct nfs4_state *
nfs4_stateid_match_other(&state->open_stateid, stateid);
}
+/* nfs42xattr.c */
+#ifdef CONFIG_NFS_V4_2
+extern int __init nfs4_xattr_cache_init(void);
+extern void nfs4_xattr_cache_exit(void);
+extern void nfs4_xattr_cache_add(struct inode *inode, const char *name,
+ const char *buf, struct page **pages,
+ ssize_t buflen);
+extern void nfs4_xattr_cache_remove(struct inode *inode, const char *name);
+extern ssize_t nfs4_xattr_cache_get(struct inode *inode, const char *name,
+ char *buf, ssize_t buflen);
+extern void nfs4_xattr_cache_set_list(struct inode *inode, const char *buf,
+ ssize_t buflen);
+extern ssize_t nfs4_xattr_cache_list(struct inode *inode, char *buf,
+ ssize_t buflen);
+extern void nfs4_xattr_cache_zap(struct inode *inode);
#else
+static inline void nfs4_xattr_cache_zap(struct inode *inode)
+{
+}
+#endif /* CONFIG_NFS_V4_2 */
+
+#else /* CONFIG_NFS_V4 */
#define nfs4_close_state(a, b) do { } while (0)
#define nfs4_close_sync(a, b) do { } while (0)
#define nfs4_state_protect(a, b, c, d) do { } while (0)
#define nfs4_state_protect_write(a, b, c, d) do { } while (0)
+
#endif /* CONFIG_NFS_V4 */
#endif /* __LINUX_FS_NFS_NFS4_FS.H */
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index 0bd77cc1f639..c41cbd86612c 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -992,6 +992,36 @@ static void nfs4_session_limit_rwsize(struct nfs_server *server)
#endif /* CONFIG_NFS_V4_1 */
}
+/*
+ * Limit xattr sizes using the channel attributes.
+ */
+static void nfs4_session_limit_xasize(struct nfs_server *server)
+{
+#ifdef CONFIG_NFS_V4_2
+ struct nfs4_session *sess;
+ u32 server_gxa_sz;
+ u32 server_sxa_sz;
+ u32 server_lxa_sz;
+
+ if (!nfs4_has_session(server->nfs_client))
+ return;
+
+ sess = server->nfs_client->cl_session;
+
+ server_gxa_sz = sess->fc_attrs.max_resp_sz - nfs42_maxgetxattr_overhead;
+ server_sxa_sz = sess->fc_attrs.max_rqst_sz - nfs42_maxsetxattr_overhead;
+ server_lxa_sz = sess->fc_attrs.max_resp_sz -
+ nfs42_maxlistxattrs_overhead;
+
+ if (server->gxasize > server_gxa_sz)
+ server->gxasize = server_gxa_sz;
+ if (server->sxasize > server_sxa_sz)
+ server->sxasize = server_sxa_sz;
+ if (server->lxasize > server_lxa_sz)
+ server->lxasize = server_lxa_sz;
+#endif
+}
+
static int nfs4_server_common_setup(struct nfs_server *server,
struct nfs_fh *mntfh, bool auth_probe)
{
@@ -1039,6 +1069,7 @@ static int nfs4_server_common_setup(struct nfs_server *server,
goto out;
nfs4_session_limit_rwsize(server);
+ nfs4_session_limit_xasize(server);
if (server->namelen == 0 || server->namelen > NFS4_MAXNAMLEN)
server->namelen = NFS4_MAXNAMLEN;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index e32717fd1169..f670ff64b31e 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -66,6 +66,7 @@
#include "nfs4idmap.h"
#include "nfs4session.h"
#include "fscache.h"
+#include "nfs42.h"
#include "nfs4trace.h"
@@ -256,6 +257,7 @@ const u32 nfs4_fsinfo_bitmap[3] = { FATTR4_WORD0_MAXFILESIZE
| FATTR4_WORD1_FS_LAYOUT_TYPES,
FATTR4_WORD2_LAYOUT_BLKSIZE
| FATTR4_WORD2_CLONE_BLKSIZE
+ | FATTR4_WORD2_XATTR_SUPPORT
};
const u32 nfs4_fs_locations_bitmap[3] = {
@@ -1157,37 +1159,49 @@ nfs4_dec_nlink_locked(struct inode *inode)
}
static void
-update_changeattr_locked(struct inode *dir, struct nfs4_change_info *cinfo,
+nfs4_update_changeattr_locked(struct inode *inode,
+ struct nfs4_change_info *cinfo,
unsigned long timestamp, unsigned long cache_validity)
{
- struct nfs_inode *nfsi = NFS_I(dir);
+ struct nfs_inode *nfsi = NFS_I(inode);
nfsi->cache_validity |= NFS_INO_INVALID_CTIME
| NFS_INO_INVALID_MTIME
- | NFS_INO_INVALID_DATA
| cache_validity;
- if (cinfo->atomic && cinfo->before == inode_peek_iversion_raw(dir)) {
+
+ if (cinfo->atomic && cinfo->before == inode_peek_iversion_raw(inode)) {
nfsi->cache_validity &= ~NFS_INO_REVAL_PAGECACHE;
nfsi->attrtimeo_timestamp = jiffies;
} else {
- nfs_force_lookup_revalidate(dir);
- if (cinfo->before != inode_peek_iversion_raw(dir))
+ if (S_ISDIR(inode->i_mode)) {
+ nfsi->cache_validity |= NFS_INO_INVALID_DATA;
+ nfs_force_lookup_revalidate(inode);
+ } else {
+ if (!NFS_PROTO(inode)->have_delegation(inode,
+ FMODE_READ))
+ nfsi->cache_validity |= NFS_INO_REVAL_PAGECACHE;
+ }
+
+ if (cinfo->before != inode_peek_iversion_raw(inode))
nfsi->cache_validity |= NFS_INO_INVALID_ACCESS |
- NFS_INO_INVALID_ACL;
+ NFS_INO_INVALID_ACL |
+ NFS_INO_INVALID_XATTR;
}
- inode_set_iversion_raw(dir, cinfo->after);
+ inode_set_iversion_raw(inode, cinfo->after);
nfsi->read_cache_jiffies = timestamp;
nfsi->attr_gencount = nfs_inc_attr_generation_counter();
nfsi->cache_validity &= ~NFS_INO_INVALID_CHANGE;
- nfs_fscache_invalidate(dir);
+
+ if (nfsi->cache_validity & NFS_INO_INVALID_DATA)
+ nfs_fscache_invalidate(inode);
}
-static void
-update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo,
+void
+nfs4_update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo,
unsigned long timestamp, unsigned long cache_validity)
{
spin_lock(&dir->i_lock);
- update_changeattr_locked(dir, cinfo, timestamp, cache_validity);
+ nfs4_update_changeattr_locked(dir, cinfo, timestamp, cache_validity);
spin_unlock(&dir->i_lock);
}
@@ -1340,6 +1354,12 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry,
NFS4_ACCESS_MODIFY |
NFS4_ACCESS_EXTEND |
NFS4_ACCESS_EXECUTE;
+#ifdef CONFIG_NFS_V4_2
+ if (server->caps & NFS_CAP_XATTR)
+ p->o_arg.access |= NFS4_ACCESS_XAREAD |
+ NFS4_ACCESS_XAWRITE |
+ NFS4_ACCESS_XALIST;
+#endif
}
}
p->o_arg.clientid = server->nfs_client->cl_clientid;
@@ -2637,8 +2657,9 @@ static int _nfs4_proc_open(struct nfs4_opendata *data,
data->file_created = true;
if (data->file_created ||
inode_peek_iversion_raw(dir) != o_res->cinfo.after)
- update_changeattr(dir, &o_res->cinfo,
- o_res->f_attr->time_start, 0);
+ nfs4_update_changeattr(dir, &o_res->cinfo,
+ o_res->f_attr->time_start,
+ NFS_INO_INVALID_DATA);
}
if ((o_res->rflags & NFS4_OPEN_RESULT_LOCKTYPE_POSIX) == 0)
server->caps &= ~NFS_CAP_POSIX_LOCK;
@@ -3740,7 +3761,7 @@ static void nfs4_close_context(struct nfs_open_context *ctx, int is_sync)
#define FATTR4_WORD1_NFS40_MASK (2*FATTR4_WORD1_MOUNTED_ON_FILEID - 1UL)
#define FATTR4_WORD2_NFS41_MASK (2*FATTR4_WORD2_SUPPATTR_EXCLCREAT - 1UL)
-#define FATTR4_WORD2_NFS42_MASK (2*FATTR4_WORD2_MODE_UMASK - 1UL)
+#define FATTR4_WORD2_NFS42_MASK (2*FATTR4_WORD2_XATTR_SUPPORT - 1UL)
static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle)
{
@@ -4524,7 +4545,8 @@ _nfs4_proc_remove(struct inode *dir, const struct qstr *name, u32 ftype)
status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 1);
if (status == 0) {
spin_lock(&dir->i_lock);
- update_changeattr_locked(dir, &res.cinfo, timestamp, 0);
+ nfs4_update_changeattr_locked(dir, &res.cinfo, timestamp,
+ NFS_INO_INVALID_DATA);
/* Removing a directory decrements nlink in the parent */
if (ftype == NF4DIR && dir->i_nlink > 2)
nfs4_dec_nlink_locked(dir);
@@ -4608,8 +4630,9 @@ static int nfs4_proc_unlink_done(struct rpc_task *task, struct inode *dir)
&data->timeout) == -EAGAIN)
return 0;
if (task->tk_status == 0)
- update_changeattr(dir, &res->cinfo,
- res->dir_attr->time_start, 0);
+ nfs4_update_changeattr(dir, &res->cinfo,
+ res->dir_attr->time_start,
+ NFS_INO_INVALID_DATA);
return 1;
}
@@ -4653,16 +4676,18 @@ static int nfs4_proc_rename_done(struct rpc_task *task, struct inode *old_dir,
if (task->tk_status == 0) {
if (new_dir != old_dir) {
/* Note: If we moved a directory, nlink will change */
- update_changeattr(old_dir, &res->old_cinfo,
+ nfs4_update_changeattr(old_dir, &res->old_cinfo,
res->old_fattr->time_start,
- NFS_INO_INVALID_OTHER);
- update_changeattr(new_dir, &res->new_cinfo,
+ NFS_INO_INVALID_OTHER |
+ NFS_INO_INVALID_DATA);
+ nfs4_update_changeattr(new_dir, &res->new_cinfo,
res->new_fattr->time_start,
- NFS_INO_INVALID_OTHER);
+ NFS_INO_INVALID_OTHER |
+ NFS_INO_INVALID_DATA);
} else
- update_changeattr(old_dir, &res->old_cinfo,
+ nfs4_update_changeattr(old_dir, &res->old_cinfo,
res->old_fattr->time_start,
- 0);
+ NFS_INO_INVALID_DATA);
}
return 1;
}
@@ -4703,7 +4728,8 @@ static int _nfs4_proc_link(struct inode *inode, struct inode *dir, const struct
status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1);
if (!status) {
- update_changeattr(dir, &res.cinfo, res.fattr->time_start, 0);
+ nfs4_update_changeattr(dir, &res.cinfo, res.fattr->time_start,
+ NFS_INO_INVALID_DATA);
status = nfs_post_op_update_inode(inode, res.fattr);
if (!status)
nfs_setsecurity(inode, res.fattr, res.label);
@@ -4781,8 +4807,9 @@ static int nfs4_do_create(struct inode *dir, struct dentry *dentry, struct nfs4_
&data->arg.seq_args, &data->res.seq_res, 1);
if (status == 0) {
spin_lock(&dir->i_lock);
- update_changeattr_locked(dir, &data->res.dir_cinfo,
- data->res.fattr->time_start, 0);
+ nfs4_update_changeattr_locked(dir, &data->res.dir_cinfo,
+ data->res.fattr->time_start,
+ NFS_INO_INVALID_DATA);
/* Creating a directory bumps nlink in the parent */
if (data->arg.ftype == NF4DIR)
nfs4_inc_nlink_locked(dir);
@@ -5515,7 +5542,7 @@ static inline int nfs4_server_supports_acls(struct nfs_server *server)
*/
#define NFS4ACL_MAXPAGES DIV_ROUND_UP(XATTR_SIZE_MAX, PAGE_SIZE)
-static int buf_to_pages_noslab(const void *buf, size_t buflen,
+int nfs4_buf_to_pages_noslab(const void *buf, size_t buflen,
struct page **pages)
{
struct page *newpage, **spages;
@@ -5757,7 +5784,7 @@ static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t bufl
return -EOPNOTSUPP;
if (npages > ARRAY_SIZE(pages))
return -ERANGE;
- i = buf_to_pages_noslab(buf, buflen, arg.acl_pages);
+ i = nfs4_buf_to_pages_noslab(buf, buflen, arg.acl_pages);
if (i < 0)
return i;
nfs4_inode_make_writeable(inode);
@@ -7414,6 +7441,133 @@ nfs4_listxattr_nfs4_label(struct inode *inode, char *list, size_t list_len)
#endif
+#ifdef CONFIG_NFS_V4_2
+static int nfs4_xattr_set_nfs4_user(const struct xattr_handler *handler,
+ struct dentry *unused, struct inode *inode,
+ const char *key, const void *buf,
+ size_t buflen, int flags)
+{
+ struct nfs_access_entry cache;
+ int ret;
+
+ if (!nfs_server_capable(inode, NFS_CAP_XATTR))
+ return -EOPNOTSUPP;
+
+ /*
+ * There is no mapping from the MAY_* flags to the NFS_ACCESS_XA*
+ * flags right now. Handling of xattr operations use the normal
+ * file read/write permissions.
+ *
+ * Just in case the server has other ideas (which RFC 8276 allows),
+ * do a cached access check for the XA* flags to possibly avoid
+ * doing an RPC and getting EACCES back.
+ */
+ if (!nfs_access_get_cached(inode, current_cred(), &cache, true)) {
+ if (!(cache.mask & NFS_ACCESS_XAWRITE))
+ return -EACCES;
+ }
+
+ if (buf == NULL) {
+ ret = nfs42_proc_removexattr(inode, key);
+ if (!ret)
+ nfs4_xattr_cache_remove(inode, key);
+ } else {
+ ret = nfs42_proc_setxattr(inode, key, buf, buflen, flags);
+ if (!ret)
+ nfs4_xattr_cache_add(inode, key, buf, NULL, buflen);
+ }
+
+ return ret;
+}
+
+static int nfs4_xattr_get_nfs4_user(const struct xattr_handler *handler,
+ struct dentry *unused, struct inode *inode,
+ const char *key, void *buf, size_t buflen)
+{
+ struct nfs_access_entry cache;
+ ssize_t ret;
+
+ if (!nfs_server_capable(inode, NFS_CAP_XATTR))
+ return -EOPNOTSUPP;
+
+ if (!nfs_access_get_cached(inode, current_cred(), &cache, true)) {
+ if (!(cache.mask & NFS_ACCESS_XAREAD))
+ return -EACCES;
+ }
+
+ ret = nfs_revalidate_inode(NFS_SERVER(inode), inode);
+ if (ret)
+ return ret;
+
+ ret = nfs4_xattr_cache_get(inode, key, buf, buflen);
+ if (ret >= 0 || (ret < 0 && ret != -ENOENT))
+ return ret;
+
+ ret = nfs42_proc_getxattr(inode, key, buf, buflen);
+
+ return ret;
+}
+
+static ssize_t
+nfs4_listxattr_nfs4_user(struct inode *inode, char *list, size_t list_len)
+{
+ u64 cookie;
+ bool eof;
+ ssize_t ret, size;
+ char *buf;
+ size_t buflen;
+ struct nfs_access_entry cache;
+
+ if (!nfs_server_capable(inode, NFS_CAP_XATTR))
+ return 0;
+
+ if (!nfs_access_get_cached(inode, current_cred(), &cache, true)) {
+ if (!(cache.mask & NFS_ACCESS_XALIST))
+ return 0;
+ }
+
+ ret = nfs_revalidate_inode(NFS_SERVER(inode), inode);
+ if (ret)
+ return ret;
+
+ ret = nfs4_xattr_cache_list(inode, list, list_len);
+ if (ret >= 0 || (ret < 0 && ret != -ENOENT))
+ return ret;
+
+ cookie = 0;
+ eof = false;
+ buflen = list_len ? list_len : XATTR_LIST_MAX;
+ buf = list_len ? list : NULL;
+ size = 0;
+
+ while (!eof) {
+ ret = nfs42_proc_listxattrs(inode, buf, buflen,
+ &cookie, &eof);
+ if (ret < 0)
+ return ret;
+
+ if (list_len) {
+ buf += ret;
+ buflen -= ret;
+ }
+ size += ret;
+ }
+
+ if (list_len)
+ nfs4_xattr_cache_set_list(inode, list, size);
+
+ return size;
+}
+
+#else
+
+static ssize_t
+nfs4_listxattr_nfs4_user(struct inode *inode, char *list, size_t list_len)
+{
+ return 0;
+}
+#endif /* CONFIG_NFS_V4_2 */
+
/*
* nfs_fhget will use either the mounted_on_fileid or the fileid
*/
@@ -10019,7 +10173,7 @@ const struct nfs4_minor_version_ops *nfs_v4_minor_ops[] = {
static ssize_t nfs4_listxattr(struct dentry *dentry, char *list, size_t size)
{
- ssize_t error, error2;
+ ssize_t error, error2, error3;
error = generic_listxattr(dentry, list, size);
if (error < 0)
@@ -10032,7 +10186,17 @@ static ssize_t nfs4_listxattr(struct dentry *dentry, char *list, size_t size)
error2 = nfs4_listxattr_nfs4_label(d_inode(dentry), list, size);
if (error2 < 0)
return error2;
- return error + error2;
+
+ if (list) {
+ list += error2;
+ size -= error2;
+ }
+
+ error3 = nfs4_listxattr_nfs4_user(d_inode(dentry), list, size);
+ if (error3 < 0)
+ return error3;
+
+ return error + error2 + error3;
}
static const struct inode_operations nfs4_dir_inode_operations = {
@@ -10120,11 +10284,22 @@ static const struct xattr_handler nfs4_xattr_nfs4_acl_handler = {
.set = nfs4_xattr_set_nfs4_acl,
};
+#ifdef CONFIG_NFS_V4_2
+static const struct xattr_handler nfs4_xattr_nfs4_user_handler = {
+ .prefix = XATTR_USER_PREFIX,
+ .get = nfs4_xattr_get_nfs4_user,
+ .set = nfs4_xattr_set_nfs4_user,
+};
+#endif
+
const struct xattr_handler *nfs4_xattr_handlers[] = {
&nfs4_xattr_nfs4_acl_handler,
#ifdef CONFIG_NFS_V4_SECURITY_LABEL
&nfs4_xattr_nfs4_label_handler,
#endif
+#ifdef CONFIG_NFS_V4_2
+ &nfs4_xattr_nfs4_user_handler,
+#endif
NULL
};
diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c
index 1475f932d7da..0c1ab846b83d 100644
--- a/fs/nfs/nfs4super.c
+++ b/fs/nfs/nfs4super.c
@@ -69,6 +69,7 @@ static void nfs4_evict_inode(struct inode *inode)
pnfs_destroy_layout(NFS_I(inode));
/* First call standard NFS clear_inode() code */
nfs_clear_inode(inode);
+ nfs4_xattr_cache_zap(inode);
}
struct nfs_referral_count {
@@ -268,6 +269,12 @@ static int __init init_nfs_v4(void)
if (err)
goto out1;
+#ifdef CONFIG_NFS_V4_2
+ err = nfs4_xattr_cache_init();
+ if (err)
+ goto out2;
+#endif
+
err = nfs4_register_sysctl();
if (err)
goto out2;
@@ -288,6 +295,9 @@ static void __exit exit_nfs_v4(void)
nfs4_pnfs_v3_ds_connect_unload();
unregister_nfs_version(&nfs_v4);
+#ifdef CONFIG_NFS_V4_2
+ nfs4_xattr_cache_exit();
+#endif
nfs4_unregister_sysctl();
nfs_idmap_quit();
nfs_dns_resolver_destroy();
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 47817ef0aadb..388ac520b104 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -4201,6 +4201,26 @@ static int decode_attr_time_modify(struct xdr_stream *xdr, uint32_t *bitmap, str
return status;
}
+static int decode_attr_xattrsupport(struct xdr_stream *xdr, uint32_t *bitmap,
+ uint32_t *res)
+{
+ __be32 *p;
+
+ *res = 0;
+ if (unlikely(bitmap[2] & (FATTR4_WORD2_XATTR_SUPPORT - 1U)))
+ return -EIO;
+ if (likely(bitmap[2] & FATTR4_WORD2_XATTR_SUPPORT)) {
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(!p))
+ return -EIO;
+ *res = be32_to_cpup(p);
+ bitmap[2] &= ~FATTR4_WORD2_XATTR_SUPPORT;
+ }
+ dprintk("%s: XATTR support=%s\n", __func__,
+ *res == 0 ? "false" : "true");
+ return 0;
+}
+
static int verify_attr_len(struct xdr_stream *xdr, unsigned int savep, uint32_t attrlen)
{
unsigned int attrwords = XDR_QUADLEN(attrlen);
@@ -4855,6 +4875,11 @@ static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo)
if (status)
goto xdr_error;
+ status = decode_attr_xattrsupport(xdr, bitmap,
+ &fsinfo->xattr_support);
+ if (status)
+ goto xdr_error;
+
status = verify_attr_len(xdr, savep, attrlen);
xdr_error:
dprintk("%s: xdr returned %d!\n", __func__, -status);
@@ -7456,6 +7481,8 @@ static struct {
{ NFS4ERR_SYMLINK, -ELOOP },
{ NFS4ERR_OP_ILLEGAL, -EOPNOTSUPP },
{ NFS4ERR_DEADLOCK, -EDEADLK },
+ { NFS4ERR_NOXATTR, -ENODATA },
+ { NFS4ERR_XATTR2BIG, -E2BIG },
{ -1, -EIO }
};
@@ -7584,6 +7611,10 @@ const struct rpc_procinfo nfs4_procedures[] = {
PROC42(COPY_NOTIFY, enc_copy_notify, dec_copy_notify),
PROC(LOOKUPP, enc_lookupp, dec_lookupp),
PROC42(LAYOUTERROR, enc_layouterror, dec_layouterror),
+ PROC42(GETXATTR, enc_getxattr, dec_getxattr),
+ PROC42(SETXATTR, enc_setxattr, dec_setxattr),
+ PROC42(LISTXATTRS, enc_listxattrs, dec_listxattrs),
+ PROC42(REMOVEXATTR, enc_removexattr, dec_removexattr),
};
static unsigned int nfs_version4_counts[ARRAY_SIZE(nfs4_procedures)];
diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h
index 547cec79899f..5a59dcdce0b2 100644
--- a/fs/nfs/nfstrace.h
+++ b/fs/nfs/nfstrace.h
@@ -59,7 +59,8 @@ TRACE_DEFINE_ENUM(NFS_INO_INVALID_OTHER);
{ NFS_INO_INVALID_CTIME, "INVALID_CTIME" }, \
{ NFS_INO_INVALID_MTIME, "INVALID_MTIME" }, \
{ NFS_INO_INVALID_SIZE, "INVALID_SIZE" }, \
- { NFS_INO_INVALID_OTHER, "INVALID_OTHER" })
+ { NFS_INO_INVALID_OTHER, "INVALID_OTHER" }, \
+ { NFS_INO_INVALID_XATTR, "INVALID_XATTR" })
TRACE_DEFINE_ENUM(NFS_INO_ADVISE_RDPLUS);
TRACE_DEFINE_ENUM(NFS_INO_STALE);
diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index 4dba3c948932..db13026ac7d1 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -150,6 +150,12 @@ enum nfs_opnum4 {
OP_WRITE_SAME = 70,
OP_CLONE = 71,
+ /* xattr support (RFC8726) */
+ OP_GETXATTR = 72,
+ OP_SETXATTR = 73,
+ OP_LISTXATTRS = 74,
+ OP_REMOVEXATTR = 75,
+
OP_ILLEGAL = 10044,
};
@@ -280,6 +286,10 @@ enum nfsstat4 {
NFS4ERR_WRONG_LFS = 10092,
NFS4ERR_BADLABEL = 10093,
NFS4ERR_OFFLOAD_NO_REQS = 10094,
+
+ /* xattr (RFC8276) */
+ NFS4ERR_NOXATTR = 10095,
+ NFS4ERR_XATTR2BIG = 10096,
};
static inline bool seqid_mutating_err(u32 err)
@@ -452,6 +462,7 @@ enum change_attr_type4 {
#define FATTR4_WORD2_CHANGE_ATTR_TYPE (1UL << 15)
#define FATTR4_WORD2_SECURITY_LABEL (1UL << 16)
#define FATTR4_WORD2_MODE_UMASK (1UL << 17)
+#define FATTR4_WORD2_XATTR_SUPPORT (1UL << 18)
/* MDS threshold bitmap bits */
#define THRESHOLD_RD (1UL << 0)
@@ -542,6 +553,11 @@ enum {
NFSPROC4_CLNT_LAYOUTERROR,
NFSPROC4_CLNT_COPY_NOTIFY,
+
+ NFSPROC4_CLNT_GETXATTR,
+ NFSPROC4_CLNT_SETXATTR,
+ NFSPROC4_CLNT_LISTXATTRS,
+ NFSPROC4_CLNT_REMOVEXATTR,
};
/* nfs41 types */
@@ -700,4 +716,13 @@ struct nl4_server {
struct nfs42_netaddr nl4_addr; /* NL4_NETADDR */
} u;
};
+
+/*
+ * Options for setxattr. These match the flags for setxattr(2).
+ */
+enum nfs4_setxattr_options {
+ SETXATTR4_EITHER = 0,
+ SETXATTR4_CREATE = 1,
+ SETXATTR4_REPLACE = 2,
+};
#endif
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 6ee9119acc5d..a2c6455ea3fa 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -102,6 +102,8 @@ struct nfs_delegation;
struct posix_acl;
+struct nfs4_xattr_cache;
+
/*
* nfs fs inode data in memory
*/
@@ -188,6 +190,10 @@ struct nfs_inode {
struct fscache_cookie *fscache;
#endif
struct inode vfs_inode;
+
+#ifdef CONFIG_NFS_V4_2
+ struct nfs4_xattr_cache *xattr_cache;
+#endif
};
struct nfs4_copy_state {
@@ -212,6 +218,9 @@ struct nfs4_copy_state {
#define NFS_ACCESS_EXTEND 0x0008
#define NFS_ACCESS_DELETE 0x0010
#define NFS_ACCESS_EXECUTE 0x0020
+#define NFS_ACCESS_XAREAD 0x0040
+#define NFS_ACCESS_XAWRITE 0x0080
+#define NFS_ACCESS_XALIST 0x0100
/*
* Cache validity bit flags
@@ -231,6 +240,7 @@ struct nfs4_copy_state {
#define NFS_INO_DATA_INVAL_DEFER \
BIT(13) /* Deferred cache invalidation */
#define NFS_INO_INVALID_BLOCKS BIT(14) /* cached blocks are invalid */
+#define NFS_INO_INVALID_XATTR BIT(15) /* xattrs are invalid */
#define NFS_INO_INVALID_ATTR (NFS_INO_INVALID_CHANGE \
| NFS_INO_INVALID_CTIME \
@@ -490,6 +500,8 @@ extern int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fh,
struct nfs_fattr *fattr, struct nfs4_label *label);
extern int nfs_may_open(struct inode *inode, const struct cred *cred, int openflags);
extern void nfs_access_zap_cache(struct inode *inode);
+extern int nfs_access_get_cached(struct inode *inode, const struct cred *cred, struct nfs_access_entry *res,
+ bool may_block);
/*
* linux/fs/nfs/symlink.c
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 465fa98258a3..7eae72a8762e 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -163,6 +163,11 @@ struct nfs_server {
unsigned int dtsize; /* readdir size */
unsigned short port; /* "port=" setting */
unsigned int bsize; /* server block size */
+#ifdef CONFIG_NFS_V4_2
+ unsigned int gxasize; /* getxattr size */
+ unsigned int sxasize; /* setxattr size */
+ unsigned int lxasize; /* listxattr size */
+#endif
unsigned int acregmin; /* attr cache timeouts */
unsigned int acregmax;
unsigned int acdirmin;
@@ -281,5 +286,6 @@ struct nfs_server {
#define NFS_CAP_OFFLOAD_CANCEL (1U << 25)
#define NFS_CAP_LAYOUTERROR (1U << 26)
#define NFS_CAP_COPY_NOTIFY (1U << 27)
+#define NFS_CAP_XATTR (1U << 28)
#endif
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 5fd0a9ef425f..9408f3252c8e 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -150,6 +150,7 @@ struct nfs_fsinfo {
__u32 layouttype[NFS_MAX_LAYOUT_TYPES]; /* supported pnfs layout driver */
__u32 blksize; /* preferred pnfs io block size */
__u32 clone_blksize; /* granularity of a CLONE operation */
+ __u32 xattr_support; /* User xattrs supported */
};
struct nfs_fsstat {
@@ -1497,7 +1498,64 @@ struct nfs42_seek_res {
u32 sr_eof;
u64 sr_offset;
};
-#endif
+
+struct nfs42_setxattrargs {
+ struct nfs4_sequence_args seq_args;
+ struct nfs_fh *fh;
+ const char *xattr_name;
+ u32 xattr_flags;
+ size_t xattr_len;
+ struct page **xattr_pages;
+};
+
+struct nfs42_setxattrres {
+ struct nfs4_sequence_res seq_res;
+ struct nfs4_change_info cinfo;
+};
+
+struct nfs42_getxattrargs {
+ struct nfs4_sequence_args seq_args;
+ struct nfs_fh *fh;
+ const char *xattr_name;
+ size_t xattr_len;
+ struct page **xattr_pages;
+};
+
+struct nfs42_getxattrres {
+ struct nfs4_sequence_res seq_res;
+ size_t xattr_len;
+};
+
+struct nfs42_listxattrsargs {
+ struct nfs4_sequence_args seq_args;
+ struct nfs_fh *fh;
+ u32 count;
+ u64 cookie;
+ struct page **xattr_pages;
+};
+
+struct nfs42_listxattrsres {
+ struct nfs4_sequence_res seq_res;
+ struct page *scratch;
+ void *xattr_buf;
+ size_t xattr_len;
+ u64 cookie;
+ bool eof;
+ size_t copied;
+};
+
+struct nfs42_removexattrargs {
+ struct nfs4_sequence_args seq_args;
+ struct nfs_fh *fh;
+ const char *xattr_name;
+};
+
+struct nfs42_removexattrres {
+ struct nfs4_sequence_res seq_res;
+ struct nfs4_change_info cinfo;
+};
+
+#endif /* CONFIG_NFS_V4_2 */
struct nfs_page;
diff --git a/include/uapi/linux/nfs4.h b/include/uapi/linux/nfs4.h
index 8572930cf5b0..bf197e99b98f 100644
--- a/include/uapi/linux/nfs4.h
+++ b/include/uapi/linux/nfs4.h
@@ -33,6 +33,9 @@
#define NFS4_ACCESS_EXTEND 0x0008
#define NFS4_ACCESS_DELETE 0x0010
#define NFS4_ACCESS_EXECUTE 0x0020
+#define NFS4_ACCESS_XAREAD 0x0040
+#define NFS4_ACCESS_XAWRITE 0x0080
+#define NFS4_ACCESS_XALIST 0x0100
#define NFS4_FH_PERSISTENT 0x0000
#define NFS4_FH_NOEXPIRE_WITH_OPEN 0x0001
diff --git a/include/uapi/linux/nfs_fs.h b/include/uapi/linux/nfs_fs.h
index 7bcc8cd6831d..3afe3767c55d 100644
--- a/include/uapi/linux/nfs_fs.h
+++ b/include/uapi/linux/nfs_fs.h
@@ -56,6 +56,7 @@
#define NFSDBG_PNFS 0x1000
#define NFSDBG_PNFS_LD 0x2000
#define NFSDBG_STATE 0x4000
+#define NFSDBG_XATTRCACHE 0x8000
#define NFSDBG_ALL 0xFFFF