summaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/Kconfig12
-rw-r--r--fs/Makefile1
-rw-r--r--fs/dcache.c9
-rw-r--r--fs/ecryptfs/Makefile7
-rw-r--r--fs/ecryptfs/crypto.c1659
-rw-r--r--fs/ecryptfs/debug.c123
-rw-r--r--fs/ecryptfs/dentry.c87
-rw-r--r--fs/ecryptfs/ecryptfs_kernel.h482
-rw-r--r--fs/ecryptfs/file.c440
-rw-r--r--fs/ecryptfs/inode.c1079
-rw-r--r--fs/ecryptfs/keystore.c1061
-rw-r--r--fs/ecryptfs/main.c831
-rw-r--r--fs/ecryptfs/mmap.c788
-rw-r--r--fs/ecryptfs/super.c198
-rw-r--r--fs/lockd/clntlock.c54
-rw-r--r--fs/lockd/clntproc.c17
-rw-r--r--fs/lockd/host.c325
-rw-r--r--fs/lockd/mon.c65
-rw-r--r--fs/lockd/svc.c19
-rw-r--r--fs/lockd/svc4proc.c29
-rw-r--r--fs/lockd/svclock.c197
-rw-r--r--fs/lockd/svcproc.c27
-rw-r--r--fs/lockd/svcshare.c20
-rw-r--r--fs/lockd/svcsubs.c164
-rw-r--r--fs/nfsd/export.c149
-rw-r--r--fs/nfsd/nfs2acl.c3
-rw-r--r--fs/nfsd/nfs3acl.c3
-rw-r--r--fs/nfsd/nfs3proc.c18
-rw-r--r--fs/nfsd/nfs3xdr.c56
-rw-r--r--fs/nfsd/nfs4acl.c711
-rw-r--r--fs/nfsd/nfs4proc.c32
-rw-r--r--fs/nfsd/nfs4xdr.c231
-rw-r--r--fs/nfsd/nfsctl.c49
-rw-r--r--fs/nfsd/nfsproc.c12
-rw-r--r--fs/nfsd/nfssvc.c19
-rw-r--r--fs/nfsd/nfsxdr.c43
-rw-r--r--fs/nfsd/vfs.c86
-rw-r--r--fs/reiserfs/inode.c2
38 files changed, 8173 insertions, 935 deletions
diff --git a/fs/Kconfig b/fs/Kconfig
index 68f4561423ff..674cfbb83a95 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -995,6 +995,18 @@ config AFFS_FS
To compile this file system support as a module, choose M here: the
module will be called affs. If unsure, say N.
+config ECRYPT_FS
+ tristate "eCrypt filesystem layer support (EXPERIMENTAL)"
+ depends on EXPERIMENTAL && KEYS && CRYPTO
+ help
+ Encrypted filesystem that operates on the VFS layer. See
+ <file:Documentation/ecryptfs.txt> to learn more about
+ eCryptfs. Userspace components are required and can be
+ obtained from <http://ecryptfs.sf.net>.
+
+ To compile this file system support as a module, choose M here: the
+ module will be called ecryptfs.
+
config HFS_FS
tristate "Apple Macintosh file system support (EXPERIMENTAL)"
depends on BLOCK && EXPERIMENTAL
diff --git a/fs/Makefile b/fs/Makefile
index 819b2a93bebe..fd24d67a7cdb 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -75,6 +75,7 @@ obj-$(CONFIG_BFS_FS) += bfs/
obj-$(CONFIG_ISO9660_FS) += isofs/
obj-$(CONFIG_HFSPLUS_FS) += hfsplus/ # Before hfs to find wrapped HFS+
obj-$(CONFIG_HFS_FS) += hfs/
+obj-$(CONFIG_ECRYPT_FS) += ecryptfs/
obj-$(CONFIG_VXFS_FS) += freevxfs/
obj-$(CONFIG_NFS_FS) += nfs/
obj-$(CONFIG_EXPORTFS) += exportfs/
diff --git a/fs/dcache.c b/fs/dcache.c
index fc2faa44f8d1..2355bddad8de 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -291,9 +291,9 @@ struct dentry * dget_locked(struct dentry *dentry)
* it can be unhashed only if it has no children, or if it is the root
* of a filesystem.
*
- * If the inode has a DCACHE_DISCONNECTED alias, then prefer
+ * If the inode has an IS_ROOT, DCACHE_DISCONNECTED alias, then prefer
* any other hashed alias over that one unless @want_discon is set,
- * in which case only return a DCACHE_DISCONNECTED alias.
+ * in which case only return an IS_ROOT, DCACHE_DISCONNECTED alias.
*/
static struct dentry * __d_find_alias(struct inode *inode, int want_discon)
@@ -309,7 +309,8 @@ static struct dentry * __d_find_alias(struct inode *inode, int want_discon)
prefetch(next);
alias = list_entry(tmp, struct dentry, d_alias);
if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
- if (alias->d_flags & DCACHE_DISCONNECTED)
+ if (IS_ROOT(alias) &&
+ (alias->d_flags & DCACHE_DISCONNECTED))
discon_alias = alias;
else if (!want_discon) {
__dget_locked(alias);
@@ -1004,7 +1005,7 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
{
struct dentry *new = NULL;
- if (inode) {
+ if (inode && S_ISDIR(inode->i_mode)) {
spin_lock(&dcache_lock);
new = __d_find_alias(inode, 1);
if (new) {
diff --git a/fs/ecryptfs/Makefile b/fs/ecryptfs/Makefile
new file mode 100644
index 000000000000..ca6562451eeb
--- /dev/null
+++ b/fs/ecryptfs/Makefile
@@ -0,0 +1,7 @@
+#
+# Makefile for the Linux 2.6 eCryptfs
+#
+
+obj-$(CONFIG_ECRYPT_FS) += ecryptfs.o
+
+ecryptfs-objs := dentry.o file.o inode.o main.o super.o mmap.o crypto.o keystore.o debug.o
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
new file mode 100644
index 000000000000..ed35a9712fa1
--- /dev/null
+++ b/fs/ecryptfs/crypto.c
@@ -0,0 +1,1659 @@
+/**
+ * eCryptfs: Linux filesystem encryption layer
+ *
+ * Copyright (C) 1997-2004 Erez Zadok
+ * Copyright (C) 2001-2004 Stony Brook University
+ * Copyright (C) 2004-2006 International Business Machines Corp.
+ * Author(s): Michael A. Halcrow <mahalcro@us.ibm.com>
+ * Michael C. Thompson <mcthomps@us.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+ * 02111-1307, USA.
+ */
+
+#include <linux/fs.h>
+#include <linux/mount.h>
+#include <linux/pagemap.h>
+#include <linux/random.h>
+#include <linux/compiler.h>
+#include <linux/key.h>
+#include <linux/namei.h>
+#include <linux/crypto.h>
+#include <linux/file.h>
+#include <linux/scatterlist.h>
+#include "ecryptfs_kernel.h"
+
+static int
+ecryptfs_decrypt_page_offset(struct ecryptfs_crypt_stat *crypt_stat,
+ struct page *dst_page, int dst_offset,
+ struct page *src_page, int src_offset, int size,
+ unsigned char *iv);
+static int
+ecryptfs_encrypt_page_offset(struct ecryptfs_crypt_stat *crypt_stat,
+ struct page *dst_page, int dst_offset,
+ struct page *src_page, int src_offset, int size,
+ unsigned char *iv);
+
+/**
+ * ecryptfs_to_hex
+ * @dst: Buffer to take hex character representation of contents of
+ * src; must be at least of size (src_size * 2)
+ * @src: Buffer to be converted to a hex string respresentation
+ * @src_size: number of bytes to convert
+ */
+void ecryptfs_to_hex(char *dst, char *src, size_t src_size)
+{
+ int x;
+
+ for (x = 0; x < src_size; x++)
+ sprintf(&dst[x * 2], "%.2x", (unsigned char)src[x]);
+}
+
+/**
+ * ecryptfs_from_hex
+ * @dst: Buffer to take the bytes from src hex; must be at least of
+ * size (src_size / 2)
+ * @src: Buffer to be converted from a hex string respresentation to raw value
+ * @dst_size: size of dst buffer, or number of hex characters pairs to convert
+ */
+void ecryptfs_from_hex(char *dst, char *src, int dst_size)
+{
+ int x;
+ char tmp[3] = { 0, };
+
+ for (x = 0; x < dst_size; x++) {
+ tmp[0] = src[x * 2];
+ tmp[1] = src[x * 2 + 1];
+ dst[x] = (unsigned char)simple_strtol(tmp, NULL, 16);
+ }
+}
+
+/**
+ * ecryptfs_calculate_md5 - calculates the md5 of @src
+ * @dst: Pointer to 16 bytes of allocated memory
+ * @crypt_stat: Pointer to crypt_stat struct for the current inode
+ * @src: Data to be md5'd
+ * @len: Length of @src
+ *
+ * Uses the allocated crypto context that crypt_stat references to
+ * generate the MD5 sum of the contents of src.
+ */
+static int ecryptfs_calculate_md5(char *dst,
+ struct ecryptfs_crypt_stat *crypt_stat,
+ char *src, int len)
+{
+ int rc = 0;
+ struct scatterlist sg;
+
+ mutex_lock(&crypt_stat->cs_md5_tfm_mutex);
+ sg_init_one(&sg, (u8 *)src, len);
+ if (!crypt_stat->md5_tfm) {
+ crypt_stat->md5_tfm =
+ crypto_alloc_tfm("md5", CRYPTO_TFM_REQ_MAY_SLEEP);
+ if (!crypt_stat->md5_tfm) {
+ rc = -ENOMEM;
+ ecryptfs_printk(KERN_ERR, "Error attempting to "
+ "allocate crypto context\n");
+ goto out;
+ }
+ }
+ crypto_digest_init(crypt_stat->md5_tfm);
+ crypto_digest_update(crypt_stat->md5_tfm, &sg, 1);
+ crypto_digest_final(crypt_stat->md5_tfm, dst);
+ mutex_unlock(&crypt_stat->cs_md5_tfm_mutex);
+out:
+ return rc;
+}
+
+/**
+ * ecryptfs_derive_iv
+ * @iv: destination for the derived iv vale
+ * @crypt_stat: Pointer to crypt_stat struct for the current inode
+ * @offset: Offset of the page whose's iv we are to derive
+ *
+ * Generate the initialization vector from the given root IV and page
+ * offset.
+ *
+ * Returns zero on success; non-zero on error.
+ */
+static int ecryptfs_derive_iv(char *iv, struct ecryptfs_crypt_stat *crypt_stat,
+ pgoff_t offset)
+{
+ int rc = 0;
+ char dst[MD5_DIGEST_SIZE];
+ char src[ECRYPTFS_MAX_IV_BYTES + 16];
+
+ if (unlikely(ecryptfs_verbosity > 0)) {
+ ecryptfs_printk(KERN_DEBUG, "root iv:\n");
+ ecryptfs_dump_hex(crypt_stat->root_iv, crypt_stat->iv_bytes);
+ }
+ /* TODO: It is probably secure to just cast the least
+ * significant bits of the root IV into an unsigned long and
+ * add the offset to that rather than go through all this
+ * hashing business. -Halcrow */
+ memcpy(src, crypt_stat->root_iv, crypt_stat->iv_bytes);
+ memset((src + crypt_stat->iv_bytes), 0, 16);
+ snprintf((src + crypt_stat->iv_bytes), 16, "%ld", offset);
+ if (unlikely(ecryptfs_verbosity > 0)) {
+ ecryptfs_printk(KERN_DEBUG, "source:\n");
+ ecryptfs_dump_hex(src, (crypt_stat->iv_bytes + 16));
+ }
+ rc = ecryptfs_calculate_md5(dst, crypt_stat, src,
+ (crypt_stat->iv_bytes + 16));
+ if (rc) {
+ ecryptfs_printk(KERN_WARNING, "Error attempting to compute "
+ "MD5 while generating IV for a page\n");
+ goto out;
+ }
+ memcpy(iv, dst, crypt_stat->iv_bytes);
+ if (unlikely(ecryptfs_verbosity > 0)) {
+ ecryptfs_printk(KERN_DEBUG, "derived iv:\n");
+ ecryptfs_dump_hex(iv, crypt_stat->iv_bytes);
+ }
+out:
+ return rc;
+}
+
+/**
+ * ecryptfs_init_crypt_stat
+ * @crypt_stat: Pointer to the crypt_stat struct to initialize.
+ *
+ * Initialize the crypt_stat structure.
+ */
+void
+ecryptfs_init_crypt_stat(struct ecryptfs_crypt_stat *crypt_stat)
+{
+ memset((void *)crypt_stat, 0, sizeof(struct ecryptfs_crypt_stat));
+ mutex_init(&crypt_stat->cs_mutex);
+ mutex_init(&crypt_stat->cs_tfm_mutex);
+ mutex_init(&crypt_stat->cs_md5_tfm_mutex);
+ ECRYPTFS_SET_FLAG(crypt_stat->flags, ECRYPTFS_STRUCT_INITIALIZED);
+}
+
+/**
+ * ecryptfs_destruct_crypt_stat
+ * @crypt_stat: Pointer to the crypt_stat struct to initialize.
+ *
+ * Releases all memory associated with a crypt_stat struct.
+ */
+void ecryptfs_destruct_crypt_stat(struct ecryptfs_crypt_stat *crypt_stat)
+{
+ if (crypt_stat->tfm)
+ crypto_free_tfm(crypt_stat->tfm);
+ if (crypt_stat->md5_tfm)
+ crypto_free_tfm(crypt_stat->md5_tfm);
+ memset(crypt_stat, 0, sizeof(struct ecryptfs_crypt_stat));
+}
+
+void ecryptfs_destruct_mount_crypt_stat(
+ struct ecryptfs_mount_crypt_stat *mount_crypt_stat)
+{
+ if (mount_crypt_stat->global_auth_tok_key)
+ key_put(mount_crypt_stat->global_auth_tok_key);
+ if (mount_crypt_stat->global_key_tfm)
+ crypto_free_tfm(mount_crypt_stat->global_key_tfm);
+ memset(mount_crypt_stat, 0, sizeof(struct ecryptfs_mount_crypt_stat));
+}
+
+/**
+ * virt_to_scatterlist
+ * @addr: Virtual address
+ * @size: Size of data; should be an even multiple of the block size
+ * @sg: Pointer to scatterlist array; set to NULL to obtain only
+ * the number of scatterlist structs required in array
+ * @sg_size: Max array size
+ *
+ * Fills in a scatterlist array with page references for a passed
+ * virtual address.
+ *
+ * Returns the number of scatterlist structs in array used
+ */
+int virt_to_scatterlist(const void *addr, int size, struct scatterlist *sg,
+ int sg_size)
+{
+ int i = 0;
+ struct page *pg;
+ int offset;
+ int remainder_of_page;
+
+ while (size > 0 && i < sg_size) {
+ pg = virt_to_page(addr);
+ offset = offset_in_page(addr);
+ if (sg) {
+ sg[i].page = pg;
+ sg[i].offset = offset;
+ }
+ remainder_of_page = PAGE_CACHE_SIZE - offset;
+ if (size >= remainder_of_page) {
+ if (sg)
+ sg[i].length = remainder_of_page;
+ addr += remainder_of_page;
+ size -= remainder_of_page;
+ } else {
+ if (sg)
+ sg[i].length = size;
+ addr += size;
+ size = 0;
+ }
+ i++;
+ }
+ if (size > 0)
+ return -ENOMEM;
+ return i;
+}
+
+/**
+ * encrypt_scatterlist
+ * @crypt_stat: Pointer to the crypt_stat struct to initialize.
+ * @dest_sg: Destination of encrypted data
+ * @src_sg: Data to be encrypted
+ * @size: Length of data to be encrypted
+ * @iv: iv to use during encryption
+ *
+ * Returns the number of bytes encrypted; negative value on error
+ */
+static int encrypt_scatterlist(struct ecryptfs_crypt_stat *crypt_stat,
+ struct scatterlist *dest_sg,
+ struct scatterlist *src_sg, int size,
+ unsigned char *iv)
+{
+ int rc = 0;
+
+ BUG_ON(!crypt_stat || !crypt_stat->tfm
+ || !ECRYPTFS_CHECK_FLAG(crypt_stat->flags,
+ ECRYPTFS_STRUCT_INITIALIZED));
+ if (unlikely(ecryptfs_verbosity > 0)) {
+ ecryptfs_printk(KERN_DEBUG, "Key size [%d]; key:\n",
+ crypt_stat->key_size);
+ ecryptfs_dump_hex(crypt_stat->key,
+ crypt_stat->key_size);
+ }
+ /* Consider doing this once, when the file is opened */
+ mutex_lock(&crypt_stat->cs_tfm_mutex);
+ rc = crypto_cipher_setkey(crypt_stat->tfm, crypt_stat->key,
+ crypt_stat->key_size);
+ if (rc) {
+ ecryptfs_printk(KERN_ERR, "Error setting key; rc = [%d]\n",
+ rc);
+ mutex_unlock(&crypt_stat->cs_tfm_mutex);
+ rc = -EINVAL;
+ goto out;
+ }
+ ecryptfs_printk(KERN_DEBUG, "Encrypting [%d] bytes.\n", size);
+ crypto_cipher_encrypt_iv(crypt_stat->tfm, dest_sg, src_sg, size, iv);
+ mutex_unlock(&crypt_stat->cs_tfm_mutex);
+out:
+ return rc;
+}
+
+static void
+ecryptfs_extent_to_lwr_pg_idx_and_offset(unsigned long *lower_page_idx,
+ int *byte_offset,
+ struct ecryptfs_crypt_stat *crypt_stat,
+ unsigned long extent_num)
+{
+ unsigned long lower_extent_num;
+ int extents_occupied_by_headers_at_front;
+ int bytes_occupied_by_headers_at_front;
+ int extent_offset;
+ int extents_per_page;
+
+ bytes_occupied_by_headers_at_front =
+ ( crypt_stat->header_extent_size
+ * crypt_stat->num_header_extents_at_front );
+ extents_occupied_by_headers_at_front =
+ ( bytes_occupied_by_headers_at_front
+ / crypt_stat->extent_size );
+ lower_extent_num = extents_occupied_by_headers_at_front + extent_num;
+ extents_per_page = PAGE_CACHE_SIZE / crypt_stat->extent_size;
+ (*lower_page_idx) = lower_extent_num / extents_per_page;
+ extent_offset = lower_extent_num % extents_per_page;
+ (*byte_offset) = extent_offset * crypt_stat->extent_size;
+ ecryptfs_printk(KERN_DEBUG, " * crypt_stat->header_extent_size = "
+ "[%d]\n", crypt_stat->header_extent_size);
+ ecryptfs_printk(KERN_DEBUG, " * crypt_stat->"
+ "num_header_extents_at_front = [%d]\n",
+ crypt_stat->num_header_extents_at_front);
+ ecryptfs_printk(KERN_DEBUG, " * extents_occupied_by_headers_at_"
+ "front = [%d]\n", extents_occupied_by_headers_at_front);
+ ecryptfs_printk(KERN_DEBUG, " * lower_extent_num = [0x%.16x]\n",
+ lower_extent_num);
+ ecryptfs_printk(KERN_DEBUG, " * extents_per_page = [%d]\n",
+ extents_per_page);
+ ecryptfs_printk(KERN_DEBUG, " * (*lower_page_idx) = [0x%.16x]\n",
+ (*lower_page_idx));
+ ecryptfs_printk(KERN_DEBUG, " * extent_offset = [%d]\n",
+ extent_offset);
+ ecryptfs_printk(KERN_DEBUG, " * (*byte_offset) = [%d]\n",
+ (*byte_offset));
+}
+
+static int ecryptfs_write_out_page(struct ecryptfs_page_crypt_context *ctx,
+ struct page *lower_page,
+ struct inode *lower_inode,
+ int byte_offset_in_page, int bytes_to_write)
+{
+ int rc = 0;
+
+ if (ctx->mode == ECRYPTFS_PREPARE_COMMIT_MODE) {
+ rc = ecryptfs_commit_lower_page(lower_page, lower_inode,
+ ctx->param.lower_file,
+ byte_offset_in_page,
+ bytes_to_write);
+ if (rc) {
+ ecryptfs_printk(KERN_ERR, "Error calling lower "
+ "commit; rc = [%d]\n", rc);
+ goto out;
+ }
+ } else {
+ rc = ecryptfs_writepage_and_release_lower_page(lower_page,
+ lower_inode,
+ ctx->param.wbc);
+ if (rc) {
+ ecryptfs_printk(KERN_ERR, "Error calling lower "
+ "writepage(); rc = [%d]\n", rc);
+ goto out;
+ }
+ }
+out:
+ return rc;
+}
+
+static int ecryptfs_read_in_page(struct ecryptfs_page_crypt_context *ctx,
+ struct page **lower_page,
+ struct inode *lower_inode,
+ unsigned long lower_page_idx,
+ int byte_offset_in_page)
+{
+ int rc = 0;
+
+ if (ctx->mode == ECRYPTFS_PREPARE_COMMIT_MODE) {
+ /* TODO: Limit this to only the data extents that are
+ * needed */
+ rc = ecryptfs_get_lower_page(lower_page, lower_inode,
+ ctx->param.lower_file,
+ lower_page_idx,
+ byte_offset_in_page,
+ (PAGE_CACHE_SIZE
+ - byte_offset_in_page));
+ if (rc) {
+ ecryptfs_printk(
+ KERN_ERR, "Error attempting to grab, map, "
+ "and prepare_write lower page with index "
+ "[0x%.16x]; rc = [%d]\n", lower_page_idx, rc);
+ goto out;
+ }
+ } else {
+ rc = ecryptfs_grab_and_map_lower_page(lower_page, NULL,
+ lower_inode,
+ lower_page_idx);
+ if (rc) {
+ ecryptfs_printk(
+ KERN_ERR, "Error attempting to grab and map "
+ "lower page with index [0x%.16x]; rc = [%d]\n",
+ lower_page_idx, rc);
+ goto out;
+ }
+ }
+out:
+ return rc;
+}
+
+/**
+ * ecryptfs_encrypt_page
+ * @ctx: The context of the page
+ *
+ * Encrypt an eCryptfs page. This is done on a per-extent basis. Note
+ * that eCryptfs pages may straddle the lower pages -- for instance,
+ * if the file was created on a machine with an 8K page size
+ * (resulting in an 8K header), and then the file is copied onto a
+ * host with a 32K page size, then when reading page 0 of the eCryptfs
+ * file, 24K of page 0 of the lower file will be read and decrypted,
+ * and then 8K of page 1 of the lower file will be read and decrypted.
+ *
+ * The actual operations performed on each page depends on the
+ * contents of the ecryptfs_page_crypt_context struct.
+ *
+ * Returns zero on success; negative on error
+ */
+int ecryptfs_encrypt_page(struct ecryptfs_page_crypt_context *ctx)
+{
+ char extent_iv[ECRYPTFS_MAX_IV_BYTES];
+ unsigned long base_extent;
+ unsigned long extent_offset = 0;
+ unsigned long lower_page_idx = 0;
+ unsigned long prior_lower_page_idx = 0;
+ struct page *lower_page;
+ struct inode *lower_inode;
+ struct ecryptfs_inode_info *inode_info;
+ struct ecryptfs_crypt_stat *crypt_stat;
+ int rc = 0;
+ int lower_byte_offset = 0;
+ int orig_byte_offset = 0;
+ int num_extents_per_page;
+#define ECRYPTFS_PAGE_STATE_UNREAD 0
+#define ECRYPTFS_PAGE_STATE_READ 1
+#define ECRYPTFS_PAGE_STATE_MODIFIED 2
+#define ECRYPTFS_PAGE_STATE_WRITTEN 3
+ int page_state;
+
+ lower_inode = ecryptfs_inode_to_lower(ctx->page->mapping->host);
+ inode_info = ecryptfs_inode_to_private(ctx->page->mapping->host);
+ crypt_stat = &inode_info->crypt_stat;
+ if (!ECRYPTFS_CHECK_FLAG(crypt_stat->flags, ECRYPTFS_ENCRYPTED)) {
+ rc = ecryptfs_copy_page_to_lower(ctx->page, lower_inode,
+ ctx->param.lower_file);
+ if (rc)
+ ecryptfs_printk(KERN_ERR, "Error attempting to copy "
+ "page at index [0x%.16x]\n",
+ ctx->page->index);
+ goto out;
+ }
+ num_extents_per_page = PAGE_CACHE_SIZE / crypt_stat->extent_size;
+ base_extent = (ctx->page->index * num_extents_per_page);
+ page_state = ECRYPTFS_PAGE_STATE_UNREAD;
+ while (extent_offset < num_extents_per_page) {
+ ecryptfs_extent_to_lwr_pg_idx_and_offset(
+ &lower_page_idx, &lower_byte_offset, crypt_stat,
+ (base_extent + extent_offset));
+ if (prior_lower_page_idx != lower_page_idx
+ && page_state == ECRYPTFS_PAGE_STATE_MODIFIED) {
+ rc = ecryptfs_write_out_page(ctx, lower_page,
+ lower_inode,
+ orig_byte_offset,
+ (PAGE_CACHE_SIZE
+ - orig_byte_offset));
+ if (rc) {
+ ecryptfs_printk(KERN_ERR, "Error attempting "
+ "to write out page; rc = [%d]"
+ "\n", rc);
+ goto out;
+ }
+ page_state = ECRYPTFS_PAGE_STATE_WRITTEN;
+ }
+ if (page_state == ECRYPTFS_PAGE_STATE_UNREAD
+ || page_state == ECRYPTFS_PAGE_STATE_WRITTEN) {
+ rc = ecryptfs_read_in_page(ctx, &lower_page,
+ lower_inode, lower_page_idx,
+ lower_byte_offset);
+ if (rc) {
+ ecryptfs_printk(KERN_ERR, "Error attempting "
+ "to read in lower page with "
+ "index [0x%.16x]; rc = [%d]\n",
+ lower_page_idx, rc);
+ goto out;
+ }
+ orig_byte_offset = lower_byte_offset;
+ prior_lower_page_idx = lower_page_idx;
+ page_state = ECRYPTFS_PAGE_STATE_READ;
+ }
+ BUG_ON(!(page_state == ECRYPTFS_PAGE_STATE_MODIFIED
+ || page_state == ECRYPTFS_PAGE_STATE_READ));
+ rc = ecryptfs_derive_iv(extent_iv, crypt_stat,
+ (base_extent + extent_offset));
+ if (rc) {
+ ecryptfs_printk(KERN_ERR, "Error attempting to "
+ "derive IV for extent [0x%.16x]; "
+ "rc = [%d]\n",
+ (base_extent + extent_offset), rc);
+ goto out;
+ }
+ if (unlikely(ecryptfs_verbosity > 0)) {
+ ecryptfs_printk(KERN_DEBUG, "Encrypting extent "
+ "with iv:\n");
+ ecryptfs_dump_hex(extent_iv, crypt_stat->iv_bytes);
+ ecryptfs_printk(KERN_DEBUG, "First 8 bytes before "
+ "encryption:\n");
+ ecryptfs_dump_hex((char *)
+ (page_address(ctx->page)
+ + (extent_offset
+ * crypt_stat->extent_size)), 8);
+ }
+ rc = ecryptfs_encrypt_page_offset(
+ crypt_stat, lower_page, lower_byte_offset, ctx->page,
+ (extent_offset * crypt_stat->extent_size),
+ crypt_stat->extent_size, extent_iv);
+ ecryptfs_printk(KERN_DEBUG, "Encrypt extent [0x%.16x]; "
+ "rc = [%d]\n",
+ (base_extent + extent_offset), rc);
+ if (unlikely(ecryptfs_verbosity > 0)) {
+ ecryptfs_printk(KERN_DEBUG, "First 8 bytes after "
+ "encryption:\n");
+ ecryptfs_dump_hex((char *)(page_address(lower_page)
+ + lower_byte_offset), 8);
+ }
+ page_state = ECRYPTFS_PAGE_STATE_MODIFIED;
+ extent_offset++;
+ }
+ BUG_ON(orig_byte_offset != 0);
+ rc = ecryptfs_write_out_page(ctx, lower_page, lower_inode, 0,
+ (lower_byte_offset
+ + crypt_stat->extent_size));
+ if (rc) {
+ ecryptfs_printk(KERN_ERR, "Error attempting to write out "
+ "page; rc = [%d]\n", rc);
+ goto out;
+ }
+out:
+ return rc;
+}
+
+/**
+ * ecryptfs_decrypt_page
+ * @file: The ecryptfs file
+ * @page: The page in ecryptfs to decrypt
+ *
+ * Decrypt an eCryptfs page. This is done on a per-extent basis. Note
+ * that eCryptfs pages may straddle the lower pages -- for instance,
+ * if the file was created on a machine with an 8K page size
+ * (resulting in an 8K header), and then the file is copied onto a
+ * host with a 32K page size, then when reading page 0 of the eCryptfs
+ * file, 24K of page 0 of the lower file will be read and decrypted,
+ * and then 8K of page 1 of the lower file will be read and decrypted.
+ *
+ * Returns zero on success; negative on error
+ */
+int ecryptfs_decrypt_page(struct file *file, struct page *page)
+{
+ char extent_iv[ECRYPTFS_MAX_IV_BYTES];
+ unsigned long base_extent;
+ unsigned long extent_offset = 0;
+ unsigned long lower_page_idx = 0;
+ unsigned long prior_lower_page_idx = 0;
+ struct page *lower_page;
+ char *lower_page_virt = NULL;
+ struct inode *lower_inode;
+ struct ecryptfs_crypt_stat *crypt_stat;
+ int rc = 0;
+ int byte_offset;
+ int num_extents_per_page;
+ int page_state;
+
+ crypt_stat = &(ecryptfs_inode_to_private(
+ page->mapping->host)->crypt_stat);
+ lower_inode = ecryptfs_inode_to_lower(page->mapping->host);
+ if (!ECRYPTFS_CHECK_FLAG(crypt_stat->flags, ECRYPTFS_ENCRYPTED)) {
+ rc = ecryptfs_do_readpage(file, page, page->index);
+ if (rc)
+ ecryptfs_printk(KERN_ERR, "Error attempting to copy "
+ "page at index [0x%.16x]\n",
+ page->index);
+ goto out;
+ }
+ num_extents_per_page = PAGE_CACHE_SIZE / crypt_stat->extent_size;
+ base_extent = (page->index * num_extents_per_page);
+ lower_page_virt = kmem_cache_alloc(ecryptfs_lower_page_cache,
+ SLAB_KERNEL);
+ if (!lower_page_virt) {
+ rc = -ENOMEM;
+ ecryptfs_printk(KERN_ERR, "Error getting page for encrypted "
+ "lower page(s)\n");
+ goto out;
+ }
+ lower_page = virt_to_page(lower_page_virt);
+ page_state = ECRYPTFS_PAGE_STATE_UNREAD;
+ while (extent_offset < num_extents_per_page) {
+ ecryptfs_extent_to_lwr_pg_idx_and_offset(
+ &lower_page_idx, &byte_offset, crypt_stat,
+ (base_extent + extent_offset));
+ if (prior_lower_page_idx != lower_page_idx
+ || page_state == ECRYPTFS_PAGE_STATE_UNREAD) {
+ rc = ecryptfs_do_readpage(file, lower_page,
+ lower_page_idx);
+ if (rc) {
+ ecryptfs_printk(KERN_ERR, "Error reading "
+ "lower encrypted page; rc = "
+ "[%d]\n", rc);
+ goto out;
+ }
+ prior_lower_page_idx = lower_page_idx;
+ page_state = ECRYPTFS_PAGE_STATE_READ;
+ }
+ rc = ecryptfs_derive_iv(extent_iv, crypt_stat,
+ (base_extent + extent_offset));
+ if (rc) {
+ ecryptfs_printk(KERN_ERR, "Error attempting to "
+ "derive IV for extent [0x%.16x]; rc = "
+ "[%d]\n",
+ (base_extent + extent_offset), rc);
+ goto out;
+ }
+ if (unlikely(ecryptfs_verbosity > 0)) {
+ ecryptfs_printk(KERN_DEBUG, "Decrypting extent "
+ "with iv:\n");
+ ecryptfs_dump_hex(extent_iv, crypt_stat->iv_bytes);
+ ecryptfs_printk(KERN_DEBUG, "First 8 bytes before "
+ "decryption:\n");
+ ecryptfs_dump_hex((lower_page_virt + byte_offset), 8);
+ }
+ rc = ecryptfs_decrypt_page_offset(crypt_stat, page,
+ (extent_offset
+ * crypt_stat->extent_size),
+ lower_page, byte_offset,
+ crypt_stat->extent_size,
+ extent_iv);
+ if (rc != crypt_stat->extent_size) {
+ ecryptfs_printk(KERN_ERR, "Error attempting to "
+ "decrypt extent [0x%.16x]\n",
+ (base_extent + extent_offset));
+ goto out;
+ }
+ rc = 0;
+ if (unlikely(ecryptfs_verbosity > 0)) {
+ ecryptfs_printk(KERN_DEBUG, "First 8 bytes after "
+ "decryption:\n");
+ ecryptfs_dump_hex((char *)(page_address(page)
+ + byte_offset), 8);
+ }
+ extent_offset++;
+ }
+out:
+ if (lower_page_virt)
+ kmem_cache_free(ecryptfs_lower_page_cache, lower_page_virt);
+ return rc;
+}
+
+/**
+ * decrypt_scatterlist
+ *
+ * Returns the number of bytes decrypted; negative value on error
+ */
+static int decrypt_scatterlist(struct ecryptfs_crypt_stat *crypt_stat,
+ struct scatterlist *dest_sg,
+ struct scatterlist *src_sg, int size,
+ unsigned char *iv)
+{
+ int rc = 0;
+
+ /* Consider doing this once, when the file is opened */
+ mutex_lock(&crypt_stat->cs_tfm_mutex);
+ rc = crypto_cipher_setkey(crypt_stat->tfm, crypt_stat->key,
+ crypt_stat->key_size);
+ if (rc) {
+ ecryptfs_printk(KERN_ERR, "Error setting key; rc = [%d]\n",
+ rc);
+ mutex_unlock(&crypt_stat->cs_tfm_mutex);
+ rc = -EINVAL;
+ goto out;
+ }
+ ecryptfs_printk(KERN_DEBUG, "Decrypting [%d] bytes.\n", size);
+ rc = crypto_cipher_decrypt_iv(crypt_stat->tfm, dest_sg, src_sg, size,
+ iv);
+ mutex_unlock(&crypt_stat->cs_tfm_mutex);
+ if (rc) {
+ ecryptfs_printk(KERN_ERR, "Error decrypting; rc = [%d]\n",
+ rc);
+ goto out;
+ }
+ rc = size;
+out:
+ return rc;
+}
+
+/**
+ * ecryptfs_encrypt_page_offset
+ *
+ * Returns the number of bytes encrypted
+ */
+static int
+ecryptfs_encrypt_page_offset(struct ecryptfs_crypt_stat *crypt_stat,
+ struct page *dst_page, int dst_offset,
+ struct page *src_page, int src_offset, int size,
+ unsigned char *iv)
+{
+ struct scatterlist src_sg, dst_sg;
+
+ src_sg.page = src_page;
+ src_sg.offset = src_offset;
+ src_sg.length = size;
+ dst_sg.page = dst_page;
+ dst_sg.offset = dst_offset;
+ dst_sg.length = size;
+ return encrypt_scatterlist(crypt_stat, &dst_sg, &src_sg, size, iv);
+}
+
+/**
+ * ecryptfs_decrypt_page_offset
+ *
+ * Returns the number of bytes decrypted
+ */
+static int
+ecryptfs_decrypt_page_offset(struct ecryptfs_crypt_stat *crypt_stat,
+ struct page *dst_page, int dst_offset,
+ struct page *src_page, int src_offset, int size,
+ unsigned char *iv)
+{
+ struct scatterlist src_sg, dst_sg;
+
+ src_sg.page = src_page;
+ src_sg.offset = src_offset;
+ src_sg.length = size;
+ dst_sg.page = dst_page;
+ dst_sg.offset = dst_offset;
+ dst_sg.length = size;
+ return decrypt_scatterlist(crypt_stat, &dst_sg, &src_sg, size, iv);
+}
+
+#define ECRYPTFS_MAX_SCATTERLIST_LEN 4
+
+/**
+ * ecryptfs_init_crypt_ctx
+ * @crypt_stat: Uninitilized crypt stats structure
+ *
+ * Initialize the crypto context.
+ *
+ * TODO: Performance: Keep a cache of initialized cipher contexts;
+ * only init if needed
+ */
+int ecryptfs_init_crypt_ctx(struct ecryptfs_crypt_stat *crypt_stat)
+{
+ int rc = -EINVAL;
+
+ if (!crypt_stat->cipher) {
+ ecryptfs_printk(KERN_ERR, "No cipher specified\n");
+ goto out;
+ }
+ ecryptfs_printk(KERN_DEBUG,
+ "Initializing cipher [%s]; strlen = [%d]; "
+ "key_size_bits = [%d]\n",
+ crypt_stat->cipher, (int)strlen(crypt_stat->cipher),
+ crypt_stat->key_size << 3);
+ if (crypt_stat->tfm) {
+ rc = 0;
+ goto out;
+ }
+ mutex_lock(&crypt_stat->cs_tfm_mutex);
+ crypt_stat->tfm = crypto_alloc_tfm(crypt_stat->cipher,
+ ECRYPTFS_DEFAULT_CHAINING_MODE
+ | CRYPTO_TFM_REQ_WEAK_KEY);
+ mutex_unlock(&crypt_stat->cs_tfm_mutex);
+ if (!crypt_stat->tfm) {
+ ecryptfs_printk(KERN_ERR, "cryptfs: init_crypt_ctx(): "
+ "Error initializing cipher [%s]\n",
+ crypt_stat->cipher);
+ goto out;
+ }
+ rc = 0;
+out:
+ return rc;
+}
+
+static void set_extent_mask_and_shift(struct ecryptfs_crypt_stat *crypt_stat)
+{
+ int extent_size_tmp;
+
+ crypt_stat->extent_mask = 0xFFFFFFFF;
+ crypt_stat->extent_shift = 0;
+ if (crypt_stat->extent_size == 0)
+ return;
+ extent_size_tmp = crypt_stat->extent_size;
+ while ((extent_size_tmp & 0x01) == 0) {
+ extent_size_tmp >>= 1;
+ crypt_stat->extent_mask <<= 1;
+ crypt_stat->extent_shift++;
+ }
+}
+
+void ecryptfs_set_default_sizes(struct ecryptfs_crypt_stat *crypt_stat)
+{
+ /* Default values; may be overwritten as we are parsing the
+ * packets. */
+ crypt_stat->extent_size = ECRYPTFS_DEFAULT_EXTENT_SIZE;
+ set_extent_mask_and_shift(crypt_stat);
+ crypt_stat->iv_bytes = ECRYPTFS_DEFAULT_IV_BYTES;
+ if (PAGE_CACHE_SIZE <= ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE) {
+ crypt_stat->header_extent_size =
+ ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE;
+ } else
+ crypt_stat->header_extent_size = PAGE_CACHE_SIZE;
+ crypt_stat->num_header_extents_at_front = 1;
+}
+
+/**
+ * ecryptfs_compute_root_iv
+ * @crypt_stats
+ *
+ * On error, sets the root IV to all 0's.
+ */
+int ecryptfs_compute_root_iv(struct ecryptfs_crypt_stat *crypt_stat)
+{
+ int rc = 0;
+ char dst[MD5_DIGEST_SIZE];
+
+ BUG_ON(crypt_stat->iv_bytes > MD5_DIGEST_SIZE);
+ BUG_ON(crypt_stat->iv_bytes <= 0);
+ if (!ECRYPTFS_CHECK_FLAG(crypt_stat->flags, ECRYPTFS_KEY_VALID)) {
+ rc = -EINVAL;
+ ecryptfs_printk(KERN_WARNING, "Session key not valid; "
+ "cannot generate root IV\n");
+ goto out;
+ }
+ rc = ecryptfs_calculate_md5(dst, crypt_stat, crypt_stat->key,
+ crypt_stat->key_size);
+ if (rc) {
+ ecryptfs_printk(KERN_WARNING, "Error attempting to compute "
+ "MD5 while generating root IV\n");
+ goto out;
+ }
+ memcpy(crypt_stat->root_iv, dst, crypt_stat->iv_bytes);
+out:
+ if (rc) {
+ memset(crypt_stat->root_iv, 0, crypt_stat->iv_bytes);
+ ECRYPTFS_SET_FLAG(crypt_stat->flags,
+ ECRYPTFS_SECURITY_WARNING);
+ }
+ return rc;
+}
+
+static void ecryptfs_generate_new_key(struct ecryptfs_crypt_stat *crypt_stat)
+{
+ get_random_bytes(crypt_stat->key, crypt_stat->key_size);
+ ECRYPTFS_SET_FLAG(crypt_stat->flags, ECRYPTFS_KEY_VALID);
+ ecryptfs_compute_root_iv(crypt_stat);
+ if (unlikely(ecryptfs_verbosity > 0)) {
+ ecryptfs_printk(KERN_DEBUG, "Generated new session key:\n");
+ ecryptfs_dump_hex(crypt_stat->key,
+ crypt_stat->key_size);
+ }
+}
+
+/**
+ * ecryptfs_set_default_crypt_stat_vals
+ * @crypt_stat
+ *
+ * Default values in the event that policy does not override them.
+ */
+static void ecryptfs_set_default_crypt_stat_vals(
+ struct ecryptfs_crypt_stat *crypt_stat,
+ struct ecryptfs_mount_crypt_stat *mount_crypt_stat)
+{
+ ecryptfs_set_default_sizes(crypt_stat);
+ strcpy(crypt_stat->cipher, ECRYPTFS_DEFAULT_CIPHER);
+ crypt_stat->key_size = ECRYPTFS_DEFAULT_KEY_BYTES;
+ ECRYPTFS_CLEAR_FLAG(crypt_stat->flags, ECRYPTFS_KEY_VALID);
+ crypt_stat->file_version = ECRYPTFS_FILE_VERSION;
+ crypt_stat->mount_crypt_stat = mount_crypt_stat;
+}
+
+/**
+ * ecryptfs_new_file_context
+ * @ecryptfs_dentry
+ *
+ * If the crypto context for the file has not yet been established,
+ * this is where we do that. Establishing a new crypto context
+ * involves the following decisions:
+ * - What cipher to use?
+ * - What set of authentication tokens to use?
+ * Here we just worry about getting enough information into the
+ * authentication tokens so that we know that they are available.
+ * We associate the available authentication tokens with the new file
+ * via the set of signatures in the crypt_stat struct. Later, when
+ * the headers are actually written out, we may again defer to
+ * userspace to perform the encryption of the session key; for the
+ * foreseeable future, this will be the case with public key packets.
+ *
+ * Returns zero on success; non-zero otherwise
+ */
+/* Associate an authentication token(s) with the file */
+int ecryptfs_new_file_context(struct dentry *ecryptfs_dentry)
+{
+ int rc = 0;
+ struct ecryptfs_crypt_stat *crypt_stat =
+ &ecryptfs_inode_to_private(ecryptfs_dentry->d_inode)->crypt_stat;
+ struct ecryptfs_mount_crypt_stat *mount_crypt_stat =
+ &ecryptfs_superblock_to_private(
+ ecryptfs_dentry->d_sb)->mount_crypt_stat;
+ int cipher_name_len;
+
+ ecryptfs_set_default_crypt_stat_vals(crypt_stat, mount_crypt_stat);
+ /* See if there are mount crypt options */
+ if (mount_crypt_stat->global_auth_tok) {
+ ecryptfs_printk(KERN_DEBUG, "Initializing context for new "
+ "file using mount_crypt_stat\n");
+ ECRYPTFS_SET_FLAG(crypt_stat->flags, ECRYPTFS_ENCRYPTED);
+ ECRYPTFS_SET_FLAG(crypt_stat->flags, ECRYPTFS_KEY_VALID);
+ memcpy(crypt_stat->keysigs[crypt_stat->num_keysigs++],
+ mount_crypt_stat->global_auth_tok_sig,
+ ECRYPTFS_SIG_SIZE_HEX);
+ cipher_name_len =
+ strlen(mount_crypt_stat->global_default_cipher_name);
+ memcpy(crypt_stat->cipher,
+ mount_crypt_stat->global_default_cipher_name,
+ cipher_name_len);
+ crypt_stat->cipher[cipher_name_len] = '\0';
+ crypt_stat->key_size =
+ mount_crypt_stat->global_default_cipher_key_size;
+ ecryptfs_generate_new_key(crypt_stat);
+ } else
+ /* We should not encounter this scenario since we
+ * should detect lack of global_auth_tok at mount time
+ * TODO: Applies to 0.1 release only; remove in future
+ * release */
+ BUG();
+ rc = ecryptfs_init_crypt_ctx(crypt_stat);
+ if (rc)
+ ecryptfs_printk(KERN_ERR, "Error initializing cryptographic "
+ "context for cipher [%s]: rc = [%d]\n",
+ crypt_stat->cipher, rc);
+ return rc;
+}
+
+/**
+ * contains_ecryptfs_marker - check for the ecryptfs marker
+ * @data: The data block in which to check
+ *
+ * Returns one if marker found; zero if not found
+ */
+int contains_ecryptfs_marker(char *data)
+{
+ u32 m_1, m_2;
+
+ memcpy(&m_1, data, 4);
+ m_1 = be32_to_cpu(m_1);
+ memcpy(&m_2, (data + 4), 4);
+ m_2 = be32_to_cpu(m_2);
+ if ((m_1 ^ MAGIC_ECRYPTFS_MARKER) == m_2)
+ return 1;
+ ecryptfs_printk(KERN_DEBUG, "m_1 = [0x%.8x]; m_2 = [0x%.8x]; "
+ "MAGIC_ECRYPTFS_MARKER = [0x%.8x]\n", m_1, m_2,
+ MAGIC_ECRYPTFS_MARKER);
+ ecryptfs_printk(KERN_DEBUG, "(m_1 ^ MAGIC_ECRYPTFS_MARKER) = "
+ "[0x%.8x]\n", (m_1 ^ MAGIC_ECRYPTFS_MARKER));
+ return 0;
+}
+
+struct ecryptfs_flag_map_elem {
+ u32 file_flag;
+ u32 local_flag;
+};
+
+/* Add support for additional flags by adding elements here. */
+static struct ecryptfs_flag_map_elem ecryptfs_flag_map[] = {
+ {0x00000001, ECRYPTFS_ENABLE_HMAC},
+ {0x00000002, ECRYPTFS_ENCRYPTED}
+};
+
+/**
+ * ecryptfs_process_flags
+ * @crypt_stat
+ * @page_virt: Source data to be parsed
+ * @bytes_read: Updated with the number of bytes read
+ *
+ * Returns zero on success; non-zero if the flag set is invalid
+ */
+static int ecryptfs_process_flags(struct ecryptfs_crypt_stat *crypt_stat,
+ char *page_virt, int *bytes_read)
+{
+ int rc = 0;
+ int i;
+ u32 flags;
+
+ memcpy(&flags, page_virt, 4);
+ flags = be32_to_cpu(flags);
+ for (i = 0; i < ((sizeof(ecryptfs_flag_map)
+ / sizeof(struct ecryptfs_flag_map_elem))); i++)
+ if (flags & ecryptfs_flag_map[i].file_flag) {
+ ECRYPTFS_SET_FLAG(crypt_stat->flags,
+ ecryptfs_flag_map[i].local_flag);
+ } else
+ ECRYPTFS_CLEAR_FLAG(crypt_stat->flags,
+ ecryptfs_flag_map[i].local_flag);
+ /* Version is in top 8 bits of the 32-bit flag vector */
+ crypt_stat->file_version = ((flags >> 24) & 0xFF);
+ (*bytes_read) = 4;
+ return rc;
+}
+
+/**
+ * write_ecryptfs_marker
+ * @page_virt: The pointer to in a page to begin writing the marker
+ * @written: Number of bytes written
+ *
+ * Marker = 0x3c81b7f5
+ */
+static void write_ecryptfs_marker(char *page_virt, size_t *written)
+{
+ u32 m_1, m_2;
+
+ get_random_bytes(&m_1, (MAGIC_ECRYPTFS_MARKER_SIZE_BYTES / 2));
+ m_2 = (m_1 ^ MAGIC_ECRYPTFS_MARKER);
+ m_1 = cpu_to_be32(m_1);
+ memcpy(page_virt, &m_1, (MAGIC_ECRYPTFS_MARKER_SIZE_BYTES / 2));
+ m_2 = cpu_to_be32(m_2);
+ memcpy(page_virt + (MAGIC_ECRYPTFS_MARKER_SIZE_BYTES / 2), &m_2,
+ (MAGIC_ECRYPTFS_MARKER_SIZE_BYTES / 2));
+ (*written) = MAGIC_ECRYPTFS_MARKER_SIZE_BYTES;
+}
+
+static void
+write_ecryptfs_flags(char *page_virt, struct ecryptfs_crypt_stat *crypt_stat,
+ size_t *written)
+{
+ u32 flags = 0;
+ int i;
+
+ for (i = 0; i < ((sizeof(ecryptfs_flag_map)
+ / sizeof(struct ecryptfs_flag_map_elem))); i++)
+ if (ECRYPTFS_CHECK_FLAG(crypt_stat->flags,
+ ecryptfs_flag_map[i].local_flag))
+ flags |= ecryptfs_flag_map[i].file_flag;
+ /* Version is in top 8 bits of the 32-bit flag vector */
+ flags |= ((((u8)crypt_stat->file_version) << 24) & 0xFF000000);
+ flags = cpu_to_be32(flags);
+ memcpy(page_virt, &flags, 4);
+ (*written) = 4;
+}
+
+struct ecryptfs_cipher_code_str_map_elem {
+ char cipher_str[16];
+ u16 cipher_code;
+};
+
+/* Add support for additional ciphers by adding elements here. The
+ * cipher_code is whatever OpenPGP applicatoins use to identify the
+ * ciphers. List in order of probability. */
+static struct ecryptfs_cipher_code_str_map_elem
+ecryptfs_cipher_code_str_map[] = {
+ {"aes",RFC2440_CIPHER_AES_128 },
+ {"blowfish", RFC2440_CIPHER_BLOWFISH},
+ {"des3_ede", RFC2440_CIPHER_DES3_EDE},
+ {"cast5", RFC2440_CIPHER_CAST_5},
+ {"twofish", RFC2440_CIPHER_TWOFISH},
+ {"cast6", RFC2440_CIPHER_CAST_6},
+ {"aes", RFC2440_CIPHER_AES_192},
+ {"aes", RFC2440_CIPHER_AES_256}
+};
+
+/**
+ * ecryptfs_code_for_cipher_string
+ * @str: The string representing the cipher name
+ *
+ * Returns zero on no match, or the cipher code on match
+ */
+u16 ecryptfs_code_for_cipher_string(struct ecryptfs_crypt_stat *crypt_stat)
+{
+ int i;
+ u16 code = 0;
+ struct ecryptfs_cipher_code_str_map_elem *map =
+ ecryptfs_cipher_code_str_map;
+
+ if (strcmp(crypt_stat->cipher, "aes") == 0) {
+ switch (crypt_stat->key_size) {
+ case 16:
+ code = RFC2440_CIPHER_AES_128;
+ break;
+ case 24:
+ code = RFC2440_CIPHER_AES_192;
+ break;
+ case 32:
+ code = RFC2440_CIPHER_AES_256;
+ }
+ } else {
+ for (i = 0; i < ARRAY_SIZE(ecryptfs_cipher_code_str_map); i++)
+ if (strcmp(crypt_stat->cipher, map[i].cipher_str) == 0){
+ code = map[i].cipher_code;
+ break;
+ }
+ }
+ return code;
+}
+
+/**
+ * ecryptfs_cipher_code_to_string
+ * @str: Destination to write out the cipher name
+ * @cipher_code: The code to convert to cipher name string
+ *
+ * Returns zero on success
+ */
+int ecryptfs_cipher_code_to_string(char *str, u16 cipher_code)
+{
+ int rc = 0;
+ int i;
+
+ str[0] = '\0';
+ for (i = 0; i < ARRAY_SIZE(ecryptfs_cipher_code_str_map); i++)
+ if (cipher_code == ecryptfs_cipher_code_str_map[i].cipher_code)
+ strcpy(str, ecryptfs_cipher_code_str_map[i].cipher_str);
+ if (str[0] == '\0') {
+ ecryptfs_printk(KERN_WARNING, "Cipher code not recognized: "
+ "[%d]\n", cipher_code);
+ rc = -EINVAL;
+ }
+ return rc;
+}
+
+/**
+ * ecryptfs_read_header_region
+ * @data
+ * @dentry
+ * @nd
+ *
+ * Returns zero on success; non-zero otherwise
+ */
+int ecryptfs_read_header_region(char *data, struct dentry *dentry,
+ struct vfsmount *mnt)
+{
+ struct file *file;
+ mm_segment_t oldfs;
+ int rc;
+
+ mnt = mntget(mnt);
+ file = dentry_open(dentry, mnt, O_RDONLY);
+ if (IS_ERR(file)) {
+ ecryptfs_printk(KERN_DEBUG, "Error opening file to "
+ "read header region\n");
+ mntput(mnt);
+ rc = PTR_ERR(file);
+ goto out;
+ }
+ file->f_pos = 0;
+ oldfs = get_fs();
+ set_fs(get_ds());
+ /* For releases 0.1 and 0.2, all of the header information
+ * fits in the first data extent-sized region. */
+ rc = file->f_op->read(file, (char __user *)data,
+ ECRYPTFS_DEFAULT_EXTENT_SIZE, &file->f_pos);
+ set_fs(oldfs);
+ fput(file);
+ rc = 0;
+out:
+ return rc;
+}
+
+static void
+write_header_metadata(char *virt, struct ecryptfs_crypt_stat *crypt_stat,
+ size_t *written)
+{
+ u32 header_extent_size;
+ u16 num_header_extents_at_front;
+
+ header_extent_size = (u32)crypt_stat->header_extent_size;
+ num_header_extents_at_front =
+ (u16)crypt_stat->num_header_extents_at_front;
+ header_extent_size = cpu_to_be32(header_extent_size);
+ memcpy(virt, &header_extent_size, 4);
+ virt += 4;
+ num_header_extents_at_front = cpu_to_be16(num_header_extents_at_front);
+ memcpy(virt, &num_header_extents_at_front, 2);
+ (*written) = 6;
+}
+
+struct kmem_cache *ecryptfs_header_cache_0;
+struct kmem_cache *ecryptfs_header_cache_1;
+struct kmem_cache *ecryptfs_header_cache_2;
+
+/**
+ * ecryptfs_write_headers_virt
+ * @page_virt
+ * @crypt_stat
+ * @ecryptfs_dentry
+ *
+ * Format version: 1
+ *
+ * Header Extent:
+ * Octets 0-7: Unencrypted file size (big-endian)
+ * Octets 8-15: eCryptfs special marker
+ * Octets 16-19: Flags
+ * Octet 16: File format version number (between 0 and 255)
+ * Octets 17-18: Reserved
+ * Octet 19: Bit 1 (lsb): Reserved
+ * Bit 2: Encrypted?
+ * Bits 3-8: Reserved
+ * Octets 20-23: Header extent size (big-endian)
+ * Octets 24-25: Number of header extents at front of file
+ * (big-endian)
+ * Octet 26: Begin RFC 2440 authentication token packet set
+ * Data Extent 0:
+ * Lower data (CBC encrypted)
+ * Data Extent 1:
+ * Lower data (CBC encrypted)
+ * ...
+ *
+ * Returns zero on success
+ */
+int ecryptfs_write_headers_virt(char *page_virt,
+ struct ecryptfs_crypt_stat *crypt_stat,
+ struct dentry *ecryptfs_dentry)
+{
+ int rc;
+ size_t written;
+ size_t offset;
+
+ offset = ECRYPTFS_FILE_SIZE_BYTES;
+ write_ecryptfs_marker((page_virt + offset), &written);
+ offset += written;
+ write_ecryptfs_flags((page_virt + offset), crypt_stat, &written);
+ offset += written;
+ write_header_metadata((page_virt + offset), crypt_stat, &written);
+ offset += written;
+ rc = ecryptfs_generate_key_packet_set((page_virt + offset), crypt_stat,
+ ecryptfs_dentry, &written,
+ PAGE_CACHE_SIZE - offset);
+ if (rc)
+ ecryptfs_printk(KERN_WARNING, "Error generating key packet "
+ "set; rc = [%d]\n", rc);
+ return rc;
+}
+
+/**
+ * ecryptfs_write_headers
+ * @lower_file: The lower file struct, which was returned from dentry_open
+ *
+ * Write the file headers out. This will likely involve a userspace
+ * callout, in which the session key is encrypted with one or more
+ * public keys and/or the passphrase necessary to do the encryption is
+ * retrieved via a prompt. Exactly what happens at this point should
+ * be policy-dependent.
+ *
+ * Returns zero on success; non-zero on error
+ */
+int ecryptfs_write_headers(struct dentry *ecryptfs_dentry,
+ struct file *lower_file)
+{
+ mm_segment_t oldfs;
+ struct ecryptfs_crypt_stat *crypt_stat;
+ char *page_virt;
+ int current_header_page;
+ int header_pages;
+ int rc = 0;
+
+ crypt_stat = &ecryptfs_inode_to_private(
+ ecryptfs_dentry->d_inode)->crypt_stat;
+ if (likely(ECRYPTFS_CHECK_FLAG(crypt_stat->flags,
+ ECRYPTFS_ENCRYPTED))) {
+ if (!ECRYPTFS_CHECK_FLAG(crypt_stat->flags,
+ ECRYPTFS_KEY_VALID)) {
+ ecryptfs_printk(KERN_DEBUG, "Key is "
+ "invalid; bailing out\n");
+ rc = -EINVAL;
+ goto out;
+ }
+ } else {
+ rc = -EINVAL;
+ ecryptfs_printk(KERN_WARNING,
+ "Called with crypt_stat->encrypted == 0\n");
+ goto out;
+ }
+ /* Released in this function */
+ page_virt = kmem_cache_alloc(ecryptfs_header_cache_0, SLAB_USER);
+ if (!page_virt) {
+ ecryptfs_printk(KERN_ERR, "Out of memory\n");
+ rc = -ENOMEM;
+ goto out;
+ }
+ memset(page_virt, 0, PAGE_CACHE_SIZE);
+ rc = ecryptfs_write_headers_virt(page_virt, crypt_stat,
+ ecryptfs_dentry);
+ if (unlikely(rc)) {
+ ecryptfs_printk(KERN_ERR, "Error whilst writing headers\n");
+ memset(page_virt, 0, PAGE_CACHE_SIZE);
+ goto out_free;
+ }
+ ecryptfs_printk(KERN_DEBUG,
+ "Writing key packet set to underlying file\n");
+ lower_file->f_pos = 0;
+ oldfs = get_fs();
+ set_fs(get_ds());
+ ecryptfs_printk(KERN_DEBUG, "Calling lower_file->f_op->"
+ "write() w/ header page; lower_file->f_pos = "
+ "[0x%.16x]\n", lower_file->f_pos);
+ lower_file->f_op->write(lower_file, (char __user *)page_virt,
+ PAGE_CACHE_SIZE, &lower_file->f_pos);
+ header_pages = ((crypt_stat->header_extent_size
+ * crypt_stat->num_header_extents_at_front)
+ / PAGE_CACHE_SIZE);
+ memset(page_virt, 0, PAGE_CACHE_SIZE);
+ current_header_page = 1;
+ while (current_header_page < header_pages) {
+ ecryptfs_printk(KERN_DEBUG, "Calling lower_file->f_op->"
+ "write() w/ zero'd page; lower_file->f_pos = "
+ "[0x%.16x]\n", lower_file->f_pos);
+ lower_file->f_op->write(lower_file, (char __user *)page_virt,
+ PAGE_CACHE_SIZE, &lower_file->f_pos);
+ current_header_page++;
+ }
+ set_fs(oldfs);
+ ecryptfs_printk(KERN_DEBUG,
+ "Done writing key packet set to underlying file.\n");
+out_free:
+ kmem_cache_free(ecryptfs_header_cache_0, page_virt);
+out:
+ return rc;
+}
+
+static int parse_header_metadata(struct ecryptfs_crypt_stat *crypt_stat,
+ char *virt, int *bytes_read)
+{
+ int rc = 0;
+ u32 header_extent_size;
+ u16 num_header_extents_at_front;
+
+ memcpy(&header_extent_size, virt, 4);
+ header_extent_size = be32_to_cpu(header_extent_size);
+ virt += 4;
+ memcpy(&num_header_extents_at_front, virt, 2);
+ num_header_extents_at_front = be16_to_cpu(num_header_extents_at_front);
+ crypt_stat->header_extent_size = (int)header_extent_size;
+ crypt_stat->num_header_extents_at_front =
+ (int)num_header_extents_at_front;
+ (*bytes_read) = 6;
+ if ((crypt_stat->header_extent_size
+ * crypt_stat->num_header_extents_at_front)
+ < ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE) {
+ rc = -EINVAL;
+ ecryptfs_printk(KERN_WARNING, "Invalid header extent size: "
+ "[%d]\n", crypt_stat->header_extent_size);
+ }
+ return rc;
+}
+
+/**
+ * set_default_header_data
+ *
+ * For version 0 file format; this function is only for backwards
+ * compatibility for files created with the prior versions of
+ * eCryptfs.
+ */
+static void set_default_header_data(struct ecryptfs_crypt_stat *crypt_stat)
+{
+ crypt_stat->header_extent_size = 4096;
+ crypt_stat->num_header_extents_at_front = 1;
+}
+
+/**
+ * ecryptfs_read_headers_virt
+ *
+ * Read/parse the header data. The header format is detailed in the
+ * comment block for the ecryptfs_write_headers_virt() function.
+ *
+ * Returns zero on success
+ */
+static int ecryptfs_read_headers_virt(char *page_virt,
+ struct ecryptfs_crypt_stat *crypt_stat,
+ struct dentry *ecryptfs_dentry)
+{
+ int rc = 0;
+ int offset;
+ int bytes_read;
+
+ ecryptfs_set_default_sizes(crypt_stat);
+ crypt_stat->mount_crypt_stat = &ecryptfs_superblock_to_private(
+ ecryptfs_dentry->d_sb)->mount_crypt_stat;
+ offset = ECRYPTFS_FILE_SIZE_BYTES;
+ rc = contains_ecryptfs_marker(page_virt + offset);
+ if (rc == 0) {
+ rc = -EINVAL;
+ goto out;
+ }
+ offset += MAGIC_ECRYPTFS_MARKER_SIZE_BYTES;
+ rc = ecryptfs_process_flags(crypt_stat, (page_virt + offset),
+ &bytes_read);
+ if (rc) {
+ ecryptfs_printk(KERN_WARNING, "Error processing flags\n");
+ goto out;
+ }
+ if (crypt_stat->file_version > ECRYPTFS_SUPPORTED_FILE_VERSION) {
+ ecryptfs_printk(KERN_WARNING, "File version is [%d]; only "
+ "file version [%d] is supported by this "
+ "version of eCryptfs\n",
+ crypt_stat->file_version,
+ ECRYPTFS_SUPPORTED_FILE_VERSION);
+ rc = -EINVAL;
+ goto out;
+ }
+ offset += bytes_read;
+ if (crypt_stat->file_version >= 1) {
+ rc = parse_header_metadata(crypt_stat, (page_virt + offset),
+ &bytes_read);
+ if (rc) {
+ ecryptfs_printk(KERN_WARNING, "Error reading header "
+ "metadata; rc = [%d]\n", rc);
+ }
+ offset += bytes_read;
+ } else
+ set_default_header_data(crypt_stat);
+ rc = ecryptfs_parse_packet_set(crypt_stat, (page_virt + offset),
+ ecryptfs_dentry);
+out:
+ return rc;
+}
+
+/**
+ * ecryptfs_read_headers
+ *
+ * Returns zero if valid headers found and parsed; non-zero otherwise
+ */
+int ecryptfs_read_headers(struct dentry *ecryptfs_dentry,
+ struct file *lower_file)
+{
+ int rc = 0;
+ char *page_virt = NULL;
+ mm_segment_t oldfs;
+ ssize_t bytes_read;
+ struct ecryptfs_crypt_stat *crypt_stat =
+ &ecryptfs_inode_to_private(ecryptfs_dentry->d_inode)->crypt_stat;
+
+ /* Read the first page from the underlying file */
+ page_virt = kmem_cache_alloc(ecryptfs_header_cache_1, SLAB_USER);
+ if (!page_virt) {
+ rc = -ENOMEM;
+ ecryptfs_printk(KERN_ERR, "Unable to allocate page_virt\n");
+ goto out;
+ }
+ lower_file->f_pos = 0;
+ oldfs = get_fs();
+ set_fs(get_ds());
+ bytes_read = lower_file->f_op->read(lower_file,
+ (char __user *)page_virt,
+ ECRYPTFS_DEFAULT_EXTENT_SIZE,
+ &lower_file->f_pos);
+ set_fs(oldfs);
+ if (bytes_read != ECRYPTFS_DEFAULT_EXTENT_SIZE) {
+ rc = -EINVAL;
+ goto out;
+ }
+ rc = ecryptfs_read_headers_virt(page_virt, crypt_stat,
+ ecryptfs_dentry);
+ if (rc) {
+ ecryptfs_printk(KERN_DEBUG, "Valid eCryptfs headers not "
+ "found\n");
+ rc = -EINVAL;
+ }
+out:
+ if (page_virt) {
+ memset(page_virt, 0, PAGE_CACHE_SIZE);
+ kmem_cache_free(ecryptfs_header_cache_1, page_virt);
+ }
+ return rc;
+}
+
+/**
+ * ecryptfs_encode_filename - converts a plaintext file name to cipher text
+ * @crypt_stat: The crypt_stat struct associated with the file anem to encode
+ * @name: The plaintext name
+ * @length: The length of the plaintext
+ * @encoded_name: The encypted name
+ *
+ * Encrypts and encodes a filename into something that constitutes a
+ * valid filename for a filesystem, with printable characters.
+ *
+ * We assume that we have a properly initialized crypto context,
+ * pointed to by crypt_stat->tfm.
+ *
+ * TODO: Implement filename decoding and decryption here, in place of
+ * memcpy. We are keeping the framework around for now to (1)
+ * facilitate testing of the components needed to implement filename
+ * encryption and (2) to provide a code base from which other
+ * developers in the community can easily implement this feature.
+ *
+ * Returns the length of encoded filename; negative if error
+ */
+int
+ecryptfs_encode_filename(struct ecryptfs_crypt_stat *crypt_stat,
+ const char *name, int length, char **encoded_name)
+{
+ int error = 0;
+
+ (*encoded_name) = kmalloc(length + 2, GFP_KERNEL);
+ if (!(*encoded_name)) {
+ error = -ENOMEM;
+ goto out;
+ }
+ /* TODO: Filename encryption is a scheduled feature for a
+ * future version of eCryptfs. This function is here only for
+ * the purpose of providing a framework for other developers
+ * to easily implement filename encryption. Hint: Replace this
+ * memcpy() with a call to encrypt and encode the
+ * filename, the set the length accordingly. */
+ memcpy((void *)(*encoded_name), (void *)name, length);
+ (*encoded_name)[length] = '\0';
+ error = length + 1;
+out:
+ return error;
+}
+
+/**
+ * ecryptfs_decode_filename - converts the cipher text name to plaintext
+ * @crypt_stat: The crypt_stat struct associated with the file
+ * @name: The filename in cipher text
+ * @length: The length of the cipher text name
+ * @decrypted_name: The plaintext name
+ *
+ * Decodes and decrypts the filename.
+ *
+ * We assume that we have a properly initialized crypto context,
+ * pointed to by crypt_stat->tfm.
+ *
+ * TODO: Implement filename decoding and decryption here, in place of
+ * memcpy. We are keeping the framework around for now to (1)
+ * facilitate testing of the components needed to implement filename
+ * encryption and (2) to provide a code base from which other
+ * developers in the community can easily implement this feature.
+ *
+ * Returns the length of decoded filename; negative if error
+ */
+int
+ecryptfs_decode_filename(struct ecryptfs_crypt_stat *crypt_stat,
+ const char *name, int length, char **decrypted_name)
+{
+ int error = 0;
+
+ (*decrypted_name) = kmalloc(length + 2, GFP_KERNEL);
+ if (!(*decrypted_name)) {
+ error = -ENOMEM;
+ goto out;
+ }
+ /* TODO: Filename encryption is a scheduled feature for a
+ * future version of eCryptfs. This function is here only for
+ * the purpose of providing a framework for other developers
+ * to easily implement filename encryption. Hint: Replace this
+ * memcpy() with a call to decode and decrypt the
+ * filename, the set the length accordingly. */
+ memcpy((void *)(*decrypted_name), (void *)name, length);
+ (*decrypted_name)[length + 1] = '\0'; /* Only for convenience
+ * in printing out the
+ * string in debug
+ * messages */
+ error = length;
+out:
+ return error;
+}
+
+/**
+ * ecryptfs_process_cipher - Perform cipher initialization.
+ * @tfm: Crypto context set by this function
+ * @key_tfm: Crypto context for key material, set by this function
+ * @cipher_name: Name of the cipher.
+ * @key_size: Size of the key in bytes.
+ *
+ * Returns zero on success. Any crypto_tfm structs allocated here
+ * should be released by other functions, such as on a superblock put
+ * event, regardless of whether this function succeeds for fails.
+ */
+int
+ecryptfs_process_cipher(struct crypto_tfm **tfm, struct crypto_tfm **key_tfm,
+ char *cipher_name, size_t key_size)
+{
+ char dummy_key[ECRYPTFS_MAX_KEY_BYTES];
+ int rc;
+
+ *tfm = *key_tfm = NULL;
+ if (key_size > ECRYPTFS_MAX_KEY_BYTES) {
+ rc = -EINVAL;
+ printk(KERN_ERR "Requested key size is [%Zd] bytes; maximum "
+ "allowable is [%d]\n", key_size, ECRYPTFS_MAX_KEY_BYTES);
+ goto out;
+ }
+ *tfm = crypto_alloc_tfm(cipher_name, (ECRYPTFS_DEFAULT_CHAINING_MODE
+ | CRYPTO_TFM_REQ_WEAK_KEY));
+ if (!(*tfm)) {
+ rc = -EINVAL;
+ printk(KERN_ERR "Unable to allocate crypto cipher with name "
+ "[%s]\n", cipher_name);
+ goto out;
+ }
+ *key_tfm = crypto_alloc_tfm(cipher_name, CRYPTO_TFM_REQ_WEAK_KEY);
+ if (!(*key_tfm)) {
+ rc = -EINVAL;
+ printk(KERN_ERR "Unable to allocate crypto cipher with name "
+ "[%s]\n", cipher_name);
+ goto out;
+ }
+ if (key_size < crypto_tfm_alg_min_keysize(*tfm)) {
+ rc = -EINVAL;
+ printk(KERN_ERR "Request key size is [%Zd]; minimum key size "
+ "supported by cipher [%s] is [%d]\n", key_size,
+ cipher_name, crypto_tfm_alg_min_keysize(*tfm));
+ goto out;
+ }
+ if (key_size < crypto_tfm_alg_min_keysize(*key_tfm)) {
+ rc = -EINVAL;
+ printk(KERN_ERR "Request key size is [%Zd]; minimum key size "
+ "supported by cipher [%s] is [%d]\n", key_size,
+ cipher_name, crypto_tfm_alg_min_keysize(*key_tfm));
+ goto out;
+ }
+ if (key_size > crypto_tfm_alg_max_keysize(*tfm)) {
+ rc = -EINVAL;
+ printk(KERN_ERR "Request key size is [%Zd]; maximum key size "
+ "supported by cipher [%s] is [%d]\n", key_size,
+ cipher_name, crypto_tfm_alg_min_keysize(*tfm));
+ goto out;
+ }
+ if (key_size > crypto_tfm_alg_max_keysize(*key_tfm)) {
+ rc = -EINVAL;
+ printk(KERN_ERR "Request key size is [%Zd]; maximum key size "
+ "supported by cipher [%s] is [%d]\n", key_size,
+ cipher_name, crypto_tfm_alg_min_keysize(*key_tfm));
+ goto out;
+ }
+ get_random_bytes(dummy_key, key_size);
+ rc = crypto_cipher_setkey(*tfm, dummy_key, key_size);
+ if (rc) {
+ printk(KERN_ERR "Error attempting to set key of size [%Zd] for "
+ "cipher [%s]; rc = [%d]\n", key_size, cipher_name, rc);
+ rc = -EINVAL;
+ goto out;
+ }
+ rc = crypto_cipher_setkey(*key_tfm, dummy_key, key_size);
+ if (rc) {
+ printk(KERN_ERR "Error attempting to set key of size [%Zd] for "
+ "cipher [%s]; rc = [%d]\n", key_size, cipher_name, rc);
+ rc = -EINVAL;
+ goto out;
+ }
+out:
+ return rc;
+}
diff --git a/fs/ecryptfs/debug.c b/fs/ecryptfs/debug.c
new file mode 100644
index 000000000000..61f8e894284f
--- /dev/null
+++ b/fs/ecryptfs/debug.c
@@ -0,0 +1,123 @@
+/**
+ * eCryptfs: Linux filesystem encryption layer
+ * Functions only useful for debugging.
+ *
+ * Copyright (C) 2006 International Business Machines Corp.
+ * Author(s): Michael A. Halcrow <mahalcro@us.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+ * 02111-1307, USA.
+ */
+
+#include "ecryptfs_kernel.h"
+
+/**
+ * ecryptfs_dump_auth_tok - debug function to print auth toks
+ *
+ * This function will print the contents of an ecryptfs authentication
+ * token.
+ */
+void ecryptfs_dump_auth_tok(struct ecryptfs_auth_tok *auth_tok)
+{
+ char salt[ECRYPTFS_SALT_SIZE * 2 + 1];
+ char sig[ECRYPTFS_SIG_SIZE_HEX + 1];
+
+ ecryptfs_printk(KERN_DEBUG, "Auth tok at mem loc [%p]:\n",
+ auth_tok);
+ if (ECRYPTFS_CHECK_FLAG(auth_tok->flags, ECRYPTFS_PRIVATE_KEY)) {
+ ecryptfs_printk(KERN_DEBUG, " * private key type\n");
+ ecryptfs_printk(KERN_DEBUG, " * (NO PRIVATE KEY SUPPORT "
+ "IN ECRYPTFS VERSION 0.1)\n");
+ } else {
+ ecryptfs_printk(KERN_DEBUG, " * passphrase type\n");
+ ecryptfs_to_hex(salt, auth_tok->token.password.salt,
+ ECRYPTFS_SALT_SIZE);
+ salt[ECRYPTFS_SALT_SIZE * 2] = '\0';
+ ecryptfs_printk(KERN_DEBUG, " * salt = [%s]\n", salt);
+ if (ECRYPTFS_CHECK_FLAG(auth_tok->token.password.flags,
+ ECRYPTFS_PERSISTENT_PASSWORD)) {
+ ecryptfs_printk(KERN_DEBUG, " * persistent\n");
+ }
+ memcpy(sig, auth_tok->token.password.signature,
+ ECRYPTFS_SIG_SIZE_HEX);
+ sig[ECRYPTFS_SIG_SIZE_HEX] = '\0';
+ ecryptfs_printk(KERN_DEBUG, " * signature = [%s]\n", sig);
+ }
+ ecryptfs_printk(KERN_DEBUG, " * session_key.flags = [0x%x]\n",
+ auth_tok->session_key.flags);
+ if (auth_tok->session_key.flags
+ & ECRYPTFS_USERSPACE_SHOULD_TRY_TO_DECRYPT)
+ ecryptfs_printk(KERN_DEBUG,
+ " * Userspace decrypt request set\n");
+ if (auth_tok->session_key.flags
+ & ECRYPTFS_USERSPACE_SHOULD_TRY_TO_ENCRYPT)
+ ecryptfs_printk(KERN_DEBUG,
+ " * Userspace encrypt request set\n");
+ if (auth_tok->session_key.flags & ECRYPTFS_CONTAINS_DECRYPTED_KEY) {
+ ecryptfs_printk(KERN_DEBUG, " * Contains decrypted key\n");
+ ecryptfs_printk(KERN_DEBUG,
+ " * session_key.decrypted_key_size = [0x%x]\n",
+ auth_tok->session_key.decrypted_key_size);
+ ecryptfs_printk(KERN_DEBUG, " * Decrypted session key "
+ "dump:\n");
+ if (ecryptfs_verbosity > 0)
+ ecryptfs_dump_hex(auth_tok->session_key.decrypted_key,
+ ECRYPTFS_DEFAULT_KEY_BYTES);
+ }
+ if (auth_tok->session_key.flags & ECRYPTFS_CONTAINS_ENCRYPTED_KEY) {
+ ecryptfs_printk(KERN_DEBUG, " * Contains encrypted key\n");
+ ecryptfs_printk(KERN_DEBUG,
+ " * session_key.encrypted_key_size = [0x%x]\n",
+ auth_tok->session_key.encrypted_key_size);
+ ecryptfs_printk(KERN_DEBUG, " * Encrypted session key "
+ "dump:\n");
+ if (ecryptfs_verbosity > 0)
+ ecryptfs_dump_hex(auth_tok->session_key.encrypted_key,
+ auth_tok->session_key.
+ encrypted_key_size);
+ }
+}
+
+/**
+ * ecryptfs_dump_hex - debug hex printer
+ * @data: string of bytes to be printed
+ * @bytes: number of bytes to print
+ *
+ * Dump hexadecimal representation of char array
+ */
+void ecryptfs_dump_hex(char *data, int bytes)
+{
+ int i = 0;
+ int add_newline = 1;
+
+ if (ecryptfs_verbosity < 1)
+ return;
+ if (bytes != 0) {
+ printk(KERN_DEBUG "0x%.2x.", (unsigned char)data[i]);
+ i++;
+ }
+ while (i < bytes) {
+ printk("0x%.2x.", (unsigned char)data[i]);
+ i++;
+ if (i % 16 == 0) {
+ printk("\n");
+ add_newline = 0;
+ } else
+ add_newline = 1;
+ }
+ if (add_newline)
+ printk("\n");
+}
+
diff --git a/fs/ecryptfs/dentry.c b/fs/ecryptfs/dentry.c
new file mode 100644
index 000000000000..f0d2a433242b
--- /dev/null
+++ b/fs/ecryptfs/dentry.c
@@ -0,0 +1,87 @@
+/**
+ * eCryptfs: Linux filesystem encryption layer
+ *
+ * Copyright (C) 1997-2003 Erez Zadok
+ * Copyright (C) 2001-2003 Stony Brook University
+ * Copyright (C) 2004-2006 International Business Machines Corp.
+ * Author(s): Michael A. Halcrow <mahalcro@us.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+ * 02111-1307, USA.
+ */
+
+#include <linux/dcache.h>
+#include <linux/namei.h>
+#include "ecryptfs_kernel.h"
+
+/**
+ * ecryptfs_d_revalidate - revalidate an ecryptfs dentry
+ * @dentry: The ecryptfs dentry
+ * @nd: The associated nameidata
+ *
+ * Called when the VFS needs to revalidate a dentry. This
+ * is called whenever a name lookup finds a dentry in the
+ * dcache. Most filesystems leave this as NULL, because all their
+ * dentries in the dcache are valid.
+ *
+ * Returns 1 if valid, 0 otherwise.
+ *
+ */
+static int ecryptfs_d_revalidate(struct dentry *dentry, struct nameidata *nd)
+{
+ struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry);
+ struct vfsmount *lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry);
+ struct dentry *dentry_save;
+ struct vfsmount *vfsmount_save;
+ int rc = 1;
+
+ if (!lower_dentry->d_op || !lower_dentry->d_op->d_revalidate)
+ goto out;
+ dentry_save = nd->dentry;
+ vfsmount_save = nd->mnt;
+ nd->dentry = lower_dentry;
+ nd->mnt = lower_mnt;
+ rc = lower_dentry->d_op->d_revalidate(lower_dentry, nd);
+ nd->dentry = dentry_save;
+ nd->mnt = vfsmount_save;
+out:
+ return rc;
+}
+
+struct kmem_cache *ecryptfs_dentry_info_cache;
+
+/**
+ * ecryptfs_d_release
+ * @dentry: The ecryptfs dentry
+ *
+ * Called when a dentry is really deallocated.
+ */
+static void ecryptfs_d_release(struct dentry *dentry)
+{
+ struct dentry *lower_dentry;
+
+ lower_dentry = ecryptfs_dentry_to_lower(dentry);
+ if (ecryptfs_dentry_to_private(dentry))
+ kmem_cache_free(ecryptfs_dentry_info_cache,
+ ecryptfs_dentry_to_private(dentry));
+ if (lower_dentry)
+ dput(lower_dentry);
+ return;
+}
+
+struct dentry_operations ecryptfs_dops = {
+ .d_revalidate = ecryptfs_d_revalidate,
+ .d_release = ecryptfs_d_release,
+};
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
new file mode 100644
index 000000000000..872c9958531a
--- /dev/null
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -0,0 +1,482 @@
+/**
+ * eCryptfs: Linux filesystem encryption layer
+ * Kernel declarations.
+ *
+ * Copyright (C) 1997-2003 Erez Zadok
+ * Copyright (C) 2001-2003 Stony Brook University
+ * Copyright (C) 2004-2006 International Business Machines Corp.
+ * Author(s): Michael A. Halcrow <mahalcro@us.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+ * 02111-1307, USA.
+ */
+
+#ifndef ECRYPTFS_KERNEL_H
+#define ECRYPTFS_KERNEL_H
+
+#include <keys/user-type.h>
+#include <linux/fs.h>
+#include <linux/scatterlist.h>
+
+/* Version verification for shared data structures w/ userspace */
+#define ECRYPTFS_VERSION_MAJOR 0x00
+#define ECRYPTFS_VERSION_MINOR 0x04
+#define ECRYPTFS_SUPPORTED_FILE_VERSION 0x01
+/* These flags indicate which features are supported by the kernel
+ * module; userspace tools such as the mount helper read
+ * ECRYPTFS_VERSIONING_MASK from a sysfs handle in order to determine
+ * how to behave. */
+#define ECRYPTFS_VERSIONING_PASSPHRASE 0x00000001
+#define ECRYPTFS_VERSIONING_PUBKEY 0x00000002
+#define ECRYPTFS_VERSIONING_PLAINTEXT_PASSTHROUGH 0x00000004
+#define ECRYPTFS_VERSIONING_POLICY 0x00000008
+#define ECRYPTFS_VERSIONING_MASK (ECRYPTFS_VERSIONING_PASSPHRASE \
+ | ECRYPTFS_VERSIONING_PLAINTEXT_PASSTHROUGH)
+
+#define ECRYPTFS_MAX_PASSWORD_LENGTH 64
+#define ECRYPTFS_MAX_PASSPHRASE_BYTES ECRYPTFS_MAX_PASSWORD_LENGTH
+#define ECRYPTFS_SALT_SIZE 8
+#define ECRYPTFS_SALT_SIZE_HEX (ECRYPTFS_SALT_SIZE*2)
+/* The original signature size is only for what is stored on disk; all
+ * in-memory representations are expanded hex, so it better adapted to
+ * be passed around or referenced on the command line */
+#define ECRYPTFS_SIG_SIZE 8
+#define ECRYPTFS_SIG_SIZE_HEX (ECRYPTFS_SIG_SIZE*2)
+#define ECRYPTFS_PASSWORD_SIG_SIZE ECRYPTFS_SIG_SIZE_HEX
+#define ECRYPTFS_MAX_KEY_BYTES 64
+#define ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES 512
+#define ECRYPTFS_DEFAULT_IV_BYTES 16
+#define ECRYPTFS_FILE_VERSION 0x01
+#define ECRYPTFS_DEFAULT_HEADER_EXTENT_SIZE 8192
+#define ECRYPTFS_DEFAULT_EXTENT_SIZE 4096
+#define ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE 8192
+
+#define RFC2440_CIPHER_DES3_EDE 0x02
+#define RFC2440_CIPHER_CAST_5 0x03
+#define RFC2440_CIPHER_BLOWFISH 0x04
+#define RFC2440_CIPHER_AES_128 0x07
+#define RFC2440_CIPHER_AES_192 0x08
+#define RFC2440_CIPHER_AES_256 0x09
+#define RFC2440_CIPHER_TWOFISH 0x0a
+#define RFC2440_CIPHER_CAST_6 0x0b
+
+#define ECRYPTFS_SET_FLAG(flag_bit_vector, flag) (flag_bit_vector |= (flag))
+#define ECRYPTFS_CLEAR_FLAG(flag_bit_vector, flag) (flag_bit_vector &= ~(flag))
+#define ECRYPTFS_CHECK_FLAG(flag_bit_vector, flag) (flag_bit_vector & (flag))
+
+/**
+ * For convenience, we may need to pass around the encrypted session
+ * key between kernel and userspace because the authentication token
+ * may not be extractable. For example, the TPM may not release the
+ * private key, instead requiring the encrypted data and returning the
+ * decrypted data.
+ */
+struct ecryptfs_session_key {
+#define ECRYPTFS_USERSPACE_SHOULD_TRY_TO_DECRYPT 0x00000001
+#define ECRYPTFS_USERSPACE_SHOULD_TRY_TO_ENCRYPT 0x00000002
+#define ECRYPTFS_CONTAINS_DECRYPTED_KEY 0x00000004
+#define ECRYPTFS_CONTAINS_ENCRYPTED_KEY 0x00000008
+ u32 flags;
+ u32 encrypted_key_size;
+ u32 decrypted_key_size;
+ u8 encrypted_key[ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES];
+ u8 decrypted_key[ECRYPTFS_MAX_KEY_BYTES];
+};
+
+struct ecryptfs_password {
+ u32 password_bytes;
+ s32 hash_algo;
+ u32 hash_iterations;
+ u32 session_key_encryption_key_bytes;
+#define ECRYPTFS_PERSISTENT_PASSWORD 0x01
+#define ECRYPTFS_SESSION_KEY_ENCRYPTION_KEY_SET 0x02
+ u32 flags;
+ /* Iterated-hash concatenation of salt and passphrase */
+ u8 session_key_encryption_key[ECRYPTFS_MAX_KEY_BYTES];
+ u8 signature[ECRYPTFS_PASSWORD_SIG_SIZE + 1];
+ /* Always in expanded hex */
+ u8 salt[ECRYPTFS_SALT_SIZE];
+};
+
+enum ecryptfs_token_types {ECRYPTFS_PASSWORD, ECRYPTFS_PRIVATE_KEY};
+
+/* May be a password or a private key */
+struct ecryptfs_auth_tok {
+ u16 version; /* 8-bit major and 8-bit minor */
+ u16 token_type;
+ u32 flags;
+ struct ecryptfs_session_key session_key;
+ u8 reserved[32];
+ union {
+ struct ecryptfs_password password;
+ /* Private key is in future eCryptfs releases */
+ } token;
+} __attribute__ ((packed));
+
+void ecryptfs_dump_auth_tok(struct ecryptfs_auth_tok *auth_tok);
+extern void ecryptfs_to_hex(char *dst, char *src, size_t src_size);
+extern void ecryptfs_from_hex(char *dst, char *src, int dst_size);
+
+struct ecryptfs_key_record {
+ unsigned char type;
+ size_t enc_key_size;
+ unsigned char sig[ECRYPTFS_SIG_SIZE];
+ unsigned char enc_key[ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES];
+};
+
+struct ecryptfs_auth_tok_list {
+ struct ecryptfs_auth_tok *auth_tok;
+ struct list_head list;
+};
+
+struct ecryptfs_crypt_stat;
+struct ecryptfs_mount_crypt_stat;
+
+struct ecryptfs_page_crypt_context {
+ struct page *page;
+#define ECRYPTFS_PREPARE_COMMIT_MODE 0
+#define ECRYPTFS_WRITEPAGE_MODE 1
+ unsigned int mode;
+ union {
+ struct file *lower_file;
+ struct writeback_control *wbc;
+ } param;
+};
+
+static inline struct ecryptfs_auth_tok *
+ecryptfs_get_key_payload_data(struct key *key)
+{
+ return (struct ecryptfs_auth_tok *)
+ (((struct user_key_payload*)key->payload.data)->data);
+}
+
+#define ECRYPTFS_SUPER_MAGIC 0xf15f
+#define ECRYPTFS_MAX_KEYSET_SIZE 1024
+#define ECRYPTFS_MAX_CIPHER_NAME_SIZE 32
+#define ECRYPTFS_MAX_NUM_ENC_KEYS 64
+#define ECRYPTFS_MAX_NUM_KEYSIGS 2 /* TODO: Make this a linked list */
+#define ECRYPTFS_MAX_IV_BYTES 16 /* 128 bits */
+#define ECRYPTFS_SALT_BYTES 2
+#define MAGIC_ECRYPTFS_MARKER 0x3c81b7f5
+#define MAGIC_ECRYPTFS_MARKER_SIZE_BYTES 8 /* 4*2 */
+#define ECRYPTFS_FILE_SIZE_BYTES 8
+#define ECRYPTFS_DEFAULT_CIPHER "aes"
+#define ECRYPTFS_DEFAULT_KEY_BYTES 16
+#define ECRYPTFS_DEFAULT_CHAINING_MODE CRYPTO_TFM_MODE_CBC
+#define ECRYPTFS_TAG_3_PACKET_TYPE 0x8C
+#define ECRYPTFS_TAG_11_PACKET_TYPE 0xED
+#define MD5_DIGEST_SIZE 16
+
+/**
+ * This is the primary struct associated with each encrypted file.
+ *
+ * TODO: cache align/pack?
+ */
+struct ecryptfs_crypt_stat {
+#define ECRYPTFS_STRUCT_INITIALIZED 0x00000001
+#define ECRYPTFS_POLICY_APPLIED 0x00000002
+#define ECRYPTFS_NEW_FILE 0x00000004
+#define ECRYPTFS_ENCRYPTED 0x00000008
+#define ECRYPTFS_SECURITY_WARNING 0x00000010
+#define ECRYPTFS_ENABLE_HMAC 0x00000020
+#define ECRYPTFS_ENCRYPT_IV_PAGES 0x00000040
+#define ECRYPTFS_KEY_VALID 0x00000080
+ u32 flags;
+ unsigned int file_version;
+ size_t iv_bytes;
+ size_t num_keysigs;
+ size_t header_extent_size;
+ size_t num_header_extents_at_front;
+ size_t extent_size; /* Data extent size; default is 4096 */
+ size_t key_size;
+ size_t extent_shift;
+ unsigned int extent_mask;
+ struct ecryptfs_mount_crypt_stat *mount_crypt_stat;
+ struct crypto_tfm *tfm;
+ struct crypto_tfm *md5_tfm; /* Crypto context for generating
+ * the initialization vectors */
+ unsigned char cipher[ECRYPTFS_MAX_CIPHER_NAME_SIZE];
+ unsigned char key[ECRYPTFS_MAX_KEY_BYTES];
+ unsigned char root_iv[ECRYPTFS_MAX_IV_BYTES];
+ unsigned char keysigs[ECRYPTFS_MAX_NUM_KEYSIGS][ECRYPTFS_SIG_SIZE_HEX];
+ struct mutex cs_tfm_mutex;
+ struct mutex cs_md5_tfm_mutex;
+ struct mutex cs_mutex;
+};
+
+/* inode private data. */
+struct ecryptfs_inode_info {
+ struct inode vfs_inode;
+ struct inode *wii_inode;
+ struct ecryptfs_crypt_stat crypt_stat;
+};
+
+/* dentry private data. Each dentry must keep track of a lower
+ * vfsmount too. */
+struct ecryptfs_dentry_info {
+ struct dentry *wdi_dentry;
+ struct vfsmount *lower_mnt;
+ struct ecryptfs_crypt_stat *crypt_stat;
+};
+
+/**
+ * This struct is to enable a mount-wide passphrase/salt combo. This
+ * is more or less a stopgap to provide similar functionality to other
+ * crypto filesystems like EncFS or CFS until full policy support is
+ * implemented in eCryptfs.
+ */
+struct ecryptfs_mount_crypt_stat {
+ /* Pointers to memory we do not own, do not free these */
+#define ECRYPTFS_PLAINTEXT_PASSTHROUGH_ENABLED 0x00000001
+ u32 flags;
+ struct ecryptfs_auth_tok *global_auth_tok;
+ struct key *global_auth_tok_key;
+ size_t global_default_cipher_key_size;
+ struct crypto_tfm *global_key_tfm;
+ struct mutex global_key_tfm_mutex;
+ unsigned char global_default_cipher_name[ECRYPTFS_MAX_CIPHER_NAME_SIZE
+ + 1];
+ unsigned char global_auth_tok_sig[ECRYPTFS_SIG_SIZE_HEX + 1];
+};
+
+/* superblock private data. */
+struct ecryptfs_sb_info {
+ struct super_block *wsi_sb;
+ struct ecryptfs_mount_crypt_stat mount_crypt_stat;
+};
+
+/* file private data. */
+struct ecryptfs_file_info {
+ struct file *wfi_file;
+ struct ecryptfs_crypt_stat *crypt_stat;
+};
+
+/* auth_tok <=> encrypted_session_key mappings */
+struct ecryptfs_auth_tok_list_item {
+ unsigned char encrypted_session_key[ECRYPTFS_MAX_KEY_BYTES];
+ struct list_head list;
+ struct ecryptfs_auth_tok auth_tok;
+};
+
+static inline struct ecryptfs_file_info *
+ecryptfs_file_to_private(struct file *file)
+{
+ return (struct ecryptfs_file_info *)file->private_data;
+}
+
+static inline void
+ecryptfs_set_file_private(struct file *file,
+ struct ecryptfs_file_info *file_info)
+{
+ file->private_data = file_info;
+}
+
+static inline struct file *ecryptfs_file_to_lower(struct file *file)
+{
+ return ((struct ecryptfs_file_info *)file->private_data)->wfi_file;
+}
+
+static inline void
+ecryptfs_set_file_lower(struct file *file, struct file *lower_file)
+{
+ ((struct ecryptfs_file_info *)file->private_data)->wfi_file =
+ lower_file;
+}
+
+static inline struct ecryptfs_inode_info *
+ecryptfs_inode_to_private(struct inode *inode)
+{
+ return container_of(inode, struct ecryptfs_inode_info, vfs_inode);
+}
+
+static inline struct inode *ecryptfs_inode_to_lower(struct inode *inode)
+{
+ return ecryptfs_inode_to_private(inode)->wii_inode;
+}
+
+static inline void
+ecryptfs_set_inode_lower(struct inode *inode, struct inode *lower_inode)
+{
+ ecryptfs_inode_to_private(inode)->wii_inode = lower_inode;
+}
+
+static inline struct ecryptfs_sb_info *
+ecryptfs_superblock_to_private(struct super_block *sb)
+{
+ return (struct ecryptfs_sb_info *)sb->s_fs_info;
+}
+
+static inline void
+ecryptfs_set_superblock_private(struct super_block *sb,
+ struct ecryptfs_sb_info *sb_info)
+{
+ sb->s_fs_info = sb_info;
+}
+
+static inline struct super_block *
+ecryptfs_superblock_to_lower(struct super_block *sb)
+{
+ return ((struct ecryptfs_sb_info *)sb->s_fs_info)->wsi_sb;
+}
+
+static inline void
+ecryptfs_set_superblock_lower(struct super_block *sb,
+ struct super_block *lower_sb)
+{
+ ((struct ecryptfs_sb_info *)sb->s_fs_info)->wsi_sb = lower_sb;
+}
+
+static inline struct ecryptfs_dentry_info *
+ecryptfs_dentry_to_private(struct dentry *dentry)
+{
+ return (struct ecryptfs_dentry_info *)dentry->d_fsdata;
+}
+
+static inline void
+ecryptfs_set_dentry_private(struct dentry *dentry,
+ struct ecryptfs_dentry_info *dentry_info)
+{
+ dentry->d_fsdata = dentry_info;
+}
+
+static inline struct dentry *
+ecryptfs_dentry_to_lower(struct dentry *dentry)
+{
+ return ((struct ecryptfs_dentry_info *)dentry->d_fsdata)->wdi_dentry;
+}
+
+static inline void
+ecryptfs_set_dentry_lower(struct dentry *dentry, struct dentry *lower_dentry)
+{
+ ((struct ecryptfs_dentry_info *)dentry->d_fsdata)->wdi_dentry =
+ lower_dentry;
+}
+
+static inline struct vfsmount *
+ecryptfs_dentry_to_lower_mnt(struct dentry *dentry)
+{
+ return ((struct ecryptfs_dentry_info *)dentry->d_fsdata)->lower_mnt;
+}
+
+static inline void
+ecryptfs_set_dentry_lower_mnt(struct dentry *dentry, struct vfsmount *lower_mnt)
+{
+ ((struct ecryptfs_dentry_info *)dentry->d_fsdata)->lower_mnt =
+ lower_mnt;
+}
+
+#define ecryptfs_printk(type, fmt, arg...) \
+ __ecryptfs_printk(type "%s: " fmt, __FUNCTION__, ## arg);
+void __ecryptfs_printk(const char *fmt, ...);
+
+extern const struct file_operations ecryptfs_main_fops;
+extern const struct file_operations ecryptfs_dir_fops;
+extern struct inode_operations ecryptfs_main_iops;
+extern struct inode_operations ecryptfs_dir_iops;
+extern struct inode_operations ecryptfs_symlink_iops;
+extern struct super_operations ecryptfs_sops;
+extern struct dentry_operations ecryptfs_dops;
+extern struct address_space_operations ecryptfs_aops;
+extern int ecryptfs_verbosity;
+
+extern struct kmem_cache *ecryptfs_auth_tok_list_item_cache;
+extern struct kmem_cache *ecryptfs_file_info_cache;
+extern struct kmem_cache *ecryptfs_dentry_info_cache;
+extern struct kmem_cache *ecryptfs_inode_info_cache;
+extern struct kmem_cache *ecryptfs_sb_info_cache;
+extern struct kmem_cache *ecryptfs_header_cache_0;
+extern struct kmem_cache *ecryptfs_header_cache_1;
+extern struct kmem_cache *ecryptfs_header_cache_2;
+extern struct kmem_cache *ecryptfs_lower_page_cache;
+
+int ecryptfs_interpose(struct dentry *hidden_dentry,
+ struct dentry *this_dentry, struct super_block *sb,
+ int flag);
+int ecryptfs_fill_zeros(struct file *file, loff_t new_length);
+int ecryptfs_decode_filename(struct ecryptfs_crypt_stat *crypt_stat,
+ const char *name, int length,
+ char **decrypted_name);
+int ecryptfs_encode_filename(struct ecryptfs_crypt_stat *crypt_stat,
+ const char *name, int length,
+ char **encoded_name);
+struct dentry *ecryptfs_lower_dentry(struct dentry *this_dentry);
+void ecryptfs_copy_attr_atime(struct inode *dest, const struct inode *src);
+void ecryptfs_copy_attr_all(struct inode *dest, const struct inode *src);
+void ecryptfs_copy_inode_size(struct inode *dst, const struct inode *src);
+void ecryptfs_dump_hex(char *data, int bytes);
+int virt_to_scatterlist(const void *addr, int size, struct scatterlist *sg,
+ int sg_size);
+int ecryptfs_compute_root_iv(struct ecryptfs_crypt_stat *crypt_stat);
+void ecryptfs_rotate_iv(unsigned char *iv);
+void ecryptfs_init_crypt_stat(struct ecryptfs_crypt_stat *crypt_stat);
+void ecryptfs_destruct_crypt_stat(struct ecryptfs_crypt_stat *crypt_stat);
+void ecryptfs_destruct_mount_crypt_stat(
+ struct ecryptfs_mount_crypt_stat *mount_crypt_stat);
+int ecryptfs_init_crypt_ctx(struct ecryptfs_crypt_stat *crypt_stat);
+int ecryptfs_write_inode_size_to_header(struct file *lower_file,
+ struct inode *lower_inode,
+ struct inode *inode);
+int ecryptfs_get_lower_page(struct page **lower_page, struct inode *lower_inode,
+ struct file *lower_file,
+ unsigned long lower_page_index, int byte_offset,
+ int region_bytes);
+int
+ecryptfs_commit_lower_page(struct page *lower_page, struct inode *lower_inode,
+ struct file *lower_file, int byte_offset,
+ int region_size);
+int ecryptfs_copy_page_to_lower(struct page *page, struct inode *lower_inode,
+ struct file *lower_file);
+int ecryptfs_do_readpage(struct file *file, struct page *page,
+ pgoff_t lower_page_index);
+int ecryptfs_grab_and_map_lower_page(struct page **lower_page,
+ char **lower_virt,
+ struct inode *lower_inode,
+ unsigned long lower_page_index);
+int ecryptfs_writepage_and_release_lower_page(struct page *lower_page,
+ struct inode *lower_inode,
+ struct writeback_control *wbc);
+int ecryptfs_encrypt_page(struct ecryptfs_page_crypt_context *ctx);
+int ecryptfs_decrypt_page(struct file *file, struct page *page);
+int ecryptfs_write_headers(struct dentry *ecryptfs_dentry,
+ struct file *lower_file);
+int ecryptfs_write_headers_virt(char *page_virt,
+ struct ecryptfs_crypt_stat *crypt_stat,
+ struct dentry *ecryptfs_dentry);
+int ecryptfs_read_headers(struct dentry *ecryptfs_dentry,
+ struct file *lower_file);
+int ecryptfs_new_file_context(struct dentry *ecryptfs_dentry);
+int contains_ecryptfs_marker(char *data);
+int ecryptfs_read_header_region(char *data, struct dentry *dentry,
+ struct vfsmount *mnt);
+u16 ecryptfs_code_for_cipher_string(struct ecryptfs_crypt_stat *crypt_stat);
+int ecryptfs_cipher_code_to_string(char *str, u16 cipher_code);
+void ecryptfs_set_default_sizes(struct ecryptfs_crypt_stat *crypt_stat);
+int ecryptfs_generate_key_packet_set(char *dest_base,
+ struct ecryptfs_crypt_stat *crypt_stat,
+ struct dentry *ecryptfs_dentry,
+ size_t *len, size_t max);
+int process_request_key_err(long err_code);
+int
+ecryptfs_parse_packet_set(struct ecryptfs_crypt_stat *crypt_stat,
+ unsigned char *src, struct dentry *ecryptfs_dentry);
+int ecryptfs_truncate(struct dentry *dentry, loff_t new_length);
+int
+ecryptfs_process_cipher(struct crypto_tfm **tfm, struct crypto_tfm **key_tfm,
+ char *cipher_name, size_t key_size);
+int ecryptfs_inode_test(struct inode *inode, void *candidate_lower_inode);
+int ecryptfs_inode_set(struct inode *inode, void *lower_inode);
+void ecryptfs_init_inode(struct inode *inode, struct inode *lower_inode);
+
+#endif /* #ifndef ECRYPTFS_KERNEL_H */
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
new file mode 100644
index 000000000000..c8550c9f9cd2
--- /dev/null
+++ b/fs/ecryptfs/file.c
@@ -0,0 +1,440 @@
+/**
+ * eCryptfs: Linux filesystem encryption layer
+ *
+ * Copyright (C) 1997-2004 Erez Zadok
+ * Copyright (C) 2001-2004 Stony Brook University
+ * Copyright (C) 2004-2006 International Business Machines Corp.
+ * Author(s): Michael A. Halcrow <mhalcrow@us.ibm.com>
+ * Michael C. Thompson <mcthomps@us.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+ * 02111-1307, USA.
+ */
+
+#include <linux/file.h>
+#include <linux/poll.h>
+#include <linux/mount.h>
+#include <linux/pagemap.h>
+#include <linux/security.h>
+#include <linux/smp_lock.h>
+#include <linux/compat.h>
+#include "ecryptfs_kernel.h"
+
+/**
+ * ecryptfs_llseek
+ * @file: File we are seeking in
+ * @offset: The offset to seek to
+ * @origin: 2 - offset from i_size; 1 - offset from f_pos
+ *
+ * Returns the position we have seeked to, or negative on error
+ */
+static loff_t ecryptfs_llseek(struct file *file, loff_t offset, int origin)
+{
+ loff_t rv;
+ loff_t new_end_pos;
+ int rc;
+ int expanding_file = 0;
+ struct inode *inode = file->f_mapping->host;
+
+ /* If our offset is past the end of our file, we're going to
+ * need to grow it so we have a valid length of 0's */
+ new_end_pos = offset;
+ switch (origin) {
+ case 2:
+ new_end_pos += i_size_read(inode);
+ expanding_file = 1;
+ break;
+ case 1:
+ new_end_pos += file->f_pos;
+ if (new_end_pos > i_size_read(inode)) {
+ ecryptfs_printk(KERN_DEBUG, "new_end_pos(=[0x%.16x]) "
+ "> i_size_read(inode)(=[0x%.16x])\n",
+ new_end_pos, i_size_read(inode));
+ expanding_file = 1;
+ }
+ break;
+ default:
+ if (new_end_pos > i_size_read(inode)) {
+ ecryptfs_printk(KERN_DEBUG, "new_end_pos(=[0x%.16x]) "
+ "> i_size_read(inode)(=[0x%.16x])\n",
+ new_end_pos, i_size_read(inode));
+ expanding_file = 1;
+ }
+ }
+ ecryptfs_printk(KERN_DEBUG, "new_end_pos = [0x%.16x]\n", new_end_pos);
+ if (expanding_file) {
+ rc = ecryptfs_truncate(file->f_dentry, new_end_pos);
+ if (rc) {
+ rv = rc;
+ ecryptfs_printk(KERN_ERR, "Error on attempt to "
+ "truncate to (higher) offset [0x%.16x];"
+ " rc = [%d]\n", new_end_pos, rc);
+ goto out;
+ }
+ }
+ rv = generic_file_llseek(file, offset, origin);
+out:
+ return rv;
+}
+
+/**
+ * ecryptfs_read_update_atime
+ *
+ * generic_file_read updates the atime of upper layer inode. But, it
+ * doesn't give us a chance to update the atime of the lower layer
+ * inode. This function is a wrapper to generic_file_read. It
+ * updates the atime of the lower level inode if generic_file_read
+ * returns without any errors. This is to be used only for file reads.
+ * The function to be used for directory reads is ecryptfs_read.
+ */
+static ssize_t ecryptfs_read_update_atime(struct kiocb *iocb,
+ const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
+{
+ int rc;
+ struct dentry *lower_dentry;
+ struct vfsmount *lower_vfsmount;
+ struct file *file = iocb->ki_filp;
+
+ rc = generic_file_aio_read(iocb, iov, nr_segs, pos);
+ /*
+ * Even though this is a async interface, we need to wait
+ * for IO to finish to update atime
+ */
+ if (-EIOCBQUEUED == rc)
+ rc = wait_on_sync_kiocb(iocb);
+ if (rc >= 0) {
+ lower_dentry = ecryptfs_dentry_to_lower(file->f_dentry);
+ lower_vfsmount = ecryptfs_dentry_to_lower_mnt(file->f_dentry);
+ touch_atime(lower_vfsmount, lower_dentry);
+ }
+ return rc;
+}
+
+struct ecryptfs_getdents_callback {
+ void *dirent;
+ struct dentry *dentry;
+ filldir_t filldir;
+ int err;
+ int filldir_called;
+ int entries_written;
+};
+
+/* Inspired by generic filldir in fs/readir.c */
+static int
+ecryptfs_filldir(void *dirent, const char *name, int namelen, loff_t offset,
+ u64 ino, unsigned int d_type)
+{
+ struct ecryptfs_crypt_stat *crypt_stat;
+ struct ecryptfs_getdents_callback *buf =
+ (struct ecryptfs_getdents_callback *)dirent;
+ int rc;
+ int decoded_length;
+ char *decoded_name;
+
+ crypt_stat = ecryptfs_dentry_to_private(buf->dentry)->crypt_stat;
+ buf->filldir_called++;
+ decoded_length = ecryptfs_decode_filename(crypt_stat, name, namelen,
+ &decoded_name);
+ if (decoded_length < 0) {
+ rc = decoded_length;
+ goto out;
+ }
+ rc = buf->filldir(buf->dirent, decoded_name, decoded_length, offset,
+ ino, d_type);
+ kfree(decoded_name);
+ if (rc >= 0)
+ buf->entries_written++;
+out:
+ return rc;
+}
+
+/**
+ * ecryptfs_readdir
+ * @file: The ecryptfs file struct
+ * @dirent: Directory entry
+ * @filldir: The filldir callback function
+ */
+static int ecryptfs_readdir(struct file *file, void *dirent, filldir_t filldir)
+{
+ int rc;
+ struct file *lower_file;
+ struct inode *inode;
+ struct ecryptfs_getdents_callback buf;
+
+ lower_file = ecryptfs_file_to_lower(file);
+ lower_file->f_pos = file->f_pos;
+ inode = file->f_dentry->d_inode;
+ memset(&buf, 0, sizeof(buf));
+ buf.dirent = dirent;
+ buf.dentry = file->f_dentry;
+ buf.filldir = filldir;
+retry:
+ buf.filldir_called = 0;
+ buf.entries_written = 0;
+ buf.err = 0;
+ rc = vfs_readdir(lower_file, ecryptfs_filldir, (void *)&buf);
+ if (buf.err)
+ rc = buf.err;
+ if (buf.filldir_called && !buf.entries_written)
+ goto retry;
+ file->f_pos = lower_file->f_pos;
+ if (rc >= 0)
+ ecryptfs_copy_attr_atime(inode, lower_file->f_dentry->d_inode);
+ return rc;
+}
+
+struct kmem_cache *ecryptfs_file_info_cache;
+
+/**
+ * ecryptfs_open
+ * @inode: inode speciying file to open
+ * @file: Structure to return filled in
+ *
+ * Opens the file specified by inode.
+ *
+ * Returns zero on success; non-zero otherwise
+ */
+static int ecryptfs_open(struct inode *inode, struct file *file)
+{
+ int rc = 0;
+ struct ecryptfs_crypt_stat *crypt_stat = NULL;
+ struct ecryptfs_mount_crypt_stat *mount_crypt_stat;
+ struct dentry *ecryptfs_dentry = file->f_dentry;
+ /* Private value of ecryptfs_dentry allocated in
+ * ecryptfs_lookup() */
+ struct dentry *lower_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry);
+ struct inode *lower_inode = NULL;
+ struct file *lower_file = NULL;
+ struct vfsmount *lower_mnt;
+ struct ecryptfs_file_info *file_info;
+ int lower_flags;
+
+ /* Released in ecryptfs_release or end of function if failure */
+ file_info = kmem_cache_alloc(ecryptfs_file_info_cache, SLAB_KERNEL);
+ ecryptfs_set_file_private(file, file_info);
+ if (!file_info) {
+ ecryptfs_printk(KERN_ERR,
+ "Error attempting to allocate memory\n");
+ rc = -ENOMEM;
+ goto out;
+ }
+ memset(file_info, 0, sizeof(*file_info));
+ lower_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry);
+ crypt_stat = &ecryptfs_inode_to_private(inode)->crypt_stat;
+ mount_crypt_stat = &ecryptfs_superblock_to_private(
+ ecryptfs_dentry->d_sb)->mount_crypt_stat;
+ mutex_lock(&crypt_stat->cs_mutex);
+ if (!ECRYPTFS_CHECK_FLAG(crypt_stat->flags, ECRYPTFS_POLICY_APPLIED)) {
+ ecryptfs_printk(KERN_DEBUG, "Setting flags for stat...\n");
+ /* Policy code enabled in future release */
+ ECRYPTFS_SET_FLAG(crypt_stat->flags, ECRYPTFS_POLICY_APPLIED);
+ ECRYPTFS_SET_FLAG(crypt_stat->flags, ECRYPTFS_ENCRYPTED);
+ }
+ mutex_unlock(&crypt_stat->cs_mutex);
+ /* This mntget & dget is undone via fput when the file is released */
+ dget(lower_dentry);
+ lower_flags = file->f_flags;
+ if ((lower_flags & O_ACCMODE) == O_WRONLY)
+ lower_flags = (lower_flags & O_ACCMODE) | O_RDWR;
+ if (file->f_flags & O_APPEND)
+ lower_flags &= ~O_APPEND;
+ lower_mnt = ecryptfs_dentry_to_lower_mnt(ecryptfs_dentry);
+ mntget(lower_mnt);
+ /* Corresponding fput() in ecryptfs_release() */
+ lower_file = dentry_open(lower_dentry, lower_mnt, lower_flags);
+ if (IS_ERR(lower_file)) {
+ rc = PTR_ERR(lower_file);
+ ecryptfs_printk(KERN_ERR, "Error opening lower file\n");
+ goto out_puts;
+ }
+ ecryptfs_set_file_lower(file, lower_file);
+ /* Isn't this check the same as the one in lookup? */
+ lower_inode = lower_dentry->d_inode;
+ if (S_ISDIR(ecryptfs_dentry->d_inode->i_mode)) {
+ ecryptfs_printk(KERN_DEBUG, "This is a directory\n");
+ ECRYPTFS_CLEAR_FLAG(crypt_stat->flags, ECRYPTFS_ENCRYPTED);
+ rc = 0;
+ goto out;
+ }
+ mutex_lock(&crypt_stat->cs_mutex);
+ if (i_size_read(lower_inode) < ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE) {
+ if (!(mount_crypt_stat->flags
+ & ECRYPTFS_PLAINTEXT_PASSTHROUGH_ENABLED)) {
+ rc = -EIO;
+ printk(KERN_WARNING "Attempt to read file that is "
+ "not in a valid eCryptfs format, and plaintext "
+ "passthrough mode is not enabled; returning "
+ "-EIO\n");
+ mutex_unlock(&crypt_stat->cs_mutex);
+ goto out_puts;
+ }
+ crypt_stat->flags &= ~(ECRYPTFS_ENCRYPTED);
+ rc = 0;
+ mutex_unlock(&crypt_stat->cs_mutex);
+ goto out;
+ } else if (!ECRYPTFS_CHECK_FLAG(crypt_stat->flags,
+ ECRYPTFS_POLICY_APPLIED)
+ || !ECRYPTFS_CHECK_FLAG(crypt_stat->flags,
+ ECRYPTFS_KEY_VALID)) {
+ rc = ecryptfs_read_headers(ecryptfs_dentry, lower_file);
+ if (rc) {
+ ecryptfs_printk(KERN_DEBUG,
+ "Valid headers not found\n");
+ if (!(mount_crypt_stat->flags
+ & ECRYPTFS_PLAINTEXT_PASSTHROUGH_ENABLED)) {
+ rc = -EIO;
+ printk(KERN_WARNING "Attempt to read file that "
+ "is not in a valid eCryptfs format, "
+ "and plaintext passthrough mode is not "
+ "enabled; returning -EIO\n");
+ mutex_unlock(&crypt_stat->cs_mutex);
+ goto out_puts;
+ }
+ ECRYPTFS_CLEAR_FLAG(crypt_stat->flags,
+ ECRYPTFS_ENCRYPTED);
+ rc = 0;
+ mutex_unlock(&crypt_stat->cs_mutex);
+ goto out;
+ }
+ }
+ mutex_unlock(&crypt_stat->cs_mutex);
+ ecryptfs_printk(KERN_DEBUG, "inode w/ addr = [0x%p], i_ino = [0x%.16x] "
+ "size: [0x%.16x]\n", inode, inode->i_ino,
+ i_size_read(inode));
+ ecryptfs_set_file_lower(file, lower_file);
+ goto out;
+out_puts:
+ mntput(lower_mnt);
+ dput(lower_dentry);
+ kmem_cache_free(ecryptfs_file_info_cache,
+ ecryptfs_file_to_private(file));
+out:
+ return rc;
+}
+
+static int ecryptfs_flush(struct file *file, fl_owner_t td)
+{
+ int rc = 0;
+ struct file *lower_file = NULL;
+
+ lower_file = ecryptfs_file_to_lower(file);
+ if (lower_file->f_op && lower_file->f_op->flush)
+ rc = lower_file->f_op->flush(lower_file, td);
+ return rc;
+}
+
+static int ecryptfs_release(struct inode *inode, struct file *file)
+{
+ struct file *lower_file = ecryptfs_file_to_lower(file);
+ struct ecryptfs_file_info *file_info = ecryptfs_file_to_private(file);
+ struct inode *lower_inode = ecryptfs_inode_to_lower(inode);
+
+ fput(lower_file);
+ inode->i_blocks = lower_inode->i_blocks;
+ kmem_cache_free(ecryptfs_file_info_cache, file_info);
+ return 0;
+}
+
+static int
+ecryptfs_fsync(struct file *file, struct dentry *dentry, int datasync)
+{
+ struct file *lower_file = ecryptfs_file_to_lower(file);
+ struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry);
+ struct inode *lower_inode = lower_dentry->d_inode;
+ int rc = -EINVAL;
+
+ if (lower_inode->i_fop->fsync) {
+ mutex_lock(&lower_inode->i_mutex);
+ rc = lower_inode->i_fop->fsync(lower_file, lower_dentry,
+ datasync);
+ mutex_unlock(&lower_inode->i_mutex);
+ }
+ return rc;
+}
+
+static int ecryptfs_fasync(int fd, struct file *file, int flag)
+{
+ int rc = 0;
+ struct file *lower_file = NULL;
+
+ lower_file = ecryptfs_file_to_lower(file);
+ if (lower_file->f_op && lower_file->f_op->fasync)
+ rc = lower_file->f_op->fasync(fd, lower_file, flag);
+ return rc;
+}
+
+static ssize_t ecryptfs_sendfile(struct file *file, loff_t * ppos,
+ size_t count, read_actor_t actor, void *target)
+{
+ struct file *lower_file = NULL;
+ int rc = -EINVAL;
+
+ lower_file = ecryptfs_file_to_lower(file);
+ if (lower_file->f_op && lower_file->f_op->sendfile)
+ rc = lower_file->f_op->sendfile(lower_file, ppos, count,
+ actor, target);
+
+ return rc;
+}
+
+static int ecryptfs_ioctl(struct inode *inode, struct file *file,
+ unsigned int cmd, unsigned long arg);
+
+const struct file_operations ecryptfs_dir_fops = {
+ .readdir = ecryptfs_readdir,
+ .ioctl = ecryptfs_ioctl,
+ .mmap = generic_file_mmap,
+ .open = ecryptfs_open,
+ .flush = ecryptfs_flush,
+ .release = ecryptfs_release,
+ .fsync = ecryptfs_fsync,
+ .fasync = ecryptfs_fasync,
+ .sendfile = ecryptfs_sendfile,
+};
+
+const struct file_operations ecryptfs_main_fops = {
+ .llseek = ecryptfs_llseek,
+ .read = do_sync_read,
+ .aio_read = ecryptfs_read_update_atime,
+ .write = do_sync_write,
+ .aio_write = generic_file_aio_write,
+ .readdir = ecryptfs_readdir,
+ .ioctl = ecryptfs_ioctl,
+ .mmap = generic_file_mmap,
+ .open = ecryptfs_open,
+ .flush = ecryptfs_flush,
+ .release = ecryptfs_release,
+ .fsync = ecryptfs_fsync,
+ .fasync = ecryptfs_fasync,
+ .sendfile = ecryptfs_sendfile,
+};
+
+static int
+ecryptfs_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
+ unsigned long arg)
+{
+ int rc = 0;
+ struct file *lower_file = NULL;
+
+ if (ecryptfs_file_to_private(file))
+ lower_file = ecryptfs_file_to_lower(file);
+ if (lower_file && lower_file->f_op && lower_file->f_op->ioctl)
+ rc = lower_file->f_op->ioctl(ecryptfs_inode_to_lower(inode),
+ lower_file, cmd, arg);
+ else
+ rc = -ENOTTY;
+ return rc;
+}
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
new file mode 100644
index 000000000000..efdd2b7b62d7
--- /dev/null
+++ b/fs/ecryptfs/inode.c
@@ -0,0 +1,1079 @@
+/**
+ * eCryptfs: Linux filesystem encryption layer
+ *
+ * Copyright (C) 1997-2004 Erez Zadok
+ * Copyright (C) 2001-2004 Stony Brook University
+ * Copyright (C) 2004-2006 International Business Machines Corp.
+ * Author(s): Michael A. Halcrow <mahalcro@us.ibm.com>
+ * Michael C. Thompsion <mcthomps@us.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+ * 02111-1307, USA.
+ */
+
+#include <linux/file.h>
+#include <linux/vmalloc.h>
+#include <linux/pagemap.h>
+#include <linux/dcache.h>
+#include <linux/namei.h>
+#include <linux/mount.h>
+#include <linux/crypto.h>
+#include "ecryptfs_kernel.h"
+
+static struct dentry *lock_parent(struct dentry *dentry)
+{
+ struct dentry *dir;
+
+ dir = dget(dentry->d_parent);
+ mutex_lock(&(dir->d_inode->i_mutex));
+ return dir;
+}
+
+static void unlock_parent(struct dentry *dentry)
+{
+ mutex_unlock(&(dentry->d_parent->d_inode->i_mutex));
+ dput(dentry->d_parent);
+}
+
+static void unlock_dir(struct dentry *dir)
+{
+ mutex_unlock(&dir->d_inode->i_mutex);
+ dput(dir);
+}
+
+void ecryptfs_copy_inode_size(struct inode *dst, const struct inode *src)
+{
+ i_size_write(dst, i_size_read((struct inode *)src));
+ dst->i_blocks = src->i_blocks;
+}
+
+void ecryptfs_copy_attr_atime(struct inode *dest, const struct inode *src)
+{
+ dest->i_atime = src->i_atime;
+}
+
+static void ecryptfs_copy_attr_times(struct inode *dest,
+ const struct inode *src)
+{
+ dest->i_atime = src->i_atime;
+ dest->i_mtime = src->i_mtime;
+ dest->i_ctime = src->i_ctime;
+}
+
+static void ecryptfs_copy_attr_timesizes(struct inode *dest,
+ const struct inode *src)
+{
+ dest->i_atime = src->i_atime;
+ dest->i_mtime = src->i_mtime;
+ dest->i_ctime = src->i_ctime;
+ ecryptfs_copy_inode_size(dest, src);
+}
+
+void ecryptfs_copy_attr_all(struct inode *dest, const struct inode *src)
+{
+ dest->i_mode = src->i_mode;
+ dest->i_nlink = src->i_nlink;
+ dest->i_uid = src->i_uid;
+ dest->i_gid = src->i_gid;
+ dest->i_rdev = src->i_rdev;
+ dest->i_atime = src->i_atime;
+ dest->i_mtime = src->i_mtime;
+ dest->i_ctime = src->i_ctime;
+ dest->i_blkbits = src->i_blkbits;
+ dest->i_flags = src->i_flags;
+}
+
+/**
+ * ecryptfs_create_underlying_file
+ * @lower_dir_inode: inode of the parent in the lower fs of the new file
+ * @lower_dentry: New file's dentry in the lower fs
+ * @ecryptfs_dentry: New file's dentry in ecryptfs
+ * @mode: The mode of the new file
+ * @nd: nameidata of ecryptfs' parent's dentry & vfsmount
+ *
+ * Creates the file in the lower file system.
+ *
+ * Returns zero on success; non-zero on error condition
+ */
+static int
+ecryptfs_create_underlying_file(struct inode *lower_dir_inode,
+ struct dentry *dentry, int mode,
+ struct nameidata *nd)
+{
+ struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry);
+ struct vfsmount *lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry);
+ struct dentry *dentry_save;
+ struct vfsmount *vfsmount_save;
+ int rc;
+
+ dentry_save = nd->dentry;
+ vfsmount_save = nd->mnt;
+ nd->dentry = lower_dentry;
+ nd->mnt = lower_mnt;
+ rc = vfs_create(lower_dir_inode, lower_dentry, mode, nd);
+ nd->dentry = dentry_save;
+ nd->mnt = vfsmount_save;
+ return rc;
+}
+
+/**
+ * ecryptfs_do_create
+ * @directory_inode: inode of the new file's dentry's parent in ecryptfs
+ * @ecryptfs_dentry: New file's dentry in ecryptfs
+ * @mode: The mode of the new file
+ * @nd: nameidata of ecryptfs' parent's dentry & vfsmount
+ *
+ * Creates the underlying file and the eCryptfs inode which will link to
+ * it. It will also update the eCryptfs directory inode to mimic the
+ * stat of the lower directory inode.
+ *
+ * Returns zero on success; non-zero on error condition
+ */
+static int
+ecryptfs_do_create(struct inode *directory_inode,
+ struct dentry *ecryptfs_dentry, int mode,
+ struct nameidata *nd)
+{
+ int rc;
+ struct dentry *lower_dentry;
+ struct dentry *lower_dir_dentry;
+
+ lower_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry);
+ lower_dir_dentry = lock_parent(lower_dentry);
+ if (unlikely(IS_ERR(lower_dir_dentry))) {
+ ecryptfs_printk(KERN_ERR, "Error locking directory of "
+ "dentry\n");
+ rc = PTR_ERR(lower_dir_dentry);
+ goto out;
+ }
+ rc = ecryptfs_create_underlying_file(lower_dir_dentry->d_inode,
+ ecryptfs_dentry, mode, nd);
+ if (unlikely(rc)) {
+ ecryptfs_printk(KERN_ERR,
+ "Failure to create underlying file\n");
+ goto out_lock;
+ }
+ rc = ecryptfs_interpose(lower_dentry, ecryptfs_dentry,
+ directory_inode->i_sb, 0);
+ if (rc) {
+ ecryptfs_printk(KERN_ERR, "Failure in ecryptfs_interpose\n");
+ goto out_lock;
+ }
+ ecryptfs_copy_attr_timesizes(directory_inode,
+ lower_dir_dentry->d_inode);
+out_lock:
+ unlock_dir(lower_dir_dentry);
+out:
+ return rc;
+}
+
+/**
+ * grow_file
+ * @ecryptfs_dentry: the ecryptfs dentry
+ * @lower_file: The lower file
+ * @inode: The ecryptfs inode
+ * @lower_inode: The lower inode
+ *
+ * This is the code which will grow the file to its correct size.
+ */
+static int grow_file(struct dentry *ecryptfs_dentry, struct file *lower_file,
+ struct inode *inode, struct inode *lower_inode)
+{
+ int rc = 0;
+ struct file fake_file;
+ struct ecryptfs_file_info tmp_file_info;
+
+ memset(&fake_file, 0, sizeof(fake_file));
+ fake_file.f_dentry = ecryptfs_dentry;
+ memset(&tmp_file_info, 0, sizeof(tmp_file_info));
+ ecryptfs_set_file_private(&fake_file, &tmp_file_info);
+ ecryptfs_set_file_lower(&fake_file, lower_file);
+ rc = ecryptfs_fill_zeros(&fake_file, 1);
+ if (rc) {
+ ECRYPTFS_SET_FLAG(
+ ecryptfs_inode_to_private(inode)->crypt_stat.flags,
+ ECRYPTFS_SECURITY_WARNING);
+ ecryptfs_printk(KERN_WARNING, "Error attempting to fill zeros "
+ "in file; rc = [%d]\n", rc);
+ goto out;
+ }
+ i_size_write(inode, 0);
+ ecryptfs_write_inode_size_to_header(lower_file, lower_inode, inode);
+ ECRYPTFS_SET_FLAG(ecryptfs_inode_to_private(inode)->crypt_stat.flags,
+ ECRYPTFS_NEW_FILE);
+out:
+ return rc;
+}
+
+/**
+ * ecryptfs_initialize_file
+ *
+ * Cause the file to be changed from a basic empty file to an ecryptfs
+ * file with a header and first data page.
+ *
+ * Returns zero on success
+ */
+static int ecryptfs_initialize_file(struct dentry *ecryptfs_dentry)
+{
+ int rc = 0;
+ int lower_flags;
+ struct ecryptfs_crypt_stat *crypt_stat;
+ struct dentry *lower_dentry;
+ struct dentry *tlower_dentry = NULL;
+ struct file *lower_file;
+ struct inode *inode, *lower_inode;
+ struct vfsmount *lower_mnt;
+
+ lower_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry);
+ ecryptfs_printk(KERN_DEBUG, "lower_dentry->d_name.name = [%s]\n",
+ lower_dentry->d_name.name);
+ inode = ecryptfs_dentry->d_inode;
+ crypt_stat = &ecryptfs_inode_to_private(inode)->crypt_stat;
+ tlower_dentry = dget(lower_dentry);
+ if (!tlower_dentry) {
+ rc = -ENOMEM;
+ ecryptfs_printk(KERN_ERR, "Error dget'ing lower_dentry\n");
+ goto out;
+ }
+ lower_flags = ((O_CREAT | O_WRONLY | O_TRUNC) & O_ACCMODE) | O_RDWR;
+#if BITS_PER_LONG != 32
+ lower_flags |= O_LARGEFILE;
+#endif
+ lower_mnt = ecryptfs_dentry_to_lower_mnt(ecryptfs_dentry);
+ mntget(lower_mnt);
+ /* Corresponding fput() at end of this function */
+ lower_file = dentry_open(tlower_dentry, lower_mnt, lower_flags);
+ if (IS_ERR(lower_file)) {
+ rc = PTR_ERR(lower_file);
+ ecryptfs_printk(KERN_ERR,
+ "Error opening dentry; rc = [%i]\n", rc);
+ goto out;
+ }
+ /* fput(lower_file) should handle the puts if we do this */
+ lower_file->f_dentry = tlower_dentry;
+ lower_file->f_vfsmnt = lower_mnt;
+ lower_inode = tlower_dentry->d_inode;
+ if (S_ISDIR(ecryptfs_dentry->d_inode->i_mode)) {
+ ecryptfs_printk(KERN_DEBUG, "This is a directory\n");
+ ECRYPTFS_CLEAR_FLAG(crypt_stat->flags, ECRYPTFS_ENCRYPTED);
+ goto out_fput;
+ }
+ ECRYPTFS_SET_FLAG(crypt_stat->flags, ECRYPTFS_NEW_FILE);
+ ecryptfs_printk(KERN_DEBUG, "Initializing crypto context\n");
+ rc = ecryptfs_new_file_context(ecryptfs_dentry);
+ if (rc) {
+ ecryptfs_printk(KERN_DEBUG, "Error creating new file "
+ "context\n");
+ goto out_fput;
+ }
+ rc = ecryptfs_write_headers(ecryptfs_dentry, lower_file);
+ if (rc) {
+ ecryptfs_printk(KERN_DEBUG, "Error writing headers\n");
+ goto out_fput;
+ }
+ rc = grow_file(ecryptfs_dentry, lower_file, inode, lower_inode);
+out_fput:
+ fput(lower_file);
+out:
+ return rc;
+}
+
+/**
+ * ecryptfs_create
+ * @dir: The inode of the directory in which to create the file.
+ * @dentry: The eCryptfs dentry
+ * @mode: The mode of the new file.
+ * @nd: nameidata
+ *
+ * Creates a new file.
+ *
+ * Returns zero on success; non-zero on error condition
+ */
+static int
+ecryptfs_create(struct inode *directory_inode, struct dentry *ecryptfs_dentry,
+ int mode, struct nameidata *nd)
+{
+ int rc;
+
+ rc = ecryptfs_do_create(directory_inode, ecryptfs_dentry, mode, nd);
+ if (unlikely(rc)) {
+ ecryptfs_printk(KERN_WARNING, "Failed to create file in"
+ "lower filesystem\n");
+ goto out;
+ }
+ /* At this point, a file exists on "disk"; we need to make sure
+ * that this on disk file is prepared to be an ecryptfs file */
+ rc = ecryptfs_initialize_file(ecryptfs_dentry);
+out:
+ return rc;
+}
+
+/**
+ * ecryptfs_lookup
+ * @dir: inode
+ * @dentry: The dentry
+ * @nd: nameidata, may be NULL
+ *
+ * Find a file on disk. If the file does not exist, then we'll add it to the
+ * dentry cache and continue on to read it from the disk.
+ */
+static struct dentry *ecryptfs_lookup(struct inode *dir, struct dentry *dentry,
+ struct nameidata *nd)
+{
+ int rc = 0;
+ struct dentry *lower_dir_dentry;
+ struct dentry *lower_dentry;
+ struct vfsmount *lower_mnt;
+ struct dentry *tlower_dentry = NULL;
+ char *encoded_name;
+ unsigned int encoded_namelen;
+ struct ecryptfs_crypt_stat *crypt_stat = NULL;
+ char *page_virt = NULL;
+ struct inode *lower_inode;
+ u64 file_size;
+
+ lower_dir_dentry = ecryptfs_dentry_to_lower(dentry->d_parent);
+ dentry->d_op = &ecryptfs_dops;
+ if ((dentry->d_name.len == 1 && !strcmp(dentry->d_name.name, "."))
+ || (dentry->d_name.len == 2 && !strcmp(dentry->d_name.name, "..")))
+ goto out_drop;
+ encoded_namelen = ecryptfs_encode_filename(crypt_stat,
+ dentry->d_name.name,
+ dentry->d_name.len,
+ &encoded_name);
+ if (encoded_namelen < 0) {
+ rc = encoded_namelen;
+ goto out_drop;
+ }
+ ecryptfs_printk(KERN_DEBUG, "encoded_name = [%s]; encoded_namelen "
+ "= [%d]\n", encoded_name, encoded_namelen);
+ lower_dentry = lookup_one_len(encoded_name, lower_dir_dentry,
+ encoded_namelen - 1);
+ kfree(encoded_name);
+ lower_mnt = mntget(ecryptfs_dentry_to_lower_mnt(dentry->d_parent));
+ if (IS_ERR(lower_dentry)) {
+ ecryptfs_printk(KERN_ERR, "ERR from lower_dentry\n");
+ rc = PTR_ERR(lower_dentry);
+ goto out_drop;
+ }
+ ecryptfs_printk(KERN_DEBUG, "lower_dentry = [%p]; lower_dentry->"
+ "d_name.name = [%s]\n", lower_dentry,
+ lower_dentry->d_name.name);
+ lower_inode = lower_dentry->d_inode;
+ ecryptfs_copy_attr_atime(dir, lower_dir_dentry->d_inode);
+ BUG_ON(!atomic_read(&lower_dentry->d_count));
+ ecryptfs_set_dentry_private(dentry,
+ kmem_cache_alloc(ecryptfs_dentry_info_cache,
+ SLAB_KERNEL));
+ if (!ecryptfs_dentry_to_private(dentry)) {
+ rc = -ENOMEM;
+ ecryptfs_printk(KERN_ERR, "Out of memory whilst attempting "
+ "to allocate ecryptfs_dentry_info struct\n");
+ goto out_dput;
+ }
+ ecryptfs_set_dentry_lower(dentry, lower_dentry);
+ ecryptfs_set_dentry_lower_mnt(dentry, lower_mnt);
+ if (!lower_dentry->d_inode) {
+ /* We want to add because we couldn't find in lower */
+ d_add(dentry, NULL);
+ goto out;
+ }
+ rc = ecryptfs_interpose(lower_dentry, dentry, dir->i_sb, 1);
+ if (rc) {
+ ecryptfs_printk(KERN_ERR, "Error interposing\n");
+ goto out_dput;
+ }
+ if (S_ISDIR(lower_inode->i_mode)) {
+ ecryptfs_printk(KERN_DEBUG, "Is a directory; returning\n");
+ goto out;
+ }
+ if (S_ISLNK(lower_inode->i_mode)) {
+ ecryptfs_printk(KERN_DEBUG, "Is a symlink; returning\n");
+ goto out;
+ }
+ if (!nd) {
+ ecryptfs_printk(KERN_DEBUG, "We have a NULL nd, just leave"
+ "as we *think* we are about to unlink\n");
+ goto out;
+ }
+ tlower_dentry = dget(lower_dentry);
+ if (!tlower_dentry || IS_ERR(tlower_dentry)) {
+ rc = -ENOMEM;
+ ecryptfs_printk(KERN_ERR, "Cannot dget lower_dentry\n");
+ goto out_dput;
+ }
+ /* Released in this function */
+ page_virt =
+ (char *)kmem_cache_alloc(ecryptfs_header_cache_2,
+ SLAB_USER);
+ if (!page_virt) {
+ rc = -ENOMEM;
+ ecryptfs_printk(KERN_ERR,
+ "Cannot ecryptfs_kmalloc a page\n");
+ goto out_dput;
+ }
+ memset(page_virt, 0, PAGE_CACHE_SIZE);
+ rc = ecryptfs_read_header_region(page_virt, tlower_dentry, nd->mnt);
+ crypt_stat = &ecryptfs_inode_to_private(dentry->d_inode)->crypt_stat;
+ if (!ECRYPTFS_CHECK_FLAG(crypt_stat->flags, ECRYPTFS_POLICY_APPLIED))
+ ecryptfs_set_default_sizes(crypt_stat);
+ if (rc) {
+ rc = 0;
+ ecryptfs_printk(KERN_WARNING, "Error reading header region;"
+ " assuming unencrypted\n");
+ } else {
+ if (!contains_ecryptfs_marker(page_virt
+ + ECRYPTFS_FILE_SIZE_BYTES)) {
+ kmem_cache_free(ecryptfs_header_cache_2, page_virt);
+ goto out;
+ }
+ memcpy(&file_size, page_virt, sizeof(file_size));
+ file_size = be64_to_cpu(file_size);
+ i_size_write(dentry->d_inode, (loff_t)file_size);
+ }
+ kmem_cache_free(ecryptfs_header_cache_2, page_virt);
+ goto out;
+
+out_dput:
+ dput(lower_dentry);
+ if (tlower_dentry)
+ dput(tlower_dentry);
+out_drop:
+ d_drop(dentry);
+out:
+ return ERR_PTR(rc);
+}
+
+static int ecryptfs_link(struct dentry *old_dentry, struct inode *dir,
+ struct dentry *new_dentry)
+{
+ struct dentry *lower_old_dentry;
+ struct dentry *lower_new_dentry;
+ struct dentry *lower_dir_dentry;
+ u64 file_size_save;
+ int rc;
+
+ file_size_save = i_size_read(old_dentry->d_inode);
+ lower_old_dentry = ecryptfs_dentry_to_lower(old_dentry);
+ lower_new_dentry = ecryptfs_dentry_to_lower(new_dentry);
+ dget(lower_old_dentry);
+ dget(lower_new_dentry);
+ lower_dir_dentry = lock_parent(lower_new_dentry);
+ rc = vfs_link(lower_old_dentry, lower_dir_dentry->d_inode,
+ lower_new_dentry);
+ if (rc || !lower_new_dentry->d_inode)
+ goto out_lock;
+ rc = ecryptfs_interpose(lower_new_dentry, new_dentry, dir->i_sb, 0);
+ if (rc)
+ goto out_lock;
+ ecryptfs_copy_attr_timesizes(dir, lower_new_dentry->d_inode);
+ old_dentry->d_inode->i_nlink =
+ ecryptfs_inode_to_lower(old_dentry->d_inode)->i_nlink;
+ i_size_write(new_dentry->d_inode, file_size_save);
+out_lock:
+ unlock_dir(lower_dir_dentry);
+ dput(lower_new_dentry);
+ dput(lower_old_dentry);
+ if (!new_dentry->d_inode)
+ d_drop(new_dentry);
+ return rc;
+}
+
+static int ecryptfs_unlink(struct inode *dir, struct dentry *dentry)
+{
+ int rc = 0;
+ struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry);
+ struct inode *lower_dir_inode = ecryptfs_inode_to_lower(dir);
+
+ lock_parent(lower_dentry);
+ rc = vfs_unlink(lower_dir_inode, lower_dentry);
+ if (rc) {
+ ecryptfs_printk(KERN_ERR, "Error in vfs_unlink\n");
+ goto out_unlock;
+ }
+ ecryptfs_copy_attr_times(dir, lower_dir_inode);
+ dentry->d_inode->i_nlink =
+ ecryptfs_inode_to_lower(dentry->d_inode)->i_nlink;
+ dentry->d_inode->i_ctime = dir->i_ctime;
+out_unlock:
+ unlock_parent(lower_dentry);
+ return rc;
+}
+
+static int ecryptfs_symlink(struct inode *dir, struct dentry *dentry,
+ const char *symname)
+{
+ int rc;
+ struct dentry *lower_dentry;
+ struct dentry *lower_dir_dentry;
+ umode_t mode;
+ char *encoded_symname;
+ unsigned int encoded_symlen;
+ struct ecryptfs_crypt_stat *crypt_stat = NULL;
+
+ lower_dentry = ecryptfs_dentry_to_lower(dentry);
+ dget(lower_dentry);
+ lower_dir_dentry = lock_parent(lower_dentry);
+ mode = S_IALLUGO;
+ encoded_symlen = ecryptfs_encode_filename(crypt_stat, symname,
+ strlen(symname),
+ &encoded_symname);
+ if (encoded_symlen < 0) {
+ rc = encoded_symlen;
+ goto out_lock;
+ }
+ rc = vfs_symlink(lower_dir_dentry->d_inode, lower_dentry,
+ encoded_symname, mode);
+ kfree(encoded_symname);
+ if (rc || !lower_dentry->d_inode)
+ goto out_lock;
+ rc = ecryptfs_interpose(lower_dentry, dentry, dir->i_sb, 0);
+ if (rc)
+ goto out_lock;
+ ecryptfs_copy_attr_timesizes(dir, lower_dir_dentry->d_inode);
+out_lock:
+ unlock_dir(lower_dir_dentry);
+ dput(lower_dentry);
+ if (!dentry->d_inode)
+ d_drop(dentry);
+ return rc;
+}
+
+static int ecryptfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+{
+ int rc;
+ struct dentry *lower_dentry;
+ struct dentry *lower_dir_dentry;
+
+ lower_dentry = ecryptfs_dentry_to_lower(dentry);
+ lower_dir_dentry = lock_parent(lower_dentry);
+ rc = vfs_mkdir(lower_dir_dentry->d_inode, lower_dentry, mode);
+ if (rc || !lower_dentry->d_inode)
+ goto out;
+ rc = ecryptfs_interpose(lower_dentry, dentry, dir->i_sb, 0);
+ if (rc)
+ goto out;
+ ecryptfs_copy_attr_timesizes(dir, lower_dir_dentry->d_inode);
+ dir->i_nlink = lower_dir_dentry->d_inode->i_nlink;
+out:
+ unlock_dir(lower_dir_dentry);
+ if (!dentry->d_inode)
+ d_drop(dentry);
+ return rc;
+}
+
+static int ecryptfs_rmdir(struct inode *dir, struct dentry *dentry)
+{
+ int rc = 0;
+ struct dentry *tdentry = NULL;
+ struct dentry *lower_dentry;
+ struct dentry *tlower_dentry = NULL;
+ struct dentry *lower_dir_dentry;
+
+ lower_dentry = ecryptfs_dentry_to_lower(dentry);
+ if (!(tdentry = dget(dentry))) {
+ rc = -EINVAL;
+ ecryptfs_printk(KERN_ERR, "Error dget'ing dentry [%p]\n",
+ dentry);
+ goto out;
+ }
+ lower_dir_dentry = lock_parent(lower_dentry);
+ if (!(tlower_dentry = dget(lower_dentry))) {
+ rc = -EINVAL;
+ ecryptfs_printk(KERN_ERR, "Error dget'ing lower_dentry "
+ "[%p]\n", lower_dentry);
+ goto out;
+ }
+ rc = vfs_rmdir(lower_dir_dentry->d_inode, lower_dentry);
+ if (!rc) {
+ d_delete(tlower_dentry);
+ tlower_dentry = NULL;
+ }
+ ecryptfs_copy_attr_times(dir, lower_dir_dentry->d_inode);
+ dir->i_nlink = lower_dir_dentry->d_inode->i_nlink;
+ unlock_dir(lower_dir_dentry);
+ if (!rc)
+ d_drop(dentry);
+out:
+ if (tdentry)
+ dput(tdentry);
+ if (tlower_dentry)
+ dput(tlower_dentry);
+ return rc;
+}
+
+static int
+ecryptfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
+{
+ int rc;
+ struct dentry *lower_dentry;
+ struct dentry *lower_dir_dentry;
+
+ lower_dentry = ecryptfs_dentry_to_lower(dentry);
+ lower_dir_dentry = lock_parent(lower_dentry);
+ rc = vfs_mknod(lower_dir_dentry->d_inode, lower_dentry, mode, dev);
+ if (rc || !lower_dentry->d_inode)
+ goto out;
+ rc = ecryptfs_interpose(lower_dentry, dentry, dir->i_sb, 0);
+ if (rc)
+ goto out;
+ ecryptfs_copy_attr_timesizes(dir, lower_dir_dentry->d_inode);
+out:
+ unlock_dir(lower_dir_dentry);
+ if (!dentry->d_inode)
+ d_drop(dentry);
+ return rc;
+}
+
+static int
+ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry,
+ struct inode *new_dir, struct dentry *new_dentry)
+{
+ int rc;
+ struct dentry *lower_old_dentry;
+ struct dentry *lower_new_dentry;
+ struct dentry *lower_old_dir_dentry;
+ struct dentry *lower_new_dir_dentry;
+
+ lower_old_dentry = ecryptfs_dentry_to_lower(old_dentry);
+ lower_new_dentry = ecryptfs_dentry_to_lower(new_dentry);
+ dget(lower_old_dentry);
+ dget(lower_new_dentry);
+ lower_old_dir_dentry = dget_parent(lower_old_dentry);
+ lower_new_dir_dentry = dget_parent(lower_new_dentry);
+ lock_rename(lower_old_dir_dentry, lower_new_dir_dentry);
+ rc = vfs_rename(lower_old_dir_dentry->d_inode, lower_old_dentry,
+ lower_new_dir_dentry->d_inode, lower_new_dentry);
+ if (rc)
+ goto out_lock;
+ ecryptfs_copy_attr_all(new_dir, lower_new_dir_dentry->d_inode);
+ if (new_dir != old_dir)
+ ecryptfs_copy_attr_all(old_dir, lower_old_dir_dentry->d_inode);
+out_lock:
+ unlock_rename(lower_old_dir_dentry, lower_new_dir_dentry);
+ dput(lower_new_dentry);
+ dput(lower_old_dentry);
+ return rc;
+}
+
+static int
+ecryptfs_readlink(struct dentry *dentry, char __user * buf, int bufsiz)
+{
+ int rc;
+ struct dentry *lower_dentry;
+ char *decoded_name;
+ char *lower_buf;
+ mm_segment_t old_fs;
+ struct ecryptfs_crypt_stat *crypt_stat;
+
+ lower_dentry = ecryptfs_dentry_to_lower(dentry);
+ if (!lower_dentry->d_inode->i_op ||
+ !lower_dentry->d_inode->i_op->readlink) {
+ rc = -EINVAL;
+ goto out;
+ }
+ /* Released in this function */
+ lower_buf = kmalloc(bufsiz, GFP_KERNEL);
+ if (lower_buf == NULL) {
+ ecryptfs_printk(KERN_ERR, "Out of memory\n");
+ rc = -ENOMEM;
+ goto out;
+ }
+ old_fs = get_fs();
+ set_fs(get_ds());
+ ecryptfs_printk(KERN_DEBUG, "Calling readlink w/ "
+ "lower_dentry->d_name.name = [%s]\n",
+ lower_dentry->d_name.name);
+ rc = lower_dentry->d_inode->i_op->readlink(lower_dentry,
+ (char __user *)lower_buf,
+ bufsiz);
+ set_fs(old_fs);
+ if (rc >= 0) {
+ crypt_stat = NULL;
+ rc = ecryptfs_decode_filename(crypt_stat, lower_buf, rc,
+ &decoded_name);
+ if (rc == -ENOMEM)
+ goto out_free_lower_buf;
+ if (rc > 0) {
+ ecryptfs_printk(KERN_DEBUG, "Copying [%d] bytes "
+ "to userspace: [%*s]\n", rc,
+ decoded_name);
+ if (copy_to_user(buf, decoded_name, rc))
+ rc = -EFAULT;
+ }
+ kfree(decoded_name);
+ ecryptfs_copy_attr_atime(dentry->d_inode,
+ lower_dentry->d_inode);
+ }
+out_free_lower_buf:
+ kfree(lower_buf);
+out:
+ return rc;
+}
+
+static void *ecryptfs_follow_link(struct dentry *dentry, struct nameidata *nd)
+{
+ char *buf;
+ int len = PAGE_SIZE, rc;
+ mm_segment_t old_fs;
+
+ /* Released in ecryptfs_put_link(); only release here on error */
+ buf = kmalloc(len, GFP_KERNEL);
+ if (!buf) {
+ rc = -ENOMEM;
+ goto out;
+ }
+ old_fs = get_fs();
+ set_fs(get_ds());
+ ecryptfs_printk(KERN_DEBUG, "Calling readlink w/ "
+ "dentry->d_name.name = [%s]\n", dentry->d_name.name);
+ rc = dentry->d_inode->i_op->readlink(dentry, (char __user *)buf, len);
+ buf[rc] = '\0';
+ set_fs(old_fs);
+ if (rc < 0)
+ goto out_free;
+ rc = 0;
+ nd_set_link(nd, buf);
+ goto out;
+out_free:
+ kfree(buf);
+out:
+ return ERR_PTR(rc);
+}
+
+static void
+ecryptfs_put_link(struct dentry *dentry, struct nameidata *nd, void *ptr)
+{
+ /* Free the char* */
+ kfree(nd_get_link(nd));
+}
+
+/**
+ * upper_size_to_lower_size
+ * @crypt_stat: Crypt_stat associated with file
+ * @upper_size: Size of the upper file
+ *
+ * Calculate the requried size of the lower file based on the
+ * specified size of the upper file. This calculation is based on the
+ * number of headers in the underlying file and the extent size.
+ *
+ * Returns Calculated size of the lower file.
+ */
+static loff_t
+upper_size_to_lower_size(struct ecryptfs_crypt_stat *crypt_stat,
+ loff_t upper_size)
+{
+ loff_t lower_size;
+
+ lower_size = ( crypt_stat->header_extent_size
+ * crypt_stat->num_header_extents_at_front );
+ if (upper_size != 0) {
+ loff_t num_extents;
+
+ num_extents = upper_size >> crypt_stat->extent_shift;
+ if (upper_size & ~crypt_stat->extent_mask)
+ num_extents++;
+ lower_size += (num_extents * crypt_stat->extent_size);
+ }
+ return lower_size;
+}
+
+/**
+ * ecryptfs_truncate
+ * @dentry: The ecryptfs layer dentry
+ * @new_length: The length to expand the file to
+ *
+ * Function to handle truncations modifying the size of the file. Note
+ * that the file sizes are interpolated. When expanding, we are simply
+ * writing strings of 0's out. When truncating, we need to modify the
+ * underlying file size according to the page index interpolations.
+ *
+ * Returns zero on success; non-zero otherwise
+ */
+int ecryptfs_truncate(struct dentry *dentry, loff_t new_length)
+{
+ int rc = 0;
+ struct inode *inode = dentry->d_inode;
+ struct dentry *lower_dentry;
+ struct vfsmount *lower_mnt;
+ struct file fake_ecryptfs_file, *lower_file = NULL;
+ struct ecryptfs_crypt_stat *crypt_stat;
+ loff_t i_size = i_size_read(inode);
+ loff_t lower_size_before_truncate;
+ loff_t lower_size_after_truncate;
+
+ if (unlikely((new_length == i_size)))
+ goto out;
+ crypt_stat = &ecryptfs_inode_to_private(dentry->d_inode)->crypt_stat;
+ /* Set up a fake ecryptfs file, this is used to interface with
+ * the file in the underlying filesystem so that the
+ * truncation has an effect there as well. */
+ memset(&fake_ecryptfs_file, 0, sizeof(fake_ecryptfs_file));
+ fake_ecryptfs_file.f_dentry = dentry;
+ /* Released at out_free: label */
+ ecryptfs_set_file_private(&fake_ecryptfs_file,
+ kmem_cache_alloc(ecryptfs_file_info_cache,
+ SLAB_KERNEL));
+ if (unlikely(!ecryptfs_file_to_private(&fake_ecryptfs_file))) {
+ rc = -ENOMEM;
+ goto out;
+ }
+ lower_dentry = ecryptfs_dentry_to_lower(dentry);
+ /* This dget & mntget is released through fput at out_fput: */
+ dget(lower_dentry);
+ lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry);
+ mntget(lower_mnt);
+ lower_file = dentry_open(lower_dentry, lower_mnt, O_RDWR);
+ if (unlikely(IS_ERR(lower_file))) {
+ rc = PTR_ERR(lower_file);
+ goto out_free;
+ }
+ ecryptfs_set_file_lower(&fake_ecryptfs_file, lower_file);
+ /* Switch on growing or shrinking file */
+ if (new_length > i_size) {
+ rc = ecryptfs_fill_zeros(&fake_ecryptfs_file, new_length);
+ if (rc) {
+ ecryptfs_printk(KERN_ERR,
+ "Problem with fill_zeros\n");
+ goto out_fput;
+ }
+ i_size_write(inode, new_length);
+ rc = ecryptfs_write_inode_size_to_header(lower_file,
+ lower_dentry->d_inode,
+ inode);
+ if (rc) {
+ ecryptfs_printk(KERN_ERR,
+ "Problem with ecryptfs_write"
+ "_inode_size\n");
+ goto out_fput;
+ }
+ } else { /* new_length < i_size_read(inode) */
+ vmtruncate(inode, new_length);
+ ecryptfs_write_inode_size_to_header(lower_file,
+ lower_dentry->d_inode,
+ inode);
+ /* We are reducing the size of the ecryptfs file, and need to
+ * know if we need to reduce the size of the lower file. */
+ lower_size_before_truncate =
+ upper_size_to_lower_size(crypt_stat, i_size);
+ lower_size_after_truncate =
+ upper_size_to_lower_size(crypt_stat, new_length);
+ if (lower_size_after_truncate < lower_size_before_truncate)
+ vmtruncate(lower_dentry->d_inode,
+ lower_size_after_truncate);
+ }
+ /* Update the access times */
+ lower_dentry->d_inode->i_mtime = lower_dentry->d_inode->i_ctime
+ = CURRENT_TIME;
+ mark_inode_dirty_sync(inode);
+out_fput:
+ fput(lower_file);
+out_free:
+ if (ecryptfs_file_to_private(&fake_ecryptfs_file))
+ kmem_cache_free(ecryptfs_file_info_cache,
+ ecryptfs_file_to_private(&fake_ecryptfs_file));
+out:
+ return rc;
+}
+
+static int
+ecryptfs_permission(struct inode *inode, int mask, struct nameidata *nd)
+{
+ int rc;
+
+ if (nd) {
+ struct vfsmount *vfsmnt_save = nd->mnt;
+ struct dentry *dentry_save = nd->dentry;
+
+ nd->mnt = ecryptfs_dentry_to_lower_mnt(nd->dentry);
+ nd->dentry = ecryptfs_dentry_to_lower(nd->dentry);
+ rc = permission(ecryptfs_inode_to_lower(inode), mask, nd);
+ nd->mnt = vfsmnt_save;
+ nd->dentry = dentry_save;
+ } else
+ rc = permission(ecryptfs_inode_to_lower(inode), mask, NULL);
+ return rc;
+}
+
+/**
+ * ecryptfs_setattr
+ * @dentry: dentry handle to the inode to modify
+ * @ia: Structure with flags of what to change and values
+ *
+ * Updates the metadata of an inode. If the update is to the size
+ * i.e. truncation, then ecryptfs_truncate will handle the size modification
+ * of both the ecryptfs inode and the lower inode.
+ *
+ * All other metadata changes will be passed right to the lower filesystem,
+ * and we will just update our inode to look like the lower.
+ */
+static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia)
+{
+ int rc = 0;
+ struct dentry *lower_dentry;
+ struct inode *inode;
+ struct inode *lower_inode;
+ struct ecryptfs_crypt_stat *crypt_stat;
+
+ crypt_stat = &ecryptfs_inode_to_private(dentry->d_inode)->crypt_stat;
+ lower_dentry = ecryptfs_dentry_to_lower(dentry);
+ inode = dentry->d_inode;
+ lower_inode = ecryptfs_inode_to_lower(inode);
+ if (ia->ia_valid & ATTR_SIZE) {
+ ecryptfs_printk(KERN_DEBUG,
+ "ia->ia_valid = [0x%x] ATTR_SIZE" " = [0x%x]\n",
+ ia->ia_valid, ATTR_SIZE);
+ rc = ecryptfs_truncate(dentry, ia->ia_size);
+ /* ecryptfs_truncate handles resizing of the lower file */
+ ia->ia_valid &= ~ATTR_SIZE;
+ ecryptfs_printk(KERN_DEBUG, "ia->ia_valid = [%x]\n",
+ ia->ia_valid);
+ if (rc < 0)
+ goto out;
+ }
+ rc = notify_change(lower_dentry, ia);
+out:
+ ecryptfs_copy_attr_all(inode, lower_inode);
+ return rc;
+}
+
+static int
+ecryptfs_setxattr(struct dentry *dentry, const char *name, const void *value,
+ size_t size, int flags)
+{
+ int rc = 0;
+ struct dentry *lower_dentry;
+
+ lower_dentry = ecryptfs_dentry_to_lower(dentry);
+ if (!lower_dentry->d_inode->i_op->setxattr) {
+ rc = -ENOSYS;
+ goto out;
+ }
+ mutex_lock(&lower_dentry->d_inode->i_mutex);
+ rc = lower_dentry->d_inode->i_op->setxattr(lower_dentry, name, value,
+ size, flags);
+ mutex_unlock(&lower_dentry->d_inode->i_mutex);
+out:
+ return rc;
+}
+
+static ssize_t
+ecryptfs_getxattr(struct dentry *dentry, const char *name, void *value,
+ size_t size)
+{
+ int rc = 0;
+ struct dentry *lower_dentry;
+
+ lower_dentry = ecryptfs_dentry_to_lower(dentry);
+ if (!lower_dentry->d_inode->i_op->getxattr) {
+ rc = -ENOSYS;
+ goto out;
+ }
+ mutex_lock(&lower_dentry->d_inode->i_mutex);
+ rc = lower_dentry->d_inode->i_op->getxattr(lower_dentry, name, value,
+ size);
+ mutex_unlock(&lower_dentry->d_inode->i_mutex);
+out:
+ return rc;
+}
+
+static ssize_t
+ecryptfs_listxattr(struct dentry *dentry, char *list, size_t size)
+{
+ int rc = 0;
+ struct dentry *lower_dentry;
+
+ lower_dentry = ecryptfs_dentry_to_lower(dentry);
+ if (!lower_dentry->d_inode->i_op->listxattr) {
+ rc = -ENOSYS;
+ goto out;
+ }
+ mutex_lock(&lower_dentry->d_inode->i_mutex);
+ rc = lower_dentry->d_inode->i_op->listxattr(lower_dentry, list, size);
+ mutex_unlock(&lower_dentry->d_inode->i_mutex);
+out:
+ return rc;
+}
+
+static int ecryptfs_removexattr(struct dentry *dentry, const char *name)
+{
+ int rc = 0;
+ struct dentry *lower_dentry;
+
+ lower_dentry = ecryptfs_dentry_to_lower(dentry);
+ if (!lower_dentry->d_inode->i_op->removexattr) {
+ rc = -ENOSYS;
+ goto out;
+ }
+ mutex_lock(&lower_dentry->d_inode->i_mutex);
+ rc = lower_dentry->d_inode->i_op->removexattr(lower_dentry, name);
+ mutex_unlock(&lower_dentry->d_inode->i_mutex);
+out:
+ return rc;
+}
+
+int ecryptfs_inode_test(struct inode *inode, void *candidate_lower_inode)
+{
+ if ((ecryptfs_inode_to_lower(inode)
+ == (struct inode *)candidate_lower_inode))
+ return 1;
+ else
+ return 0;
+}
+
+int ecryptfs_inode_set(struct inode *inode, void *lower_inode)
+{
+ ecryptfs_init_inode(inode, (struct inode *)lower_inode);
+ return 0;
+}
+
+struct inode_operations ecryptfs_symlink_iops = {
+ .readlink = ecryptfs_readlink,
+ .follow_link = ecryptfs_follow_link,
+ .put_link = ecryptfs_put_link,
+ .permission = ecryptfs_permission,
+ .setattr = ecryptfs_setattr,
+ .setxattr = ecryptfs_setxattr,
+ .getxattr = ecryptfs_getxattr,
+ .listxattr = ecryptfs_listxattr,
+ .removexattr = ecryptfs_removexattr
+};
+
+struct inode_operations ecryptfs_dir_iops = {
+ .create = ecryptfs_create,
+ .lookup = ecryptfs_lookup,
+ .link = ecryptfs_link,
+ .unlink = ecryptfs_unlink,
+ .symlink = ecryptfs_symlink,
+ .mkdir = ecryptfs_mkdir,
+ .rmdir = ecryptfs_rmdir,
+ .mknod = ecryptfs_mknod,
+ .rename = ecryptfs_rename,
+ .permission = ecryptfs_permission,
+ .setattr = ecryptfs_setattr,
+ .setxattr = ecryptfs_setxattr,
+ .getxattr = ecryptfs_getxattr,
+ .listxattr = ecryptfs_listxattr,
+ .removexattr = ecryptfs_removexattr
+};
+
+struct inode_operations ecryptfs_main_iops = {
+ .permission = ecryptfs_permission,
+ .setattr = ecryptfs_setattr,
+ .setxattr = ecryptfs_setxattr,
+ .getxattr = ecryptfs_getxattr,
+ .listxattr = ecryptfs_listxattr,
+ .removexattr = ecryptfs_removexattr
+};
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c
new file mode 100644
index 000000000000..ba454785a0c5
--- /dev/null
+++ b/fs/ecryptfs/keystore.c
@@ -0,0 +1,1061 @@
+/**
+ * eCryptfs: Linux filesystem encryption layer
+ * In-kernel key management code. Includes functions to parse and
+ * write authentication token-related packets with the underlying
+ * file.
+ *
+ * Copyright (C) 2004-2006 International Business Machines Corp.
+ * Author(s): Michael A. Halcrow <mhalcrow@us.ibm.com>
+ * Michael C. Thompson <mcthomps@us.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+ * 02111-1307, USA.
+ */
+
+#include <linux/string.h>
+#include <linux/sched.h>
+#include <linux/syscalls.h>
+#include <linux/pagemap.h>
+#include <linux/key.h>
+#include <linux/random.h>
+#include <linux/crypto.h>
+#include <linux/scatterlist.h>
+#include "ecryptfs_kernel.h"
+
+/**
+ * request_key returned an error instead of a valid key address;
+ * determine the type of error, make appropriate log entries, and
+ * return an error code.
+ */
+int process_request_key_err(long err_code)
+{
+ int rc = 0;
+
+ switch (err_code) {
+ case ENOKEY:
+ ecryptfs_printk(KERN_WARNING, "No key\n");
+ rc = -ENOENT;
+ break;
+ case EKEYEXPIRED:
+ ecryptfs_printk(KERN_WARNING, "Key expired\n");
+ rc = -ETIME;
+ break;
+ case EKEYREVOKED:
+ ecryptfs_printk(KERN_WARNING, "Key revoked\n");
+ rc = -EINVAL;
+ break;
+ default:
+ ecryptfs_printk(KERN_WARNING, "Unknown error code: "
+ "[0x%.16x]\n", err_code);
+ rc = -EINVAL;
+ }
+ return rc;
+}
+
+static void wipe_auth_tok_list(struct list_head *auth_tok_list_head)
+{
+ struct list_head *walker;
+ struct ecryptfs_auth_tok_list_item *auth_tok_list_item;
+
+ walker = auth_tok_list_head->next;
+ while (walker != auth_tok_list_head) {
+ auth_tok_list_item =
+ list_entry(walker, struct ecryptfs_auth_tok_list_item,
+ list);
+ walker = auth_tok_list_item->list.next;
+ memset(auth_tok_list_item, 0,
+ sizeof(struct ecryptfs_auth_tok_list_item));
+ kmem_cache_free(ecryptfs_auth_tok_list_item_cache,
+ auth_tok_list_item);
+ }
+}
+
+struct kmem_cache *ecryptfs_auth_tok_list_item_cache;
+
+/**
+ * parse_packet_length
+ * @data: Pointer to memory containing length at offset
+ * @size: This function writes the decoded size to this memory
+ * address; zero on error
+ * @length_size: The number of bytes occupied by the encoded length
+ *
+ * Returns Zero on success
+ */
+static int parse_packet_length(unsigned char *data, size_t *size,
+ size_t *length_size)
+{
+ int rc = 0;
+
+ (*length_size) = 0;
+ (*size) = 0;
+ if (data[0] < 192) {
+ /* One-byte length */
+ (*size) = data[0];
+ (*length_size) = 1;
+ } else if (data[0] < 224) {
+ /* Two-byte length */
+ (*size) = ((data[0] - 192) * 256);
+ (*size) += (data[1] + 192);
+ (*length_size) = 2;
+ } else if (data[0] == 255) {
+ /* Five-byte length; we're not supposed to see this */
+ ecryptfs_printk(KERN_ERR, "Five-byte packet length not "
+ "supported\n");
+ rc = -EINVAL;
+ goto out;
+ } else {
+ ecryptfs_printk(KERN_ERR, "Error parsing packet length\n");
+ rc = -EINVAL;
+ goto out;
+ }
+out:
+ return rc;
+}
+
+/**
+ * write_packet_length
+ * @dest: The byte array target into which to write the
+ * length. Must have at least 5 bytes allocated.
+ * @size: The length to write.
+ * @packet_size_length: The number of bytes used to encode the
+ * packet length is written to this address.
+ *
+ * Returns zero on success; non-zero on error.
+ */
+static int write_packet_length(char *dest, size_t size,
+ size_t *packet_size_length)
+{
+ int rc = 0;
+
+ if (size < 192) {
+ dest[0] = size;
+ (*packet_size_length) = 1;
+ } else if (size < 65536) {
+ dest[0] = (((size - 192) / 256) + 192);
+ dest[1] = ((size - 192) % 256);
+ (*packet_size_length) = 2;
+ } else {
+ rc = -EINVAL;
+ ecryptfs_printk(KERN_WARNING,
+ "Unsupported packet size: [%d]\n", size);
+ }
+ return rc;
+}
+
+/**
+ * parse_tag_3_packet
+ * @crypt_stat: The cryptographic context to modify based on packet
+ * contents.
+ * @data: The raw bytes of the packet.
+ * @auth_tok_list: eCryptfs parses packets into authentication tokens;
+ * a new authentication token will be placed at the end
+ * of this list for this packet.
+ * @new_auth_tok: Pointer to a pointer to memory that this function
+ * allocates; sets the memory address of the pointer to
+ * NULL on error. This object is added to the
+ * auth_tok_list.
+ * @packet_size: This function writes the size of the parsed packet
+ * into this memory location; zero on error.
+ * @max_packet_size: maximum number of bytes to parse
+ *
+ * Returns zero on success; non-zero on error.
+ */
+static int
+parse_tag_3_packet(struct ecryptfs_crypt_stat *crypt_stat,
+ unsigned char *data, struct list_head *auth_tok_list,
+ struct ecryptfs_auth_tok **new_auth_tok,
+ size_t *packet_size, size_t max_packet_size)
+{
+ int rc = 0;
+ size_t body_size;
+ struct ecryptfs_auth_tok_list_item *auth_tok_list_item;
+ size_t length_size;
+
+ (*packet_size) = 0;
+ (*new_auth_tok) = NULL;
+
+ /* we check that:
+ * one byte for the Tag 3 ID flag
+ * two bytes for the body size
+ * do not exceed the maximum_packet_size
+ */
+ if (unlikely((*packet_size) + 3 > max_packet_size)) {
+ ecryptfs_printk(KERN_ERR, "Packet size exceeds max\n");
+ rc = -EINVAL;
+ goto out;
+ }
+
+ /* check for Tag 3 identifyer - one byte */
+ if (data[(*packet_size)++] != ECRYPTFS_TAG_3_PACKET_TYPE) {
+ ecryptfs_printk(KERN_ERR, "Enter w/ first byte != 0x%.2x\n",
+ ECRYPTFS_TAG_3_PACKET_TYPE);
+ rc = -EINVAL;
+ goto out;
+ }
+ /* Released: wipe_auth_tok_list called in ecryptfs_parse_packet_set or
+ * at end of function upon failure */
+ auth_tok_list_item =
+ kmem_cache_alloc(ecryptfs_auth_tok_list_item_cache, SLAB_KERNEL);
+ if (!auth_tok_list_item) {
+ ecryptfs_printk(KERN_ERR, "Unable to allocate memory\n");
+ rc = -ENOMEM;
+ goto out;
+ }
+ memset(auth_tok_list_item, 0,
+ sizeof(struct ecryptfs_auth_tok_list_item));
+ (*new_auth_tok) = &auth_tok_list_item->auth_tok;
+
+ /* check for body size - one to two bytes */
+ rc = parse_packet_length(&data[(*packet_size)], &body_size,
+ &length_size);
+ if (rc) {
+ ecryptfs_printk(KERN_WARNING, "Error parsing packet length; "
+ "rc = [%d]\n", rc);
+ goto out_free;
+ }
+ if (unlikely(body_size < (0x05 + ECRYPTFS_SALT_SIZE))) {
+ ecryptfs_printk(KERN_WARNING, "Invalid body size ([%d])\n",
+ body_size);
+ rc = -EINVAL;
+ goto out_free;
+ }
+ (*packet_size) += length_size;
+
+ /* now we know the length of the remainting Tag 3 packet size:
+ * 5 fix bytes for: version string, cipher, S2K ID, hash algo,
+ * number of hash iterations
+ * ECRYPTFS_SALT_SIZE bytes for salt
+ * body_size bytes minus the stuff above is the encrypted key size
+ */
+ if (unlikely((*packet_size) + body_size > max_packet_size)) {
+ ecryptfs_printk(KERN_ERR, "Packet size exceeds max\n");
+ rc = -EINVAL;
+ goto out_free;
+ }
+
+ /* There are 5 characters of additional information in the
+ * packet */
+ (*new_auth_tok)->session_key.encrypted_key_size =
+ body_size - (0x05 + ECRYPTFS_SALT_SIZE);
+ ecryptfs_printk(KERN_DEBUG, "Encrypted key size = [%d]\n",
+ (*new_auth_tok)->session_key.encrypted_key_size);
+
+ /* Version 4 (from RFC2440) - one byte */
+ if (unlikely(data[(*packet_size)++] != 0x04)) {
+ ecryptfs_printk(KERN_DEBUG, "Unknown version number "
+ "[%d]\n", data[(*packet_size) - 1]);
+ rc = -EINVAL;
+ goto out_free;
+ }
+
+ /* cipher - one byte */
+ ecryptfs_cipher_code_to_string(crypt_stat->cipher,
+ (u16)data[(*packet_size)]);
+ /* A little extra work to differentiate among the AES key
+ * sizes; see RFC2440 */
+ switch(data[(*packet_size)++]) {
+ case RFC2440_CIPHER_AES_192:
+ crypt_stat->key_size = 24;
+ break;
+ default:
+ crypt_stat->key_size =
+ (*new_auth_tok)->session_key.encrypted_key_size;
+ }
+ ecryptfs_init_crypt_ctx(crypt_stat);
+ /* S2K identifier 3 (from RFC2440) */
+ if (unlikely(data[(*packet_size)++] != 0x03)) {
+ ecryptfs_printk(KERN_ERR, "Only S2K ID 3 is currently "
+ "supported\n");
+ rc = -ENOSYS;
+ goto out_free;
+ }
+
+ /* TODO: finish the hash mapping */
+ /* hash algorithm - one byte */
+ switch (data[(*packet_size)++]) {
+ case 0x01: /* See RFC2440 for these numbers and their mappings */
+ /* Choose MD5 */
+ /* salt - ECRYPTFS_SALT_SIZE bytes */
+ memcpy((*new_auth_tok)->token.password.salt,
+ &data[(*packet_size)], ECRYPTFS_SALT_SIZE);
+ (*packet_size) += ECRYPTFS_SALT_SIZE;
+
+ /* This conversion was taken straight from RFC2440 */
+ /* number of hash iterations - one byte */
+ (*new_auth_tok)->token.password.hash_iterations =
+ ((u32) 16 + (data[(*packet_size)] & 15))
+ << ((data[(*packet_size)] >> 4) + 6);
+ (*packet_size)++;
+
+ /* encrypted session key -
+ * (body_size-5-ECRYPTFS_SALT_SIZE) bytes */
+ memcpy((*new_auth_tok)->session_key.encrypted_key,
+ &data[(*packet_size)],
+ (*new_auth_tok)->session_key.encrypted_key_size);
+ (*packet_size) +=
+ (*new_auth_tok)->session_key.encrypted_key_size;
+ (*new_auth_tok)->session_key.flags &=
+ ~ECRYPTFS_CONTAINS_DECRYPTED_KEY;
+ (*new_auth_tok)->session_key.flags |=
+ ECRYPTFS_CONTAINS_ENCRYPTED_KEY;
+ (*new_auth_tok)->token.password.hash_algo = 0x01;
+ break;
+ default:
+ ecryptfs_printk(KERN_ERR, "Unsupported hash algorithm: "
+ "[%d]\n", data[(*packet_size) - 1]);
+ rc = -ENOSYS;
+ goto out_free;
+ }
+ (*new_auth_tok)->token_type = ECRYPTFS_PASSWORD;
+ /* TODO: Parametarize; we might actually want userspace to
+ * decrypt the session key. */
+ ECRYPTFS_CLEAR_FLAG((*new_auth_tok)->session_key.flags,
+ ECRYPTFS_USERSPACE_SHOULD_TRY_TO_DECRYPT);
+ ECRYPTFS_CLEAR_FLAG((*new_auth_tok)->session_key.flags,
+ ECRYPTFS_USERSPACE_SHOULD_TRY_TO_ENCRYPT);
+ list_add(&auth_tok_list_item->list, auth_tok_list);
+ goto out;
+out_free:
+ (*new_auth_tok) = NULL;
+ memset(auth_tok_list_item, 0,
+ sizeof(struct ecryptfs_auth_tok_list_item));
+ kmem_cache_free(ecryptfs_auth_tok_list_item_cache,
+ auth_tok_list_item);
+out:
+ if (rc)
+ (*packet_size) = 0;
+ return rc;
+}
+
+/**
+ * parse_tag_11_packet
+ * @data: The raw bytes of the packet
+ * @contents: This function writes the data contents of the literal
+ * packet into this memory location
+ * @max_contents_bytes: The maximum number of bytes that this function
+ * is allowed to write into contents
+ * @tag_11_contents_size: This function writes the size of the parsed
+ * contents into this memory location; zero on
+ * error
+ * @packet_size: This function writes the size of the parsed packet
+ * into this memory location; zero on error
+ * @max_packet_size: maximum number of bytes to parse
+ *
+ * Returns zero on success; non-zero on error.
+ */
+static int
+parse_tag_11_packet(unsigned char *data, unsigned char *contents,
+ size_t max_contents_bytes, size_t *tag_11_contents_size,
+ size_t *packet_size, size_t max_packet_size)
+{
+ int rc = 0;
+ size_t body_size;
+ size_t length_size;
+
+ (*packet_size) = 0;
+ (*tag_11_contents_size) = 0;
+
+ /* check that:
+ * one byte for the Tag 11 ID flag
+ * two bytes for the Tag 11 length
+ * do not exceed the maximum_packet_size
+ */
+ if (unlikely((*packet_size) + 3 > max_packet_size)) {
+ ecryptfs_printk(KERN_ERR, "Packet size exceeds max\n");
+ rc = -EINVAL;
+ goto out;
+ }
+
+ /* check for Tag 11 identifyer - one byte */
+ if (data[(*packet_size)++] != ECRYPTFS_TAG_11_PACKET_TYPE) {
+ ecryptfs_printk(KERN_WARNING,
+ "Invalid tag 11 packet format\n");
+ rc = -EINVAL;
+ goto out;
+ }
+
+ /* get Tag 11 content length - one or two bytes */
+ rc = parse_packet_length(&data[(*packet_size)], &body_size,
+ &length_size);
+ if (rc) {
+ ecryptfs_printk(KERN_WARNING,
+ "Invalid tag 11 packet format\n");
+ goto out;
+ }
+ (*packet_size) += length_size;
+
+ if (body_size < 13) {
+ ecryptfs_printk(KERN_WARNING, "Invalid body size ([%d])\n",
+ body_size);
+ rc = -EINVAL;
+ goto out;
+ }
+ /* We have 13 bytes of surrounding packet values */
+ (*tag_11_contents_size) = (body_size - 13);
+
+ /* now we know the length of the remainting Tag 11 packet size:
+ * 14 fix bytes for: special flag one, special flag two,
+ * 12 skipped bytes
+ * body_size bytes minus the stuff above is the Tag 11 content
+ */
+ /* FIXME why is the body size one byte smaller than the actual
+ * size of the body?
+ * this seems to be an error here as well as in
+ * write_tag_11_packet() */
+ if (unlikely((*packet_size) + body_size + 1 > max_packet_size)) {
+ ecryptfs_printk(KERN_ERR, "Packet size exceeds max\n");
+ rc = -EINVAL;
+ goto out;
+ }
+
+ /* special flag one - one byte */
+ if (data[(*packet_size)++] != 0x62) {
+ ecryptfs_printk(KERN_WARNING, "Unrecognizable packet\n");
+ rc = -EINVAL;
+ goto out;
+ }
+
+ /* special flag two - one byte */
+ if (data[(*packet_size)++] != 0x08) {
+ ecryptfs_printk(KERN_WARNING, "Unrecognizable packet\n");
+ rc = -EINVAL;
+ goto out;
+ }
+
+ /* skip the next 12 bytes */
+ (*packet_size) += 12; /* We don't care about the filename or
+ * the timestamp */
+
+ /* get the Tag 11 contents - tag_11_contents_size bytes */
+ memcpy(contents, &data[(*packet_size)], (*tag_11_contents_size));
+ (*packet_size) += (*tag_11_contents_size);
+
+out:
+ if (rc) {
+ (*packet_size) = 0;
+ (*tag_11_contents_size) = 0;
+ }
+ return rc;
+}
+
+/**
+ * decrypt_session_key - Decrypt the session key with the given auth_tok.
+ *
+ * Returns Zero on success; non-zero error otherwise.
+ */
+static int decrypt_session_key(struct ecryptfs_auth_tok *auth_tok,
+ struct ecryptfs_crypt_stat *crypt_stat)
+{
+ int rc = 0;
+ struct ecryptfs_password *password_s_ptr;
+ struct crypto_tfm *tfm = NULL;
+ struct scatterlist src_sg[2], dst_sg[2];
+ struct mutex *tfm_mutex = NULL;
+ /* TODO: Use virt_to_scatterlist for these */
+ char *encrypted_session_key;
+ char *session_key;
+
+ password_s_ptr = &auth_tok->token.password;
+ if (ECRYPTFS_CHECK_FLAG(password_s_ptr->flags,
+ ECRYPTFS_SESSION_KEY_ENCRYPTION_KEY_SET))
+ ecryptfs_printk(KERN_DEBUG, "Session key encryption key "
+ "set; skipping key generation\n");
+ ecryptfs_printk(KERN_DEBUG, "Session key encryption key (size [%d])"
+ ":\n",
+ password_s_ptr->session_key_encryption_key_bytes);
+ if (ecryptfs_verbosity > 0)
+ ecryptfs_dump_hex(password_s_ptr->session_key_encryption_key,
+ password_s_ptr->
+ session_key_encryption_key_bytes);
+ if (!strcmp(crypt_stat->cipher,
+ crypt_stat->mount_crypt_stat->global_default_cipher_name)
+ && crypt_stat->mount_crypt_stat->global_key_tfm) {
+ tfm = crypt_stat->mount_crypt_stat->global_key_tfm;
+ tfm_mutex = &crypt_stat->mount_crypt_stat->global_key_tfm_mutex;
+ } else {
+ tfm = crypto_alloc_tfm(crypt_stat->cipher,
+ CRYPTO_TFM_REQ_WEAK_KEY);
+ if (!tfm) {
+ printk(KERN_ERR "Error allocating crypto context\n");
+ rc = -ENOMEM;
+ goto out;
+ }
+ }
+ if (password_s_ptr->session_key_encryption_key_bytes
+ < crypto_tfm_alg_min_keysize(tfm)) {
+ printk(KERN_WARNING "Session key encryption key is [%d] bytes; "
+ "minimum keysize for selected cipher is [%d] bytes.\n",
+ password_s_ptr->session_key_encryption_key_bytes,
+ crypto_tfm_alg_min_keysize(tfm));
+ rc = -EINVAL;
+ goto out;
+ }
+ if (tfm_mutex)
+ mutex_lock(tfm_mutex);
+ crypto_cipher_setkey(tfm, password_s_ptr->session_key_encryption_key,
+ crypt_stat->key_size);
+ /* TODO: virt_to_scatterlist */
+ encrypted_session_key = (char *)__get_free_page(GFP_KERNEL);
+ if (!encrypted_session_key) {
+ ecryptfs_printk(KERN_ERR, "Out of memory\n");
+ rc = -ENOMEM;
+ goto out_free_tfm;
+ }
+ session_key = (char *)__get_free_page(GFP_KERNEL);
+ if (!session_key) {
+ kfree(encrypted_session_key);
+ ecryptfs_printk(KERN_ERR, "Out of memory\n");
+ rc = -ENOMEM;
+ goto out_free_tfm;
+ }
+ memcpy(encrypted_session_key, auth_tok->session_key.encrypted_key,
+ auth_tok->session_key.encrypted_key_size);
+ src_sg[0].page = virt_to_page(encrypted_session_key);
+ src_sg[0].offset = 0;
+ BUG_ON(auth_tok->session_key.encrypted_key_size > PAGE_CACHE_SIZE);
+ src_sg[0].length = auth_tok->session_key.encrypted_key_size;
+ dst_sg[0].page = virt_to_page(session_key);
+ dst_sg[0].offset = 0;
+ auth_tok->session_key.decrypted_key_size =
+ auth_tok->session_key.encrypted_key_size;
+ dst_sg[0].length = auth_tok->session_key.encrypted_key_size;
+ /* TODO: Handle error condition */
+ crypto_cipher_decrypt(tfm, dst_sg, src_sg,
+ auth_tok->session_key.encrypted_key_size);
+ auth_tok->session_key.decrypted_key_size =
+ auth_tok->session_key.encrypted_key_size;
+ memcpy(auth_tok->session_key.decrypted_key, session_key,
+ auth_tok->session_key.decrypted_key_size);
+ auth_tok->session_key.flags |= ECRYPTFS_CONTAINS_DECRYPTED_KEY;
+ memcpy(crypt_stat->key, auth_tok->session_key.decrypted_key,
+ auth_tok->session_key.decrypted_key_size);
+ ECRYPTFS_SET_FLAG(crypt_stat->flags, ECRYPTFS_KEY_VALID);
+ ecryptfs_printk(KERN_DEBUG, "Decrypted session key:\n");
+ if (ecryptfs_verbosity > 0)
+ ecryptfs_dump_hex(crypt_stat->key,
+ crypt_stat->key_size);
+ memset(encrypted_session_key, 0, PAGE_CACHE_SIZE);
+ free_page((unsigned long)encrypted_session_key);
+ memset(session_key, 0, PAGE_CACHE_SIZE);
+ free_page((unsigned long)session_key);
+out_free_tfm:
+ if (tfm_mutex)
+ mutex_unlock(tfm_mutex);
+ else
+ crypto_free_tfm(tfm);
+out:
+ return rc;
+}
+
+/**
+ * ecryptfs_parse_packet_set
+ * @dest: The header page in memory
+ * @version: Version of file format, to guide parsing behavior
+ *
+ * Get crypt_stat to have the file's session key if the requisite key
+ * is available to decrypt the session key.
+ *
+ * Returns Zero if a valid authentication token was retrieved and
+ * processed; negative value for file not encrypted or for error
+ * conditions.
+ */
+int ecryptfs_parse_packet_set(struct ecryptfs_crypt_stat *crypt_stat,
+ unsigned char *src,
+ struct dentry *ecryptfs_dentry)
+{
+ size_t i = 0;
+ int rc = 0;
+ size_t found_auth_tok = 0;
+ size_t next_packet_is_auth_tok_packet;
+ char sig[ECRYPTFS_SIG_SIZE_HEX];
+ struct list_head auth_tok_list;
+ struct list_head *walker;
+ struct ecryptfs_auth_tok *chosen_auth_tok = NULL;
+ struct ecryptfs_mount_crypt_stat *mount_crypt_stat =
+ &ecryptfs_superblock_to_private(
+ ecryptfs_dentry->d_sb)->mount_crypt_stat;
+ struct ecryptfs_auth_tok *candidate_auth_tok = NULL;
+ size_t packet_size;
+ struct ecryptfs_auth_tok *new_auth_tok;
+ unsigned char sig_tmp_space[ECRYPTFS_SIG_SIZE];
+ size_t tag_11_contents_size;
+ size_t tag_11_packet_size;
+
+ INIT_LIST_HEAD(&auth_tok_list);
+ /* Parse the header to find as many packets as we can, these will be
+ * added the our &auth_tok_list */
+ next_packet_is_auth_tok_packet = 1;
+ while (next_packet_is_auth_tok_packet) {
+ size_t max_packet_size = ((PAGE_CACHE_SIZE - 8) - i);
+
+ switch (src[i]) {
+ case ECRYPTFS_TAG_3_PACKET_TYPE:
+ rc = parse_tag_3_packet(crypt_stat,
+ (unsigned char *)&src[i],
+ &auth_tok_list, &new_auth_tok,
+ &packet_size, max_packet_size);
+ if (rc) {
+ ecryptfs_printk(KERN_ERR, "Error parsing "
+ "tag 3 packet\n");
+ rc = -EIO;
+ goto out_wipe_list;
+ }
+ i += packet_size;
+ rc = parse_tag_11_packet((unsigned char *)&src[i],
+ sig_tmp_space,
+ ECRYPTFS_SIG_SIZE,
+ &tag_11_contents_size,
+ &tag_11_packet_size,
+ max_packet_size);
+ if (rc) {
+ ecryptfs_printk(KERN_ERR, "No valid "
+ "(ecryptfs-specific) literal "
+ "packet containing "
+ "authentication token "
+ "signature found after "
+ "tag 3 packet\n");
+ rc = -EIO;
+ goto out_wipe_list;
+ }
+ i += tag_11_packet_size;
+ if (ECRYPTFS_SIG_SIZE != tag_11_contents_size) {
+ ecryptfs_printk(KERN_ERR, "Expected "
+ "signature of size [%d]; "
+ "read size [%d]\n",
+ ECRYPTFS_SIG_SIZE,
+ tag_11_contents_size);
+ rc = -EIO;
+ goto out_wipe_list;
+ }
+ ecryptfs_to_hex(new_auth_tok->token.password.signature,
+ sig_tmp_space, tag_11_contents_size);
+ new_auth_tok->token.password.signature[
+ ECRYPTFS_PASSWORD_SIG_SIZE] = '\0';
+ ECRYPTFS_SET_FLAG(crypt_stat->flags,
+ ECRYPTFS_ENCRYPTED);
+ break;
+ case ECRYPTFS_TAG_11_PACKET_TYPE:
+ ecryptfs_printk(KERN_WARNING, "Invalid packet set "
+ "(Tag 11 not allowed by itself)\n");
+ rc = -EIO;
+ goto out_wipe_list;
+ break;
+ default:
+ ecryptfs_printk(KERN_DEBUG, "No packet at offset "
+ "[%d] of the file header; hex value of "
+ "character is [0x%.2x]\n", i, src[i]);
+ next_packet_is_auth_tok_packet = 0;
+ }
+ }
+ if (list_empty(&auth_tok_list)) {
+ rc = -EINVAL; /* Do not support non-encrypted files in
+ * the 0.1 release */
+ goto out;
+ }
+ /* If we have a global auth tok, then we should try to use
+ * it */
+ if (mount_crypt_stat->global_auth_tok) {
+ memcpy(sig, mount_crypt_stat->global_auth_tok_sig,
+ ECRYPTFS_SIG_SIZE_HEX);
+ chosen_auth_tok = mount_crypt_stat->global_auth_tok;
+ } else
+ BUG(); /* We should always have a global auth tok in
+ * the 0.1 release */
+ /* Scan list to see if our chosen_auth_tok works */
+ list_for_each(walker, &auth_tok_list) {
+ struct ecryptfs_auth_tok_list_item *auth_tok_list_item;
+ auth_tok_list_item =
+ list_entry(walker, struct ecryptfs_auth_tok_list_item,
+ list);
+ candidate_auth_tok = &auth_tok_list_item->auth_tok;
+ if (unlikely(ecryptfs_verbosity > 0)) {
+ ecryptfs_printk(KERN_DEBUG,
+ "Considering cadidate auth tok:\n");
+ ecryptfs_dump_auth_tok(candidate_auth_tok);
+ }
+ /* TODO: Replace ECRYPTFS_SIG_SIZE_HEX w/ dynamic value */
+ if (candidate_auth_tok->token_type == ECRYPTFS_PASSWORD
+ && !strncmp(candidate_auth_tok->token.password.signature,
+ sig, ECRYPTFS_SIG_SIZE_HEX)) {
+ found_auth_tok = 1;
+ goto leave_list;
+ /* TODO: Transfer the common salt into the
+ * crypt_stat salt */
+ }
+ }
+leave_list:
+ if (!found_auth_tok) {
+ ecryptfs_printk(KERN_ERR, "Could not find authentication "
+ "token on temporary list for sig [%.*s]\n",
+ ECRYPTFS_SIG_SIZE_HEX, sig);
+ rc = -EIO;
+ goto out_wipe_list;
+ } else {
+ memcpy(&(candidate_auth_tok->token.password),
+ &(chosen_auth_tok->token.password),
+ sizeof(struct ecryptfs_password));
+ rc = decrypt_session_key(candidate_auth_tok, crypt_stat);
+ if (rc) {
+ ecryptfs_printk(KERN_ERR, "Error decrypting the "
+ "session key\n");
+ goto out_wipe_list;
+ }
+ rc = ecryptfs_compute_root_iv(crypt_stat);
+ if (rc) {
+ ecryptfs_printk(KERN_ERR, "Error computing "
+ "the root IV\n");
+ goto out_wipe_list;
+ }
+ }
+ rc = ecryptfs_init_crypt_ctx(crypt_stat);
+ if (rc) {
+ ecryptfs_printk(KERN_ERR, "Error initializing crypto "
+ "context for cipher [%s]; rc = [%d]\n",
+ crypt_stat->cipher, rc);
+ }
+out_wipe_list:
+ wipe_auth_tok_list(&auth_tok_list);
+out:
+ return rc;
+}
+
+/**
+ * write_tag_11_packet
+ * @dest: Target into which Tag 11 packet is to be written
+ * @max: Maximum packet length
+ * @contents: Byte array of contents to copy in
+ * @contents_length: Number of bytes in contents
+ * @packet_length: Length of the Tag 11 packet written; zero on error
+ *
+ * Returns zero on success; non-zero on error.
+ */
+static int
+write_tag_11_packet(char *dest, int max, char *contents, size_t contents_length,
+ size_t *packet_length)
+{
+ int rc = 0;
+ size_t packet_size_length;
+
+ (*packet_length) = 0;
+ if ((13 + contents_length) > max) {
+ rc = -EINVAL;
+ ecryptfs_printk(KERN_ERR, "Packet length larger than "
+ "maximum allowable\n");
+ goto out;
+ }
+ /* General packet header */
+ /* Packet tag */
+ dest[(*packet_length)++] = ECRYPTFS_TAG_11_PACKET_TYPE;
+ /* Packet length */
+ rc = write_packet_length(&dest[(*packet_length)],
+ (13 + contents_length), &packet_size_length);
+ if (rc) {
+ ecryptfs_printk(KERN_ERR, "Error generating tag 11 packet "
+ "header; cannot generate packet length\n");
+ goto out;
+ }
+ (*packet_length) += packet_size_length;
+ /* Tag 11 specific */
+ /* One-octet field that describes how the data is formatted */
+ dest[(*packet_length)++] = 0x62; /* binary data */
+ /* One-octet filename length followed by filename */
+ dest[(*packet_length)++] = 8;
+ memcpy(&dest[(*packet_length)], "_CONSOLE", 8);
+ (*packet_length) += 8;
+ /* Four-octet number indicating modification date */
+ memset(&dest[(*packet_length)], 0x00, 4);
+ (*packet_length) += 4;
+ /* Remainder is literal data */
+ memcpy(&dest[(*packet_length)], contents, contents_length);
+ (*packet_length) += contents_length;
+ out:
+ if (rc)
+ (*packet_length) = 0;
+ return rc;
+}
+
+/**
+ * write_tag_3_packet
+ * @dest: Buffer into which to write the packet
+ * @max: Maximum number of bytes that can be written
+ * @auth_tok: Authentication token
+ * @crypt_stat: The cryptographic context
+ * @key_rec: encrypted key
+ * @packet_size: This function will write the number of bytes that end
+ * up constituting the packet; set to zero on error
+ *
+ * Returns zero on success; non-zero on error.
+ */
+static int
+write_tag_3_packet(char *dest, size_t max, struct ecryptfs_auth_tok *auth_tok,
+ struct ecryptfs_crypt_stat *crypt_stat,
+ struct ecryptfs_key_record *key_rec, size_t *packet_size)
+{
+ int rc = 0;
+
+ size_t i;
+ size_t signature_is_valid = 0;
+ size_t encrypted_session_key_valid = 0;
+ char session_key_encryption_key[ECRYPTFS_MAX_KEY_BYTES];
+ struct scatterlist dest_sg[2];
+ struct scatterlist src_sg[2];
+ struct crypto_tfm *tfm = NULL;
+ struct mutex *tfm_mutex = NULL;
+ size_t key_rec_size;
+ size_t packet_size_length;
+ size_t cipher_code;
+
+ (*packet_size) = 0;
+ /* Check for a valid signature on the auth_tok */
+ for (i = 0; i < ECRYPTFS_SIG_SIZE_HEX; i++)
+ signature_is_valid |= auth_tok->token.password.signature[i];
+ if (!signature_is_valid)
+ BUG();
+ ecryptfs_from_hex((*key_rec).sig, auth_tok->token.password.signature,
+ ECRYPTFS_SIG_SIZE);
+ encrypted_session_key_valid = 0;
+ for (i = 0; i < crypt_stat->key_size; i++)
+ encrypted_session_key_valid |=
+ auth_tok->session_key.encrypted_key[i];
+ if (encrypted_session_key_valid) {
+ memcpy((*key_rec).enc_key,
+ auth_tok->session_key.encrypted_key,
+ auth_tok->session_key.encrypted_key_size);
+ goto encrypted_session_key_set;
+ }
+ if (auth_tok->session_key.encrypted_key_size == 0)
+ auth_tok->session_key.encrypted_key_size =
+ crypt_stat->key_size;
+ if (crypt_stat->key_size == 24
+ && strcmp("aes", crypt_stat->cipher) == 0) {
+ memset((crypt_stat->key + 24), 0, 8);
+ auth_tok->session_key.encrypted_key_size = 32;
+ }
+ (*key_rec).enc_key_size =
+ auth_tok->session_key.encrypted_key_size;
+ if (ECRYPTFS_CHECK_FLAG(auth_tok->token.password.flags,
+ ECRYPTFS_SESSION_KEY_ENCRYPTION_KEY_SET)) {
+ ecryptfs_printk(KERN_DEBUG, "Using previously generated "
+ "session key encryption key of size [%d]\n",
+ auth_tok->token.password.
+ session_key_encryption_key_bytes);
+ memcpy(session_key_encryption_key,
+ auth_tok->token.password.session_key_encryption_key,
+ crypt_stat->key_size);
+ ecryptfs_printk(KERN_DEBUG,
+ "Cached session key " "encryption key: \n");
+ if (ecryptfs_verbosity > 0)
+ ecryptfs_dump_hex(session_key_encryption_key, 16);
+ }
+ if (unlikely(ecryptfs_verbosity > 0)) {
+ ecryptfs_printk(KERN_DEBUG, "Session key encryption key:\n");
+ ecryptfs_dump_hex(session_key_encryption_key, 16);
+ }
+ rc = virt_to_scatterlist(crypt_stat->key,
+ (*key_rec).enc_key_size, src_sg, 2);
+ if (!rc) {
+ ecryptfs_printk(KERN_ERR, "Error generating scatterlist "
+ "for crypt_stat session key\n");
+ rc = -ENOMEM;
+ goto out;
+ }
+ rc = virt_to_scatterlist((*key_rec).enc_key,
+ (*key_rec).enc_key_size, dest_sg, 2);
+ if (!rc) {
+ ecryptfs_printk(KERN_ERR, "Error generating scatterlist "
+ "for crypt_stat encrypted session key\n");
+ rc = -ENOMEM;
+ goto out;
+ }
+ if (!strcmp(crypt_stat->cipher,
+ crypt_stat->mount_crypt_stat->global_default_cipher_name)
+ && crypt_stat->mount_crypt_stat->global_key_tfm) {
+ tfm = crypt_stat->mount_crypt_stat->global_key_tfm;
+ tfm_mutex = &crypt_stat->mount_crypt_stat->global_key_tfm_mutex;
+ } else
+ tfm = crypto_alloc_tfm(crypt_stat->cipher, 0);
+ if (!tfm) {
+ ecryptfs_printk(KERN_ERR, "Could not initialize crypto "
+ "context for cipher [%s]\n",
+ crypt_stat->cipher);
+ rc = -EINVAL;
+ goto out;
+ }
+ if (tfm_mutex)
+ mutex_lock(tfm_mutex);
+ rc = crypto_cipher_setkey(tfm, session_key_encryption_key,
+ crypt_stat->key_size);
+ if (rc < 0) {
+ if (tfm_mutex)
+ mutex_unlock(tfm_mutex);
+ ecryptfs_printk(KERN_ERR, "Error setting key for crypto "
+ "context\n");
+ goto out;
+ }
+ rc = 0;
+ ecryptfs_printk(KERN_DEBUG, "Encrypting [%d] bytes of the key\n",
+ crypt_stat->key_size);
+ crypto_cipher_encrypt(tfm, dest_sg, src_sg,
+ (*key_rec).enc_key_size);
+ if (tfm_mutex)
+ mutex_unlock(tfm_mutex);
+ ecryptfs_printk(KERN_DEBUG, "This should be the encrypted key:\n");
+ if (ecryptfs_verbosity > 0)
+ ecryptfs_dump_hex((*key_rec).enc_key,
+ (*key_rec).enc_key_size);
+encrypted_session_key_set:
+ /* Now we have a valid key_rec. Append it to the
+ * key_rec set. */
+ key_rec_size = (sizeof(struct ecryptfs_key_record)
+ - ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES
+ + ((*key_rec).enc_key_size));
+ /* TODO: Include a packet size limit as a parameter to this
+ * function once we have multi-packet headers (for versions
+ * later than 0.1 */
+ if (key_rec_size >= ECRYPTFS_MAX_KEYSET_SIZE) {
+ ecryptfs_printk(KERN_ERR, "Keyset too large\n");
+ rc = -EINVAL;
+ goto out;
+ }
+ /* TODO: Packet size limit */
+ /* We have 5 bytes of surrounding packet data */
+ if ((0x05 + ECRYPTFS_SALT_SIZE
+ + (*key_rec).enc_key_size) >= max) {
+ ecryptfs_printk(KERN_ERR, "Authentication token is too "
+ "large\n");
+ rc = -EINVAL;
+ goto out;
+ }
+ /* This format is inspired by OpenPGP; see RFC 2440
+ * packet tag 3 */
+ dest[(*packet_size)++] = ECRYPTFS_TAG_3_PACKET_TYPE;
+ /* ver+cipher+s2k+hash+salt+iter+enc_key */
+ rc = write_packet_length(&dest[(*packet_size)],
+ (0x05 + ECRYPTFS_SALT_SIZE
+ + (*key_rec).enc_key_size),
+ &packet_size_length);
+ if (rc) {
+ ecryptfs_printk(KERN_ERR, "Error generating tag 3 packet "
+ "header; cannot generate packet length\n");
+ goto out;
+ }
+ (*packet_size) += packet_size_length;
+ dest[(*packet_size)++] = 0x04; /* version 4 */
+ cipher_code = ecryptfs_code_for_cipher_string(crypt_stat);
+ if (cipher_code == 0) {
+ ecryptfs_printk(KERN_WARNING, "Unable to generate code for "
+ "cipher [%s]\n", crypt_stat->cipher);
+ rc = -EINVAL;
+ goto out;
+ }
+ dest[(*packet_size)++] = cipher_code;
+ dest[(*packet_size)++] = 0x03; /* S2K */
+ dest[(*packet_size)++] = 0x01; /* MD5 (TODO: parameterize) */
+ memcpy(&dest[(*packet_size)], auth_tok->token.password.salt,
+ ECRYPTFS_SALT_SIZE);
+ (*packet_size) += ECRYPTFS_SALT_SIZE; /* salt */
+ dest[(*packet_size)++] = 0x60; /* hash iterations (65536) */
+ memcpy(&dest[(*packet_size)], (*key_rec).enc_key,
+ (*key_rec).enc_key_size);
+ (*packet_size) += (*key_rec).enc_key_size;
+out:
+ if (tfm && !tfm_mutex)
+ crypto_free_tfm(tfm);
+ if (rc)
+ (*packet_size) = 0;
+ return rc;
+}
+
+/**
+ * ecryptfs_generate_key_packet_set
+ * @dest: Virtual address from which to write the key record set
+ * @crypt_stat: The cryptographic context from which the
+ * authentication tokens will be retrieved
+ * @ecryptfs_dentry: The dentry, used to retrieve the mount crypt stat
+ * for the global parameters
+ * @len: The amount written
+ * @max: The maximum amount of data allowed to be written
+ *
+ * Generates a key packet set and writes it to the virtual address
+ * passed in.
+ *
+ * Returns zero on success; non-zero on error.
+ */
+int
+ecryptfs_generate_key_packet_set(char *dest_base,
+ struct ecryptfs_crypt_stat *crypt_stat,
+ struct dentry *ecryptfs_dentry, size_t *len,
+ size_t max)
+{
+ int rc = 0;
+ struct ecryptfs_auth_tok *auth_tok;
+ struct ecryptfs_mount_crypt_stat *mount_crypt_stat =
+ &ecryptfs_superblock_to_private(
+ ecryptfs_dentry->d_sb)->mount_crypt_stat;
+ size_t written;
+ struct ecryptfs_key_record key_rec;
+
+ (*len) = 0;
+ if (mount_crypt_stat->global_auth_tok) {
+ auth_tok = mount_crypt_stat->global_auth_tok;
+ if (auth_tok->token_type == ECRYPTFS_PASSWORD) {
+ rc = write_tag_3_packet((dest_base + (*len)),
+ max, auth_tok,
+ crypt_stat, &key_rec,
+ &written);
+ if (rc) {
+ ecryptfs_printk(KERN_WARNING, "Error "
+ "writing tag 3 packet\n");
+ goto out;
+ }
+ (*len) += written;
+ /* Write auth tok signature packet */
+ rc = write_tag_11_packet(
+ (dest_base + (*len)),
+ (max - (*len)),
+ key_rec.sig, ECRYPTFS_SIG_SIZE, &written);
+ if (rc) {
+ ecryptfs_printk(KERN_ERR, "Error writing "
+ "auth tok signature packet\n");
+ goto out;
+ }
+ (*len) += written;
+ } else {
+ ecryptfs_printk(KERN_WARNING, "Unsupported "
+ "authentication token type\n");
+ rc = -EINVAL;
+ goto out;
+ }
+ if (rc) {
+ ecryptfs_printk(KERN_WARNING, "Error writing "
+ "authentication token packet with sig "
+ "= [%s]\n",
+ mount_crypt_stat->global_auth_tok_sig);
+ rc = -EIO;
+ goto out;
+ }
+ } else
+ BUG();
+ if (likely((max - (*len)) > 0)) {
+ dest_base[(*len)] = 0x00;
+ } else {
+ ecryptfs_printk(KERN_ERR, "Error writing boundary byte\n");
+ rc = -EIO;
+ }
+out:
+ if (rc)
+ (*len) = 0;
+ return rc;
+}
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
new file mode 100644
index 000000000000..7a11b8ae6644
--- /dev/null
+++ b/fs/ecryptfs/main.c
@@ -0,0 +1,831 @@
+/**
+ * eCryptfs: Linux filesystem encryption layer
+ *
+ * Copyright (C) 1997-2003 Erez Zadok
+ * Copyright (C) 2001-2003 Stony Brook University
+ * Copyright (C) 2004-2006 International Business Machines Corp.
+ * Author(s): Michael A. Halcrow <mahalcro@us.ibm.com>
+ * Michael C. Thompson <mcthomps@us.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+ * 02111-1307, USA.
+ */
+
+#include <linux/dcache.h>
+#include <linux/file.h>
+#include <linux/module.h>
+#include <linux/namei.h>
+#include <linux/skbuff.h>
+#include <linux/crypto.h>
+#include <linux/netlink.h>
+#include <linux/mount.h>
+#include <linux/dcache.h>
+#include <linux/pagemap.h>
+#include <linux/key.h>
+#include <linux/parser.h>
+#include "ecryptfs_kernel.h"
+
+/**
+ * Module parameter that defines the ecryptfs_verbosity level.
+ */
+int ecryptfs_verbosity = 0;
+
+module_param(ecryptfs_verbosity, int, 0);
+MODULE_PARM_DESC(ecryptfs_verbosity,
+ "Initial verbosity level (0 or 1; defaults to "
+ "0, which is Quiet)");
+
+void __ecryptfs_printk(const char *fmt, ...)
+{
+ va_list args;
+ va_start(args, fmt);
+ if (fmt[1] == '7') { /* KERN_DEBUG */
+ if (ecryptfs_verbosity >= 1)
+ vprintk(fmt, args);
+ } else
+ vprintk(fmt, args);
+ va_end(args);
+}
+
+/**
+ * ecryptfs_interpose
+ * @lower_dentry: Existing dentry in the lower filesystem
+ * @dentry: ecryptfs' dentry
+ * @sb: ecryptfs's super_block
+ * @flag: If set to true, then d_add is called, else d_instantiate is called
+ *
+ * Interposes upper and lower dentries.
+ *
+ * Returns zero on success; non-zero otherwise
+ */
+int ecryptfs_interpose(struct dentry *lower_dentry, struct dentry *dentry,
+ struct super_block *sb, int flag)
+{
+ struct inode *lower_inode;
+ struct inode *inode;
+ int rc = 0;
+
+ lower_inode = lower_dentry->d_inode;
+ if (lower_inode->i_sb != ecryptfs_superblock_to_lower(sb)) {
+ rc = -EXDEV;
+ goto out;
+ }
+ if (!igrab(lower_inode)) {
+ rc = -ESTALE;
+ goto out;
+ }
+ inode = iget5_locked(sb, (unsigned long)lower_inode,
+ ecryptfs_inode_test, ecryptfs_inode_set,
+ lower_inode);
+ if (!inode) {
+ rc = -EACCES;
+ iput(lower_inode);
+ goto out;
+ }
+ if (inode->i_state & I_NEW)
+ unlock_new_inode(inode);
+ else
+ iput(lower_inode);
+ if (S_ISLNK(lower_inode->i_mode))
+ inode->i_op = &ecryptfs_symlink_iops;
+ else if (S_ISDIR(lower_inode->i_mode))
+ inode->i_op = &ecryptfs_dir_iops;
+ if (S_ISDIR(lower_inode->i_mode))
+ inode->i_fop = &ecryptfs_dir_fops;
+ /* TODO: Is there a better way to identify if the inode is
+ * special? */
+ if (S_ISBLK(lower_inode->i_mode) || S_ISCHR(lower_inode->i_mode) ||
+ S_ISFIFO(lower_inode->i_mode) || S_ISSOCK(lower_inode->i_mode))
+ init_special_inode(inode, lower_inode->i_mode,
+ lower_inode->i_rdev);
+ dentry->d_op = &ecryptfs_dops;
+ if (flag)
+ d_add(dentry, inode);
+ else
+ d_instantiate(dentry, inode);
+ ecryptfs_copy_attr_all(inode, lower_inode);
+ /* This size will be overwritten for real files w/ headers and
+ * other metadata */
+ ecryptfs_copy_inode_size(inode, lower_inode);
+out:
+ return rc;
+}
+
+enum { ecryptfs_opt_sig, ecryptfs_opt_ecryptfs_sig, ecryptfs_opt_debug,
+ ecryptfs_opt_ecryptfs_debug, ecryptfs_opt_cipher,
+ ecryptfs_opt_ecryptfs_cipher, ecryptfs_opt_ecryptfs_key_bytes,
+ ecryptfs_opt_passthrough, ecryptfs_opt_err };
+
+static match_table_t tokens = {
+ {ecryptfs_opt_sig, "sig=%s"},
+ {ecryptfs_opt_ecryptfs_sig, "ecryptfs_sig=%s"},
+ {ecryptfs_opt_debug, "debug=%u"},
+ {ecryptfs_opt_ecryptfs_debug, "ecryptfs_debug=%u"},
+ {ecryptfs_opt_cipher, "cipher=%s"},
+ {ecryptfs_opt_ecryptfs_cipher, "ecryptfs_cipher=%s"},
+ {ecryptfs_opt_ecryptfs_key_bytes, "ecryptfs_key_bytes=%u"},
+ {ecryptfs_opt_passthrough, "ecryptfs_passthrough"},
+ {ecryptfs_opt_err, NULL}
+};
+
+/**
+ * ecryptfs_verify_version
+ * @version: The version number to confirm
+ *
+ * Returns zero on good version; non-zero otherwise
+ */
+static int ecryptfs_verify_version(u16 version)
+{
+ int rc = 0;
+ unsigned char major;
+ unsigned char minor;
+
+ major = ((version >> 8) & 0xFF);
+ minor = (version & 0xFF);
+ if (major != ECRYPTFS_VERSION_MAJOR) {
+ ecryptfs_printk(KERN_ERR, "Major version number mismatch. "
+ "Expected [%d]; got [%d]\n",
+ ECRYPTFS_VERSION_MAJOR, major);
+ rc = -EINVAL;
+ goto out;
+ }
+ if (minor != ECRYPTFS_VERSION_MINOR) {
+ ecryptfs_printk(KERN_ERR, "Minor version number mismatch. "
+ "Expected [%d]; got [%d]\n",
+ ECRYPTFS_VERSION_MINOR, minor);
+ rc = -EINVAL;
+ goto out;
+ }
+out:
+ return rc;
+}
+
+/**
+ * ecryptfs_parse_options
+ * @sb: The ecryptfs super block
+ * @options: The options pased to the kernel
+ *
+ * Parse mount options:
+ * debug=N - ecryptfs_verbosity level for debug output
+ * sig=XXX - description(signature) of the key to use
+ *
+ * Returns the dentry object of the lower-level (lower/interposed)
+ * directory; We want to mount our stackable file system on top of
+ * that lower directory.
+ *
+ * The signature of the key to use must be the description of a key
+ * already in the keyring. Mounting will fail if the key can not be
+ * found.
+ *
+ * Returns zero on success; non-zero on error
+ */
+static int ecryptfs_parse_options(struct super_block *sb, char *options)
+{
+ char *p;
+ int rc = 0;
+ int sig_set = 0;
+ int cipher_name_set = 0;
+ int cipher_key_bytes;
+ int cipher_key_bytes_set = 0;
+ struct key *auth_tok_key = NULL;
+ struct ecryptfs_auth_tok *auth_tok = NULL;
+ struct ecryptfs_mount_crypt_stat *mount_crypt_stat =
+ &ecryptfs_superblock_to_private(sb)->mount_crypt_stat;
+ substring_t args[MAX_OPT_ARGS];
+ int token;
+ char *sig_src;
+ char *sig_dst;
+ char *debug_src;
+ char *cipher_name_dst;
+ char *cipher_name_src;
+ char *cipher_key_bytes_src;
+ struct crypto_tfm *tmp_tfm;
+ int cipher_name_len;
+
+ if (!options) {
+ rc = -EINVAL;
+ goto out;
+ }
+ while ((p = strsep(&options, ",")) != NULL) {
+ if (!*p)
+ continue;
+ token = match_token(p, tokens, args);
+ switch (token) {
+ case ecryptfs_opt_sig:
+ case ecryptfs_opt_ecryptfs_sig:
+ sig_src = args[0].from;
+ sig_dst =
+ mount_crypt_stat->global_auth_tok_sig;
+ memcpy(sig_dst, sig_src, ECRYPTFS_SIG_SIZE_HEX);
+ sig_dst[ECRYPTFS_SIG_SIZE_HEX] = '\0';
+ ecryptfs_printk(KERN_DEBUG,
+ "The mount_crypt_stat "
+ "global_auth_tok_sig set to: "
+ "[%s]\n", sig_dst);
+ sig_set = 1;
+ break;
+ case ecryptfs_opt_debug:
+ case ecryptfs_opt_ecryptfs_debug:
+ debug_src = args[0].from;
+ ecryptfs_verbosity =
+ (int)simple_strtol(debug_src, &debug_src,
+ 0);
+ ecryptfs_printk(KERN_DEBUG,
+ "Verbosity set to [%d]" "\n",
+ ecryptfs_verbosity);
+ break;
+ case ecryptfs_opt_cipher:
+ case ecryptfs_opt_ecryptfs_cipher:
+ cipher_name_src = args[0].from;
+ cipher_name_dst =
+ mount_crypt_stat->
+ global_default_cipher_name;
+ strncpy(cipher_name_dst, cipher_name_src,
+ ECRYPTFS_MAX_CIPHER_NAME_SIZE);
+ ecryptfs_printk(KERN_DEBUG,
+ "The mount_crypt_stat "
+ "global_default_cipher_name set to: "
+ "[%s]\n", cipher_name_dst);
+ cipher_name_set = 1;
+ break;
+ case ecryptfs_opt_ecryptfs_key_bytes:
+ cipher_key_bytes_src = args[0].from;
+ cipher_key_bytes =
+ (int)simple_strtol(cipher_key_bytes_src,
+ &cipher_key_bytes_src, 0);
+ mount_crypt_stat->global_default_cipher_key_size =
+ cipher_key_bytes;
+ ecryptfs_printk(KERN_DEBUG,
+ "The mount_crypt_stat "
+ "global_default_cipher_key_size "
+ "set to: [%d]\n", mount_crypt_stat->
+ global_default_cipher_key_size);
+ cipher_key_bytes_set = 1;
+ break;
+ case ecryptfs_opt_passthrough:
+ mount_crypt_stat->flags |=
+ ECRYPTFS_PLAINTEXT_PASSTHROUGH_ENABLED;
+ break;
+ case ecryptfs_opt_err:
+ default:
+ ecryptfs_printk(KERN_WARNING,
+ "eCryptfs: unrecognized option '%s'\n",
+ p);
+ }
+ }
+ /* Do not support lack of mount-wide signature in 0.1
+ * release */
+ if (!sig_set) {
+ rc = -EINVAL;
+ ecryptfs_printk(KERN_ERR, "You must supply a valid "
+ "passphrase auth tok signature as a mount "
+ "parameter; see the eCryptfs README\n");
+ goto out;
+ }
+ if (!cipher_name_set) {
+ cipher_name_len = strlen(ECRYPTFS_DEFAULT_CIPHER);
+ if (unlikely(cipher_name_len
+ >= ECRYPTFS_MAX_CIPHER_NAME_SIZE)) {
+ rc = -EINVAL;
+ BUG();
+ goto out;
+ }
+ memcpy(mount_crypt_stat->global_default_cipher_name,
+ ECRYPTFS_DEFAULT_CIPHER, cipher_name_len);
+ mount_crypt_stat->global_default_cipher_name[cipher_name_len]
+ = '\0';
+ }
+ if (!cipher_key_bytes_set) {
+ mount_crypt_stat->global_default_cipher_key_size =
+ ECRYPTFS_DEFAULT_KEY_BYTES;
+ ecryptfs_printk(KERN_DEBUG, "Cipher key size was not "
+ "specified. Defaulting to [%d]\n",
+ mount_crypt_stat->
+ global_default_cipher_key_size);
+ }
+ rc = ecryptfs_process_cipher(
+ &tmp_tfm,
+ &mount_crypt_stat->global_key_tfm,
+ mount_crypt_stat->global_default_cipher_name,
+ mount_crypt_stat->global_default_cipher_key_size);
+ if (tmp_tfm)
+ crypto_free_tfm(tmp_tfm);
+ if (rc) {
+ printk(KERN_ERR "Error attempting to initialize cipher [%s] "
+ "with key size [%Zd] bytes; rc = [%d]\n",
+ mount_crypt_stat->global_default_cipher_name,
+ mount_crypt_stat->global_default_cipher_key_size, rc);
+ rc = -EINVAL;
+ goto out;
+ }
+ mutex_init(&mount_crypt_stat->global_key_tfm_mutex);
+ ecryptfs_printk(KERN_DEBUG, "Requesting the key with description: "
+ "[%s]\n", mount_crypt_stat->global_auth_tok_sig);
+ /* The reference to this key is held until umount is done The
+ * call to key_put is done in ecryptfs_put_super() */
+ auth_tok_key = request_key(&key_type_user,
+ mount_crypt_stat->global_auth_tok_sig,
+ NULL);
+ if (!auth_tok_key || IS_ERR(auth_tok_key)) {
+ ecryptfs_printk(KERN_ERR, "Could not find key with "
+ "description: [%s]\n",
+ mount_crypt_stat->global_auth_tok_sig);
+ process_request_key_err(PTR_ERR(auth_tok_key));
+ rc = -EINVAL;
+ goto out;
+ }
+ auth_tok = ecryptfs_get_key_payload_data(auth_tok_key);
+ if (ecryptfs_verify_version(auth_tok->version)) {
+ ecryptfs_printk(KERN_ERR, "Data structure version mismatch. "
+ "Userspace tools must match eCryptfs kernel "
+ "module with major version [%d] and minor "
+ "version [%d]\n", ECRYPTFS_VERSION_MAJOR,
+ ECRYPTFS_VERSION_MINOR);
+ rc = -EINVAL;
+ goto out;
+ }
+ if (auth_tok->token_type != ECRYPTFS_PASSWORD) {
+ ecryptfs_printk(KERN_ERR, "Invalid auth_tok structure "
+ "returned from key\n");
+ rc = -EINVAL;
+ goto out;
+ }
+ mount_crypt_stat->global_auth_tok_key = auth_tok_key;
+ mount_crypt_stat->global_auth_tok = auth_tok;
+out:
+ return rc;
+}
+
+struct kmem_cache *ecryptfs_sb_info_cache;
+
+/**
+ * ecryptfs_fill_super
+ * @sb: The ecryptfs super block
+ * @raw_data: The options passed to mount
+ * @silent: Not used but required by function prototype
+ *
+ * Sets up what we can of the sb, rest is done in ecryptfs_read_super
+ *
+ * Returns zero on success; non-zero otherwise
+ */
+static int
+ecryptfs_fill_super(struct super_block *sb, void *raw_data, int silent)
+{
+ int rc = 0;
+
+ /* Released in ecryptfs_put_super() */
+ ecryptfs_set_superblock_private(sb,
+ kmem_cache_alloc(ecryptfs_sb_info_cache,
+ SLAB_KERNEL));
+ if (!ecryptfs_superblock_to_private(sb)) {
+ ecryptfs_printk(KERN_WARNING, "Out of memory\n");
+ rc = -ENOMEM;
+ goto out;
+ }
+ memset(ecryptfs_superblock_to_private(sb), 0,
+ sizeof(struct ecryptfs_sb_info));
+ sb->s_op = &ecryptfs_sops;
+ /* Released through deactivate_super(sb) from get_sb_nodev */
+ sb->s_root = d_alloc(NULL, &(const struct qstr) {
+ .hash = 0,.name = "/",.len = 1});
+ if (!sb->s_root) {
+ ecryptfs_printk(KERN_ERR, "d_alloc failed\n");
+ rc = -ENOMEM;
+ goto out;
+ }
+ sb->s_root->d_op = &ecryptfs_dops;
+ sb->s_root->d_sb = sb;
+ sb->s_root->d_parent = sb->s_root;
+ /* Released in d_release when dput(sb->s_root) is called */
+ /* through deactivate_super(sb) from get_sb_nodev() */
+ ecryptfs_set_dentry_private(sb->s_root,
+ kmem_cache_alloc(ecryptfs_dentry_info_cache,
+ SLAB_KERNEL));
+ if (!ecryptfs_dentry_to_private(sb->s_root)) {
+ ecryptfs_printk(KERN_ERR,
+ "dentry_info_cache alloc failed\n");
+ rc = -ENOMEM;
+ goto out;
+ }
+ memset(ecryptfs_dentry_to_private(sb->s_root), 0,
+ sizeof(struct ecryptfs_dentry_info));
+ rc = 0;
+out:
+ /* Should be able to rely on deactivate_super called from
+ * get_sb_nodev */
+ return rc;
+}
+
+/**
+ * ecryptfs_read_super
+ * @sb: The ecryptfs super block
+ * @dev_name: The path to mount over
+ *
+ * Read the super block of the lower filesystem, and use
+ * ecryptfs_interpose to create our initial inode and super block
+ * struct.
+ */
+static int ecryptfs_read_super(struct super_block *sb, const char *dev_name)
+{
+ int rc;
+ struct nameidata nd;
+ struct dentry *lower_root;
+ struct vfsmount *lower_mnt;
+
+ memset(&nd, 0, sizeof(struct nameidata));
+ rc = path_lookup(dev_name, LOOKUP_FOLLOW, &nd);
+ if (rc) {
+ ecryptfs_printk(KERN_WARNING, "path_lookup() failed\n");
+ goto out_free;
+ }
+ lower_root = nd.dentry;
+ if (!lower_root->d_inode) {
+ ecryptfs_printk(KERN_WARNING,
+ "No directory to interpose on\n");
+ rc = -ENOENT;
+ goto out_free;
+ }
+ lower_mnt = nd.mnt;
+ ecryptfs_set_superblock_lower(sb, lower_root->d_sb);
+ sb->s_maxbytes = lower_root->d_sb->s_maxbytes;
+ ecryptfs_set_dentry_lower(sb->s_root, lower_root);
+ ecryptfs_set_dentry_lower_mnt(sb->s_root, lower_mnt);
+ if ((rc = ecryptfs_interpose(lower_root, sb->s_root, sb, 0)))
+ goto out_free;
+ rc = 0;
+ goto out;
+out_free:
+ path_release(&nd);
+out:
+ return rc;
+}
+
+/**
+ * ecryptfs_get_sb
+ * @fs_type
+ * @flags
+ * @dev_name: The path to mount over
+ * @raw_data: The options passed into the kernel
+ *
+ * The whole ecryptfs_get_sb process is broken into 4 functions:
+ * ecryptfs_parse_options(): handle options passed to ecryptfs, if any
+ * ecryptfs_fill_super(): used by get_sb_nodev, fills out the super_block
+ * with as much information as it can before needing
+ * the lower filesystem.
+ * ecryptfs_read_super(): this accesses the lower filesystem and uses
+ * ecryptfs_interpolate to perform most of the linking
+ * ecryptfs_interpolate(): links the lower filesystem into ecryptfs
+ */
+static int ecryptfs_get_sb(struct file_system_type *fs_type, int flags,
+ const char *dev_name, void *raw_data,
+ struct vfsmount *mnt)
+{
+ int rc;
+ struct super_block *sb;
+
+ rc = get_sb_nodev(fs_type, flags, raw_data, ecryptfs_fill_super, mnt);
+ if (rc < 0) {
+ printk(KERN_ERR "Getting sb failed; rc = [%d]\n", rc);
+ goto out;
+ }
+ sb = mnt->mnt_sb;
+ rc = ecryptfs_parse_options(sb, raw_data);
+ if (rc) {
+ printk(KERN_ERR "Error parsing options; rc = [%d]\n", rc);
+ goto out_abort;
+ }
+ rc = ecryptfs_read_super(sb, dev_name);
+ if (rc) {
+ printk(KERN_ERR "Reading sb failed; rc = [%d]\n", rc);
+ goto out_abort;
+ }
+ goto out;
+out_abort:
+ dput(sb->s_root);
+ up_write(&sb->s_umount);
+ deactivate_super(sb);
+out:
+ return rc;
+}
+
+/**
+ * ecryptfs_kill_block_super
+ * @sb: The ecryptfs super block
+ *
+ * Used to bring the superblock down and free the private data.
+ * Private data is free'd in ecryptfs_put_super()
+ */
+static void ecryptfs_kill_block_super(struct super_block *sb)
+{
+ generic_shutdown_super(sb);
+}
+
+static struct file_system_type ecryptfs_fs_type = {
+ .owner = THIS_MODULE,
+ .name = "ecryptfs",
+ .get_sb = ecryptfs_get_sb,
+ .kill_sb = ecryptfs_kill_block_super,
+ .fs_flags = 0
+};
+
+/**
+ * inode_info_init_once
+ *
+ * Initializes the ecryptfs_inode_info_cache when it is created
+ */
+static void
+inode_info_init_once(void *vptr, struct kmem_cache *cachep, unsigned long flags)
+{
+ struct ecryptfs_inode_info *ei = (struct ecryptfs_inode_info *)vptr;
+
+ if ((flags & (SLAB_CTOR_VERIFY | SLAB_CTOR_CONSTRUCTOR)) ==
+ SLAB_CTOR_CONSTRUCTOR)
+ inode_init_once(&ei->vfs_inode);
+}
+
+static struct ecryptfs_cache_info {
+ kmem_cache_t **cache;
+ const char *name;
+ size_t size;
+ void (*ctor)(void*, struct kmem_cache *, unsigned long);
+} ecryptfs_cache_infos[] = {
+ {
+ .cache = &ecryptfs_auth_tok_list_item_cache,
+ .name = "ecryptfs_auth_tok_list_item",
+ .size = sizeof(struct ecryptfs_auth_tok_list_item),
+ },
+ {
+ .cache = &ecryptfs_file_info_cache,
+ .name = "ecryptfs_file_cache",
+ .size = sizeof(struct ecryptfs_file_info),
+ },
+ {
+ .cache = &ecryptfs_dentry_info_cache,
+ .name = "ecryptfs_dentry_info_cache",
+ .size = sizeof(struct ecryptfs_dentry_info),
+ },
+ {
+ .cache = &ecryptfs_inode_info_cache,
+ .name = "ecryptfs_inode_cache",
+ .size = sizeof(struct ecryptfs_inode_info),
+ .ctor = inode_info_init_once,
+ },
+ {
+ .cache = &ecryptfs_sb_info_cache,
+ .name = "ecryptfs_sb_cache",
+ .size = sizeof(struct ecryptfs_sb_info),
+ },
+ {
+ .cache = &ecryptfs_header_cache_0,
+ .name = "ecryptfs_headers_0",
+ .size = PAGE_CACHE_SIZE,
+ },
+ {
+ .cache = &ecryptfs_header_cache_1,
+ .name = "ecryptfs_headers_1",
+ .size = PAGE_CACHE_SIZE,
+ },
+ {
+ .cache = &ecryptfs_header_cache_2,
+ .name = "ecryptfs_headers_2",
+ .size = PAGE_CACHE_SIZE,
+ },
+ {
+ .cache = &ecryptfs_lower_page_cache,
+ .name = "ecryptfs_lower_page_cache",
+ .size = PAGE_CACHE_SIZE,
+ },
+};
+
+static void ecryptfs_free_kmem_caches(void)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(ecryptfs_cache_infos); i++) {
+ struct ecryptfs_cache_info *info;
+
+ info = &ecryptfs_cache_infos[i];
+ if (*(info->cache))
+ kmem_cache_destroy(*(info->cache));
+ }
+}
+
+/**
+ * ecryptfs_init_kmem_caches
+ *
+ * Returns zero on success; non-zero otherwise
+ */
+static int ecryptfs_init_kmem_caches(void)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(ecryptfs_cache_infos); i++) {
+ struct ecryptfs_cache_info *info;
+
+ info = &ecryptfs_cache_infos[i];
+ *(info->cache) = kmem_cache_create(info->name, info->size,
+ 0, SLAB_HWCACHE_ALIGN, info->ctor, NULL);
+ if (!*(info->cache)) {
+ ecryptfs_free_kmem_caches();
+ ecryptfs_printk(KERN_WARNING, "%s: "
+ "kmem_cache_create failed\n",
+ info->name);
+ return -ENOMEM;
+ }
+ }
+ return 0;
+}
+
+struct ecryptfs_obj {
+ char *name;
+ struct list_head slot_list;
+ struct kobject kobj;
+};
+
+struct ecryptfs_attribute {
+ struct attribute attr;
+ ssize_t(*show) (struct ecryptfs_obj *, char *);
+ ssize_t(*store) (struct ecryptfs_obj *, const char *, size_t);
+};
+
+static ssize_t
+ecryptfs_attr_store(struct kobject *kobj,
+ struct attribute *attr, const char *buf, size_t len)
+{
+ struct ecryptfs_obj *obj = container_of(kobj, struct ecryptfs_obj,
+ kobj);
+ struct ecryptfs_attribute *attribute =
+ container_of(attr, struct ecryptfs_attribute, attr);
+
+ return (attribute->store ? attribute->store(obj, buf, len) : 0);
+}
+
+static ssize_t
+ecryptfs_attr_show(struct kobject *kobj, struct attribute *attr, char *buf)
+{
+ struct ecryptfs_obj *obj = container_of(kobj, struct ecryptfs_obj,
+ kobj);
+ struct ecryptfs_attribute *attribute =
+ container_of(attr, struct ecryptfs_attribute, attr);
+
+ return (attribute->show ? attribute->show(obj, buf) : 0);
+}
+
+static struct sysfs_ops ecryptfs_sysfs_ops = {
+ .show = ecryptfs_attr_show,
+ .store = ecryptfs_attr_store
+};
+
+static struct kobj_type ecryptfs_ktype = {
+ .sysfs_ops = &ecryptfs_sysfs_ops
+};
+
+static decl_subsys(ecryptfs, &ecryptfs_ktype, NULL);
+
+static ssize_t version_show(struct ecryptfs_obj *obj, char *buff)
+{
+ return snprintf(buff, PAGE_SIZE, "%d\n", ECRYPTFS_VERSIONING_MASK);
+}
+
+static struct ecryptfs_attribute sysfs_attr_version = __ATTR_RO(version);
+
+struct ecryptfs_version_str_map_elem {
+ u32 flag;
+ char *str;
+} ecryptfs_version_str_map[] = {
+ {ECRYPTFS_VERSIONING_PASSPHRASE, "passphrase"},
+ {ECRYPTFS_VERSIONING_PUBKEY, "pubkey"},
+ {ECRYPTFS_VERSIONING_PLAINTEXT_PASSTHROUGH, "plaintext passthrough"},
+ {ECRYPTFS_VERSIONING_POLICY, "policy"}
+};
+
+static ssize_t version_str_show(struct ecryptfs_obj *obj, char *buff)
+{
+ int i;
+ int remaining = PAGE_SIZE;
+ int total_written = 0;
+
+ buff[0] = '\0';
+ for (i = 0; i < ARRAY_SIZE(ecryptfs_version_str_map); i++) {
+ int entry_size;
+
+ if (!(ECRYPTFS_VERSIONING_MASK
+ & ecryptfs_version_str_map[i].flag))
+ continue;
+ entry_size = strlen(ecryptfs_version_str_map[i].str);
+ if ((entry_size + 2) > remaining)
+ goto out;
+ memcpy(buff, ecryptfs_version_str_map[i].str, entry_size);
+ buff[entry_size++] = '\n';
+ buff[entry_size] = '\0';
+ buff += entry_size;
+ total_written += entry_size;
+ remaining -= entry_size;
+ }
+out:
+ return total_written;
+}
+
+static struct ecryptfs_attribute sysfs_attr_version_str = __ATTR_RO(version_str);
+
+static int do_sysfs_registration(void)
+{
+ int rc;
+
+ if ((rc = subsystem_register(&ecryptfs_subsys))) {
+ printk(KERN_ERR
+ "Unable to register ecryptfs sysfs subsystem\n");
+ goto out;
+ }
+ rc = sysfs_create_file(&ecryptfs_subsys.kset.kobj,
+ &sysfs_attr_version.attr);
+ if (rc) {
+ printk(KERN_ERR
+ "Unable to create ecryptfs version attribute\n");
+ subsystem_unregister(&ecryptfs_subsys);
+ goto out;
+ }
+ rc = sysfs_create_file(&ecryptfs_subsys.kset.kobj,
+ &sysfs_attr_version_str.attr);
+ if (rc) {
+ printk(KERN_ERR
+ "Unable to create ecryptfs version_str attribute\n");
+ sysfs_remove_file(&ecryptfs_subsys.kset.kobj,
+ &sysfs_attr_version.attr);
+ subsystem_unregister(&ecryptfs_subsys);
+ goto out;
+ }
+out:
+ return rc;
+}
+
+static int __init ecryptfs_init(void)
+{
+ int rc;
+
+ if (ECRYPTFS_DEFAULT_EXTENT_SIZE > PAGE_CACHE_SIZE) {
+ rc = -EINVAL;
+ ecryptfs_printk(KERN_ERR, "The eCryptfs extent size is "
+ "larger than the host's page size, and so "
+ "eCryptfs cannot run on this system. The "
+ "default eCryptfs extent size is [%d] bytes; "
+ "the page size is [%d] bytes.\n",
+ ECRYPTFS_DEFAULT_EXTENT_SIZE, PAGE_CACHE_SIZE);
+ goto out;
+ }
+ rc = ecryptfs_init_kmem_caches();
+ if (rc) {
+ printk(KERN_ERR
+ "Failed to allocate one or more kmem_cache objects\n");
+ goto out;
+ }
+ rc = register_filesystem(&ecryptfs_fs_type);
+ if (rc) {
+ printk(KERN_ERR "Failed to register filesystem\n");
+ ecryptfs_free_kmem_caches();
+ goto out;
+ }
+ kset_set_kset_s(&ecryptfs_subsys, fs_subsys);
+ sysfs_attr_version.attr.owner = THIS_MODULE;
+ sysfs_attr_version_str.attr.owner = THIS_MODULE;
+ rc = do_sysfs_registration();
+ if (rc) {
+ printk(KERN_ERR "sysfs registration failed\n");
+ unregister_filesystem(&ecryptfs_fs_type);
+ ecryptfs_free_kmem_caches();
+ goto out;
+ }
+out:
+ return rc;
+}
+
+static void __exit ecryptfs_exit(void)
+{
+ sysfs_remove_file(&ecryptfs_subsys.kset.kobj,
+ &sysfs_attr_version.attr);
+ sysfs_remove_file(&ecryptfs_subsys.kset.kobj,
+ &sysfs_attr_version_str.attr);
+ subsystem_unregister(&ecryptfs_subsys);
+ unregister_filesystem(&ecryptfs_fs_type);
+ ecryptfs_free_kmem_caches();
+}
+
+MODULE_AUTHOR("Michael A. Halcrow <mhalcrow@us.ibm.com>");
+MODULE_DESCRIPTION("eCryptfs");
+
+MODULE_LICENSE("GPL");
+
+module_init(ecryptfs_init)
+module_exit(ecryptfs_exit)
diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c
new file mode 100644
index 000000000000..924dd90a4cf5
--- /dev/null
+++ b/fs/ecryptfs/mmap.c
@@ -0,0 +1,788 @@
+/**
+ * eCryptfs: Linux filesystem encryption layer
+ * This is where eCryptfs coordinates the symmetric encryption and
+ * decryption of the file data as it passes between the lower
+ * encrypted file and the upper decrypted file.
+ *
+ * Copyright (C) 1997-2003 Erez Zadok
+ * Copyright (C) 2001-2003 Stony Brook University
+ * Copyright (C) 2004-2006 International Business Machines Corp.
+ * Author(s): Michael A. Halcrow <mahalcro@us.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+ * 02111-1307, USA.
+ */
+
+#include <linux/pagemap.h>
+#include <linux/writeback.h>
+#include <linux/page-flags.h>
+#include <linux/mount.h>
+#include <linux/file.h>
+#include <linux/crypto.h>
+#include <linux/scatterlist.h>
+#include "ecryptfs_kernel.h"
+
+struct kmem_cache *ecryptfs_lower_page_cache;
+
+/**
+ * ecryptfs_get1page
+ *
+ * Get one page from cache or lower f/s, return error otherwise.
+ *
+ * Returns unlocked and up-to-date page (if ok), with increased
+ * refcnt.
+ */
+static struct page *ecryptfs_get1page(struct file *file, int index)
+{
+ struct page *page;
+ struct dentry *dentry;
+ struct inode *inode;
+ struct address_space *mapping;
+
+ dentry = file->f_dentry;
+ inode = dentry->d_inode;
+ mapping = inode->i_mapping;
+ page = read_cache_page(mapping, index,
+ (filler_t *)mapping->a_ops->readpage,
+ (void *)file);
+ if (IS_ERR(page))
+ goto out;
+ wait_on_page_locked(page);
+out:
+ return page;
+}
+
+static
+int write_zeros(struct file *file, pgoff_t index, int start, int num_zeros);
+
+/**
+ * ecryptfs_fill_zeros
+ * @file: The ecryptfs file
+ * @new_length: The new length of the data in the underlying file;
+ * everything between the prior end of the file and the
+ * new end of the file will be filled with zero's.
+ * new_length must be greater than current length
+ *
+ * Function for handling lseek-ing past the end of the file.
+ *
+ * This function does not support shrinking, only growing a file.
+ *
+ * Returns zero on success; non-zero otherwise.
+ */
+int ecryptfs_fill_zeros(struct file *file, loff_t new_length)
+{
+ int rc = 0;
+ struct dentry *dentry = file->f_dentry;
+ struct inode *inode = dentry->d_inode;
+ pgoff_t old_end_page_index = 0;
+ pgoff_t index = old_end_page_index;
+ int old_end_pos_in_page = -1;
+ pgoff_t new_end_page_index;
+ int new_end_pos_in_page;
+ loff_t cur_length = i_size_read(inode);
+
+ if (cur_length != 0) {
+ index = old_end_page_index =
+ ((cur_length - 1) >> PAGE_CACHE_SHIFT);
+ old_end_pos_in_page = ((cur_length - 1) & ~PAGE_CACHE_MASK);
+ }
+ new_end_page_index = ((new_length - 1) >> PAGE_CACHE_SHIFT);
+ new_end_pos_in_page = ((new_length - 1) & ~PAGE_CACHE_MASK);
+ ecryptfs_printk(KERN_DEBUG, "old_end_page_index = [0x%.16x]; "
+ "old_end_pos_in_page = [%d]; "
+ "new_end_page_index = [0x%.16x]; "
+ "new_end_pos_in_page = [%d]\n",
+ old_end_page_index, old_end_pos_in_page,
+ new_end_page_index, new_end_pos_in_page);
+ if (old_end_page_index == new_end_page_index) {
+ /* Start and end are in the same page; we just need to
+ * set a portion of the existing page to zero's */
+ rc = write_zeros(file, index, (old_end_pos_in_page + 1),
+ (new_end_pos_in_page - old_end_pos_in_page));
+ if (rc)
+ ecryptfs_printk(KERN_ERR, "write_zeros(file=[%p], "
+ "index=[0x%.16x], "
+ "old_end_pos_in_page=[d], "
+ "(PAGE_CACHE_SIZE - new_end_pos_in_page"
+ "=[%d]"
+ ")=[d]) returned [%d]\n", file, index,
+ old_end_pos_in_page,
+ new_end_pos_in_page,
+ (PAGE_CACHE_SIZE - new_end_pos_in_page),
+ rc);
+ goto out;
+ }
+ /* Fill the remainder of the previous last page with zeros */
+ rc = write_zeros(file, index, (old_end_pos_in_page + 1),
+ ((PAGE_CACHE_SIZE - 1) - old_end_pos_in_page));
+ if (rc) {
+ ecryptfs_printk(KERN_ERR, "write_zeros(file=[%p], "
+ "index=[0x%.16x], old_end_pos_in_page=[d], "
+ "(PAGE_CACHE_SIZE - old_end_pos_in_page)=[d]) "
+ "returned [%d]\n", file, index,
+ old_end_pos_in_page,
+ (PAGE_CACHE_SIZE - old_end_pos_in_page), rc);
+ goto out;
+ }
+ index++;
+ while (index < new_end_page_index) {
+ /* Fill all intermediate pages with zeros */
+ rc = write_zeros(file, index, 0, PAGE_CACHE_SIZE);
+ if (rc) {
+ ecryptfs_printk(KERN_ERR, "write_zeros(file=[%p], "
+ "index=[0x%.16x], "
+ "old_end_pos_in_page=[d], "
+ "(PAGE_CACHE_SIZE - new_end_pos_in_page"
+ "=[%d]"
+ ")=[d]) returned [%d]\n", file, index,
+ old_end_pos_in_page,
+ new_end_pos_in_page,
+ (PAGE_CACHE_SIZE - new_end_pos_in_page),
+ rc);
+ goto out;
+ }
+ index++;
+ }
+ /* Fill the portion at the beginning of the last new page with
+ * zero's */
+ rc = write_zeros(file, index, 0, (new_end_pos_in_page + 1));
+ if (rc) {
+ ecryptfs_printk(KERN_ERR, "write_zeros(file="
+ "[%p], index=[0x%.16x], 0, "
+ "new_end_pos_in_page=[%d]"
+ "returned [%d]\n", file, index,
+ new_end_pos_in_page, rc);
+ goto out;
+ }
+out:
+ return rc;
+}
+
+/**
+ * ecryptfs_writepage
+ * @page: Page that is locked before this call is made
+ *
+ * Returns zero on success; non-zero otherwise
+ */
+static int ecryptfs_writepage(struct page *page, struct writeback_control *wbc)
+{
+ struct ecryptfs_page_crypt_context ctx;
+ int rc;
+
+ ctx.page = page;
+ ctx.mode = ECRYPTFS_WRITEPAGE_MODE;
+ ctx.param.wbc = wbc;
+ rc = ecryptfs_encrypt_page(&ctx);
+ if (rc) {
+ ecryptfs_printk(KERN_WARNING, "Error encrypting "
+ "page (upper index [0x%.16x])\n", page->index);
+ ClearPageUptodate(page);
+ goto out;
+ }
+ SetPageUptodate(page);
+ unlock_page(page);
+out:
+ return rc;
+}
+
+/**
+ * Reads the data from the lower file file at index lower_page_index
+ * and copies that data into page.
+ *
+ * @param page Page to fill
+ * @param lower_page_index Index of the page in the lower file to get
+ */
+int ecryptfs_do_readpage(struct file *file, struct page *page,
+ pgoff_t lower_page_index)
+{
+ int rc;
+ struct dentry *dentry;
+ struct file *lower_file;
+ struct dentry *lower_dentry;
+ struct inode *inode;
+ struct inode *lower_inode;
+ char *page_data;
+ struct page *lower_page = NULL;
+ char *lower_page_data;
+ const struct address_space_operations *lower_a_ops;
+
+ dentry = file->f_dentry;
+ lower_file = ecryptfs_file_to_lower(file);
+ lower_dentry = ecryptfs_dentry_to_lower(dentry);
+ inode = dentry->d_inode;
+ lower_inode = ecryptfs_inode_to_lower(inode);
+ lower_a_ops = lower_inode->i_mapping->a_ops;
+ lower_page = read_cache_page(lower_inode->i_mapping, lower_page_index,
+ (filler_t *)lower_a_ops->readpage,
+ (void *)lower_file);
+ if (IS_ERR(lower_page)) {
+ rc = PTR_ERR(lower_page);
+ lower_page = NULL;
+ ecryptfs_printk(KERN_ERR, "Error reading from page cache\n");
+ goto out;
+ }
+ wait_on_page_locked(lower_page);
+ page_data = (char *)kmap(page);
+ if (!page_data) {
+ rc = -ENOMEM;
+ ecryptfs_printk(KERN_ERR, "Error mapping page\n");
+ goto out;
+ }
+ lower_page_data = (char *)kmap(lower_page);
+ if (!lower_page_data) {
+ rc = -ENOMEM;
+ ecryptfs_printk(KERN_ERR, "Error mapping page\n");
+ kunmap(page);
+ goto out;
+ }
+ memcpy(page_data, lower_page_data, PAGE_CACHE_SIZE);
+ kunmap(lower_page);
+ kunmap(page);
+ rc = 0;
+out:
+ if (likely(lower_page))
+ page_cache_release(lower_page);
+ if (rc == 0)
+ SetPageUptodate(page);
+ else
+ ClearPageUptodate(page);
+ return rc;
+}
+
+/**
+ * ecryptfs_readpage
+ * @file: This is an ecryptfs file
+ * @page: ecryptfs associated page to stick the read data into
+ *
+ * Read in a page, decrypting if necessary.
+ *
+ * Returns zero on success; non-zero on error.
+ */
+static int ecryptfs_readpage(struct file *file, struct page *page)
+{
+ int rc = 0;
+ struct ecryptfs_crypt_stat *crypt_stat;
+
+ BUG_ON(!(file && file->f_dentry && file->f_dentry->d_inode));
+ crypt_stat =
+ &ecryptfs_inode_to_private(file->f_dentry->d_inode)->crypt_stat;
+ if (!crypt_stat
+ || !ECRYPTFS_CHECK_FLAG(crypt_stat->flags, ECRYPTFS_ENCRYPTED)
+ || ECRYPTFS_CHECK_FLAG(crypt_stat->flags, ECRYPTFS_NEW_FILE)) {
+ ecryptfs_printk(KERN_DEBUG,
+ "Passing through unencrypted page\n");
+ rc = ecryptfs_do_readpage(file, page, page->index);
+ if (rc) {
+ ecryptfs_printk(KERN_ERR, "Error reading page; rc = "
+ "[%d]\n", rc);
+ goto out;
+ }
+ } else {
+ rc = ecryptfs_decrypt_page(file, page);
+ if (rc) {
+
+ ecryptfs_printk(KERN_ERR, "Error decrypting page; "
+ "rc = [%d]\n", rc);
+ goto out;
+ }
+ }
+ SetPageUptodate(page);
+out:
+ if (rc)
+ ClearPageUptodate(page);
+ ecryptfs_printk(KERN_DEBUG, "Unlocking page with index = [0x%.16x]\n",
+ page->index);
+ unlock_page(page);
+ return rc;
+}
+
+static int fill_zeros_to_end_of_page(struct page *page, unsigned int to)
+{
+ struct inode *inode = page->mapping->host;
+ int end_byte_in_page;
+ int rc = 0;
+ char *page_virt;
+
+ if ((i_size_read(inode) / PAGE_CACHE_SIZE) == page->index) {
+ end_byte_in_page = i_size_read(inode) % PAGE_CACHE_SIZE;
+ if (to > end_byte_in_page)
+ end_byte_in_page = to;
+ page_virt = kmap(page);
+ if (!page_virt) {
+ rc = -ENOMEM;
+ ecryptfs_printk(KERN_WARNING,
+ "Could not map page\n");
+ goto out;
+ }
+ memset((page_virt + end_byte_in_page), 0,
+ (PAGE_CACHE_SIZE - end_byte_in_page));
+ kunmap(page);
+ }
+out:
+ return rc;
+}
+
+static int ecryptfs_prepare_write(struct file *file, struct page *page,
+ unsigned from, unsigned to)
+{
+ int rc = 0;
+
+ kmap(page);
+ if (from == 0 && to == PAGE_CACHE_SIZE)
+ goto out; /* If we are writing a full page, it will be
+ up to date. */
+ if (!PageUptodate(page))
+ rc = ecryptfs_do_readpage(file, page, page->index);
+out:
+ return rc;
+}
+
+int ecryptfs_grab_and_map_lower_page(struct page **lower_page,
+ char **lower_virt,
+ struct inode *lower_inode,
+ unsigned long lower_page_index)
+{
+ int rc = 0;
+
+ (*lower_page) = grab_cache_page(lower_inode->i_mapping,
+ lower_page_index);
+ if (!(*lower_page)) {
+ ecryptfs_printk(KERN_ERR, "grab_cache_page for "
+ "lower_page_index = [0x%.16x] failed\n",
+ lower_page_index);
+ rc = -EINVAL;
+ goto out;
+ }
+ if (lower_virt)
+ (*lower_virt) = kmap((*lower_page));
+ else
+ kmap((*lower_page));
+out:
+ return rc;
+}
+
+int ecryptfs_writepage_and_release_lower_page(struct page *lower_page,
+ struct inode *lower_inode,
+ struct writeback_control *wbc)
+{
+ int rc = 0;
+
+ rc = lower_inode->i_mapping->a_ops->writepage(lower_page, wbc);
+ if (rc) {
+ ecryptfs_printk(KERN_ERR, "Error calling lower writepage(); "
+ "rc = [%d]\n", rc);
+ goto out;
+ }
+ lower_inode->i_mtime = lower_inode->i_ctime = CURRENT_TIME;
+ page_cache_release(lower_page);
+out:
+ return rc;
+}
+
+static void ecryptfs_unmap_and_release_lower_page(struct page *lower_page)
+{
+ kunmap(lower_page);
+ ecryptfs_printk(KERN_DEBUG, "Unlocking lower page with index = "
+ "[0x%.16x]\n", lower_page->index);
+ unlock_page(lower_page);
+ page_cache_release(lower_page);
+}
+
+/**
+ * ecryptfs_write_inode_size_to_header
+ *
+ * Writes the lower file size to the first 8 bytes of the header.
+ *
+ * Returns zero on success; non-zero on error.
+ */
+int
+ecryptfs_write_inode_size_to_header(struct file *lower_file,
+ struct inode *lower_inode,
+ struct inode *inode)
+{
+ int rc = 0;
+ struct page *header_page;
+ char *header_virt;
+ const struct address_space_operations *lower_a_ops;
+ u64 file_size;
+
+ rc = ecryptfs_grab_and_map_lower_page(&header_page, &header_virt,
+ lower_inode, 0);
+ if (rc) {
+ ecryptfs_printk(KERN_ERR, "grab_cache_page for header page "
+ "failed\n");
+ goto out;
+ }
+ lower_a_ops = lower_inode->i_mapping->a_ops;
+ rc = lower_a_ops->prepare_write(lower_file, header_page, 0, 8);
+ file_size = (u64)i_size_read(inode);
+ ecryptfs_printk(KERN_DEBUG, "Writing size: [0x%.16x]\n", file_size);
+ file_size = cpu_to_be64(file_size);
+ memcpy(header_virt, &file_size, sizeof(u64));
+ rc = lower_a_ops->commit_write(lower_file, header_page, 0, 8);
+ if (rc < 0)
+ ecryptfs_printk(KERN_ERR, "Error commiting header page "
+ "write\n");
+ ecryptfs_unmap_and_release_lower_page(header_page);
+ lower_inode->i_mtime = lower_inode->i_ctime = CURRENT_TIME;
+ mark_inode_dirty_sync(inode);
+out:
+ return rc;
+}
+
+int ecryptfs_get_lower_page(struct page **lower_page, struct inode *lower_inode,
+ struct file *lower_file,
+ unsigned long lower_page_index, int byte_offset,
+ int region_bytes)
+{
+ int rc = 0;
+
+ rc = ecryptfs_grab_and_map_lower_page(lower_page, NULL, lower_inode,
+ lower_page_index);
+ if (rc) {
+ ecryptfs_printk(KERN_ERR, "Error attempting to grab and map "
+ "lower page with index [0x%.16x]\n",
+ lower_page_index);
+ goto out;
+ }
+ rc = lower_inode->i_mapping->a_ops->prepare_write(lower_file,
+ (*lower_page),
+ byte_offset,
+ region_bytes);
+ if (rc) {
+ ecryptfs_printk(KERN_ERR, "prepare_write for "
+ "lower_page_index = [0x%.16x] failed; rc = "
+ "[%d]\n", lower_page_index, rc);
+ }
+out:
+ if (rc && (*lower_page)) {
+ ecryptfs_unmap_and_release_lower_page(*lower_page);
+ (*lower_page) = NULL;
+ }
+ return rc;
+}
+
+/**
+ * ecryptfs_commit_lower_page
+ *
+ * Returns zero on success; non-zero on error
+ */
+int
+ecryptfs_commit_lower_page(struct page *lower_page, struct inode *lower_inode,
+ struct file *lower_file, int byte_offset,
+ int region_size)
+{
+ int rc = 0;
+
+ rc = lower_inode->i_mapping->a_ops->commit_write(
+ lower_file, lower_page, byte_offset, region_size);
+ if (rc < 0) {
+ ecryptfs_printk(KERN_ERR,
+ "Error committing write; rc = [%d]\n", rc);
+ } else
+ rc = 0;
+ ecryptfs_unmap_and_release_lower_page(lower_page);
+ return rc;
+}
+
+/**
+ * ecryptfs_copy_page_to_lower
+ *
+ * Used for plaintext pass-through; no page index interpolation
+ * required.
+ */
+int ecryptfs_copy_page_to_lower(struct page *page, struct inode *lower_inode,
+ struct file *lower_file)
+{
+ int rc = 0;
+ struct page *lower_page;
+
+ rc = ecryptfs_get_lower_page(&lower_page, lower_inode, lower_file,
+ page->index, 0, PAGE_CACHE_SIZE);
+ if (rc) {
+ ecryptfs_printk(KERN_ERR, "Error attempting to get page "
+ "at index [0x%.16x]\n", page->index);
+ goto out;
+ }
+ /* TODO: aops */
+ memcpy((char *)page_address(lower_page), page_address(page),
+ PAGE_CACHE_SIZE);
+ rc = ecryptfs_commit_lower_page(lower_page, lower_inode, lower_file,
+ 0, PAGE_CACHE_SIZE);
+ if (rc)
+ ecryptfs_printk(KERN_ERR, "Error attempting to commit page "
+ "at index [0x%.16x]\n", page->index);
+out:
+ return rc;
+}
+
+static int
+process_new_file(struct ecryptfs_crypt_stat *crypt_stat,
+ struct file *file, struct inode *inode)
+{
+ struct page *header_page;
+ const struct address_space_operations *lower_a_ops;
+ struct inode *lower_inode;
+ struct file *lower_file;
+ char *header_virt;
+ int rc = 0;
+ int current_header_page = 0;
+ int header_pages;
+ int more_header_data_to_be_written = 1;
+
+ lower_inode = ecryptfs_inode_to_lower(inode);
+ lower_file = ecryptfs_file_to_lower(file);
+ lower_a_ops = lower_inode->i_mapping->a_ops;
+ header_pages = ((crypt_stat->header_extent_size
+ * crypt_stat->num_header_extents_at_front)
+ / PAGE_CACHE_SIZE);
+ BUG_ON(header_pages < 1);
+ while (current_header_page < header_pages) {
+ rc = ecryptfs_grab_and_map_lower_page(&header_page,
+ &header_virt,
+ lower_inode,
+ current_header_page);
+ if (rc) {
+ ecryptfs_printk(KERN_ERR, "grab_cache_page for "
+ "header page [%d] failed; rc = [%d]\n",
+ current_header_page, rc);
+ goto out;
+ }
+ rc = lower_a_ops->prepare_write(lower_file, header_page, 0,
+ PAGE_CACHE_SIZE);
+ if (rc) {
+ ecryptfs_printk(KERN_ERR, "Error preparing to write "
+ "header page out; rc = [%d]\n", rc);
+ goto out;
+ }
+ memset(header_virt, 0, PAGE_CACHE_SIZE);
+ if (more_header_data_to_be_written) {
+ rc = ecryptfs_write_headers_virt(header_virt,
+ crypt_stat,
+ file->f_dentry);
+ if (rc) {
+ ecryptfs_printk(KERN_WARNING, "Error "
+ "generating header; rc = "
+ "[%d]\n", rc);
+ rc = -EIO;
+ memset(header_virt, 0, PAGE_CACHE_SIZE);
+ ecryptfs_unmap_and_release_lower_page(
+ header_page);
+ goto out;
+ }
+ if (current_header_page == 0)
+ memset(header_virt, 0, 8);
+ more_header_data_to_be_written = 0;
+ }
+ rc = lower_a_ops->commit_write(lower_file, header_page, 0,
+ PAGE_CACHE_SIZE);
+ ecryptfs_unmap_and_release_lower_page(header_page);
+ if (rc < 0) {
+ ecryptfs_printk(KERN_ERR,
+ "Error commiting header page write; "
+ "rc = [%d]\n", rc);
+ break;
+ }
+ current_header_page++;
+ }
+ if (rc >= 0) {
+ rc = 0;
+ ecryptfs_printk(KERN_DEBUG, "lower_inode->i_blocks = "
+ "[0x%.16x]\n", lower_inode->i_blocks);
+ i_size_write(inode, 0);
+ lower_inode->i_mtime = lower_inode->i_ctime = CURRENT_TIME;
+ mark_inode_dirty_sync(inode);
+ }
+ ecryptfs_printk(KERN_DEBUG, "Clearing ECRYPTFS_NEW_FILE flag in "
+ "crypt_stat at memory location [%p]\n", crypt_stat);
+ ECRYPTFS_CLEAR_FLAG(crypt_stat->flags, ECRYPTFS_NEW_FILE);
+out:
+ return rc;
+}
+
+/**
+ * ecryptfs_commit_write
+ * @file: The eCryptfs file object
+ * @page: The eCryptfs page
+ * @from: Ignored (we rotate the page IV on each write)
+ * @to: Ignored
+ *
+ * This is where we encrypt the data and pass the encrypted data to
+ * the lower filesystem. In OpenPGP-compatible mode, we operate on
+ * entire underlying packets.
+ */
+static int ecryptfs_commit_write(struct file *file, struct page *page,
+ unsigned from, unsigned to)
+{
+ struct ecryptfs_page_crypt_context ctx;
+ loff_t pos;
+ struct inode *inode;
+ struct inode *lower_inode;
+ struct file *lower_file;
+ struct ecryptfs_crypt_stat *crypt_stat;
+ int rc;
+
+ inode = page->mapping->host;
+ lower_inode = ecryptfs_inode_to_lower(inode);
+ lower_file = ecryptfs_file_to_lower(file);
+ mutex_lock(&lower_inode->i_mutex);
+ crypt_stat =
+ &ecryptfs_inode_to_private(file->f_dentry->d_inode)->crypt_stat;
+ if (ECRYPTFS_CHECK_FLAG(crypt_stat->flags, ECRYPTFS_NEW_FILE)) {
+ ecryptfs_printk(KERN_DEBUG, "ECRYPTFS_NEW_FILE flag set in "
+ "crypt_stat at memory location [%p]\n", crypt_stat);
+ rc = process_new_file(crypt_stat, file, inode);
+ if (rc) {
+ ecryptfs_printk(KERN_ERR, "Error processing new "
+ "file; rc = [%d]\n", rc);
+ goto out;
+ }
+ } else
+ ecryptfs_printk(KERN_DEBUG, "Not a new file\n");
+ ecryptfs_printk(KERN_DEBUG, "Calling fill_zeros_to_end_of_page"
+ "(page w/ index = [0x%.16x], to = [%d])\n", page->index,
+ to);
+ rc = fill_zeros_to_end_of_page(page, to);
+ if (rc) {
+ ecryptfs_printk(KERN_WARNING, "Error attempting to fill "
+ "zeros in page with index = [0x%.16x]\n",
+ page->index);
+ goto out;
+ }
+ ctx.page = page;
+ ctx.mode = ECRYPTFS_PREPARE_COMMIT_MODE;
+ ctx.param.lower_file = lower_file;
+ rc = ecryptfs_encrypt_page(&ctx);
+ if (rc) {
+ ecryptfs_printk(KERN_WARNING, "Error encrypting page (upper "
+ "index [0x%.16x])\n", page->index);
+ goto out;
+ }
+ rc = 0;
+ inode->i_blocks = lower_inode->i_blocks;
+ pos = (page->index << PAGE_CACHE_SHIFT) + to;
+ if (pos > i_size_read(inode)) {
+ i_size_write(inode, pos);
+ ecryptfs_printk(KERN_DEBUG, "Expanded file size to "
+ "[0x%.16x]\n", i_size_read(inode));
+ }
+ ecryptfs_write_inode_size_to_header(lower_file, lower_inode, inode);
+ lower_inode->i_mtime = lower_inode->i_ctime = CURRENT_TIME;
+ mark_inode_dirty_sync(inode);
+out:
+ kunmap(page); /* mapped in prior call (prepare_write) */
+ if (rc < 0)
+ ClearPageUptodate(page);
+ else
+ SetPageUptodate(page);
+ mutex_unlock(&lower_inode->i_mutex);
+ return rc;
+}
+
+/**
+ * write_zeros
+ * @file: The ecryptfs file
+ * @index: The index in which we are writing
+ * @start: The position after the last block of data
+ * @num_zeros: The number of zeros to write
+ *
+ * Write a specified number of zero's to a page.
+ *
+ * (start + num_zeros) must be less than or equal to PAGE_CACHE_SIZE
+ */
+static
+int write_zeros(struct file *file, pgoff_t index, int start, int num_zeros)
+{
+ int rc = 0;
+ struct page *tmp_page;
+
+ tmp_page = ecryptfs_get1page(file, index);
+ if (IS_ERR(tmp_page)) {
+ ecryptfs_printk(KERN_ERR, "Error getting page at index "
+ "[0x%.16x]\n", index);
+ rc = PTR_ERR(tmp_page);
+ goto out;
+ }
+ kmap(tmp_page);
+ rc = ecryptfs_prepare_write(file, tmp_page, start, start + num_zeros);
+ if (rc) {
+ ecryptfs_printk(KERN_ERR, "Error preparing to write zero's "
+ "to remainder of page at index [0x%.16x]\n",
+ index);
+ kunmap(tmp_page);
+ page_cache_release(tmp_page);
+ goto out;
+ }
+ memset(((char *)page_address(tmp_page) + start), 0, num_zeros);
+ rc = ecryptfs_commit_write(file, tmp_page, start, start + num_zeros);
+ if (rc < 0) {
+ ecryptfs_printk(KERN_ERR, "Error attempting to write zero's "
+ "to remainder of page at index [0x%.16x]\n",
+ index);
+ kunmap(tmp_page);
+ page_cache_release(tmp_page);
+ goto out;
+ }
+ rc = 0;
+ kunmap(tmp_page);
+ page_cache_release(tmp_page);
+out:
+ return rc;
+}
+
+static sector_t ecryptfs_bmap(struct address_space *mapping, sector_t block)
+{
+ int rc = 0;
+ struct inode *inode;
+ struct inode *lower_inode;
+
+ inode = (struct inode *)mapping->host;
+ lower_inode = ecryptfs_inode_to_lower(inode);
+ if (lower_inode->i_mapping->a_ops->bmap)
+ rc = lower_inode->i_mapping->a_ops->bmap(lower_inode->i_mapping,
+ block);
+ return rc;
+}
+
+static void ecryptfs_sync_page(struct page *page)
+{
+ struct inode *inode;
+ struct inode *lower_inode;
+ struct page *lower_page;
+
+ inode = page->mapping->host;
+ lower_inode = ecryptfs_inode_to_lower(inode);
+ /* NOTE: Recently swapped with grab_cache_page(), since
+ * sync_page() just makes sure that pending I/O gets done. */
+ lower_page = find_lock_page(lower_inode->i_mapping, page->index);
+ if (!lower_page) {
+ ecryptfs_printk(KERN_DEBUG, "find_lock_page failed\n");
+ return;
+ }
+ lower_page->mapping->a_ops->sync_page(lower_page);
+ ecryptfs_printk(KERN_DEBUG, "Unlocking page with index = [0x%.16x]\n",
+ lower_page->index);
+ unlock_page(lower_page);
+ page_cache_release(lower_page);
+}
+
+struct address_space_operations ecryptfs_aops = {
+ .writepage = ecryptfs_writepage,
+ .readpage = ecryptfs_readpage,
+ .prepare_write = ecryptfs_prepare_write,
+ .commit_write = ecryptfs_commit_write,
+ .bmap = ecryptfs_bmap,
+ .sync_page = ecryptfs_sync_page,
+};
diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c
new file mode 100644
index 000000000000..c337c0410fb1
--- /dev/null
+++ b/fs/ecryptfs/super.c
@@ -0,0 +1,198 @@
+/**
+ * eCryptfs: Linux filesystem encryption layer
+ *
+ * Copyright (C) 1997-2003 Erez Zadok
+ * Copyright (C) 2001-2003 Stony Brook University
+ * Copyright (C) 2004-2006 International Business Machines Corp.
+ * Author(s): Michael A. Halcrow <mahalcro@us.ibm.com>
+ * Michael C. Thompson <mcthomps@us.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+ * 02111-1307, USA.
+ */
+
+#include <linux/fs.h>
+#include <linux/mount.h>
+#include <linux/key.h>
+#include <linux/seq_file.h>
+#include <linux/crypto.h>
+#include "ecryptfs_kernel.h"
+
+struct kmem_cache *ecryptfs_inode_info_cache;
+
+/**
+ * ecryptfs_alloc_inode - allocate an ecryptfs inode
+ * @sb: Pointer to the ecryptfs super block
+ *
+ * Called to bring an inode into existence.
+ *
+ * Only handle allocation, setting up structures should be done in
+ * ecryptfs_read_inode. This is because the kernel, between now and
+ * then, will 0 out the private data pointer.
+ *
+ * Returns a pointer to a newly allocated inode, NULL otherwise
+ */
+static struct inode *ecryptfs_alloc_inode(struct super_block *sb)
+{
+ struct ecryptfs_inode_info *ecryptfs_inode;
+ struct inode *inode = NULL;
+
+ ecryptfs_inode = kmem_cache_alloc(ecryptfs_inode_info_cache,
+ SLAB_KERNEL);
+ if (unlikely(!ecryptfs_inode))
+ goto out;
+ ecryptfs_init_crypt_stat(&ecryptfs_inode->crypt_stat);
+ inode = &ecryptfs_inode->vfs_inode;
+out:
+ return inode;
+}
+
+/**
+ * ecryptfs_destroy_inode
+ * @inode: The ecryptfs inode
+ *
+ * This is used during the final destruction of the inode.
+ * All allocation of memory related to the inode, including allocated
+ * memory in the crypt_stat struct, will be released here.
+ * There should be no chance that this deallocation will be missed.
+ */
+static void ecryptfs_destroy_inode(struct inode *inode)
+{
+ struct ecryptfs_inode_info *inode_info;
+
+ inode_info = ecryptfs_inode_to_private(inode);
+ ecryptfs_destruct_crypt_stat(&inode_info->crypt_stat);
+ kmem_cache_free(ecryptfs_inode_info_cache, inode_info);
+}
+
+/**
+ * ecryptfs_init_inode
+ * @inode: The ecryptfs inode
+ *
+ * Set up the ecryptfs inode.
+ */
+void ecryptfs_init_inode(struct inode *inode, struct inode *lower_inode)
+{
+ ecryptfs_set_inode_lower(inode, lower_inode);
+ inode->i_ino = lower_inode->i_ino;
+ inode->i_version++;
+ inode->i_op = &ecryptfs_main_iops;
+ inode->i_fop = &ecryptfs_main_fops;
+ inode->i_mapping->a_ops = &ecryptfs_aops;
+}
+
+/**
+ * ecryptfs_put_super
+ * @sb: Pointer to the ecryptfs super block
+ *
+ * Final actions when unmounting a file system.
+ * This will handle deallocation and release of our private data.
+ */
+static void ecryptfs_put_super(struct super_block *sb)
+{
+ struct ecryptfs_sb_info *sb_info = ecryptfs_superblock_to_private(sb);
+
+ ecryptfs_destruct_mount_crypt_stat(&sb_info->mount_crypt_stat);
+ kmem_cache_free(ecryptfs_sb_info_cache, sb_info);
+ ecryptfs_set_superblock_private(sb, NULL);
+}
+
+/**
+ * ecryptfs_statfs
+ * @sb: The ecryptfs super block
+ * @buf: The struct kstatfs to fill in with stats
+ *
+ * Get the filesystem statistics. Currently, we let this pass right through
+ * to the lower filesystem and take no action ourselves.
+ */
+static int ecryptfs_statfs(struct dentry *dentry, struct kstatfs *buf)
+{
+ return vfs_statfs(ecryptfs_dentry_to_lower(dentry), buf);
+}
+
+/**
+ * ecryptfs_clear_inode
+ * @inode - The ecryptfs inode
+ *
+ * Called by iput() when the inode reference count reached zero
+ * and the inode is not hashed anywhere. Used to clear anything
+ * that needs to be, before the inode is completely destroyed and put
+ * on the inode free list. We use this to drop out reference to the
+ * lower inode.
+ */
+static void ecryptfs_clear_inode(struct inode *inode)
+{
+ iput(ecryptfs_inode_to_lower(inode));
+}
+
+/**
+ * ecryptfs_umount_begin
+ *
+ * Called in do_umount().
+ */
+static void ecryptfs_umount_begin(struct vfsmount *vfsmnt, int flags)
+{
+ struct vfsmount *lower_mnt =
+ ecryptfs_dentry_to_lower_mnt(vfsmnt->mnt_sb->s_root);
+ struct super_block *lower_sb;
+
+ mntput(lower_mnt);
+ lower_sb = lower_mnt->mnt_sb;
+ if (lower_sb->s_op->umount_begin)
+ lower_sb->s_op->umount_begin(lower_mnt, flags);
+}
+
+/**
+ * ecryptfs_show_options
+ *
+ * Prints the directory we are currently mounted over.
+ * Returns zero on success; non-zero otherwise
+ */
+static int ecryptfs_show_options(struct seq_file *m, struct vfsmount *mnt)
+{
+ struct super_block *sb = mnt->mnt_sb;
+ struct dentry *lower_root_dentry = ecryptfs_dentry_to_lower(sb->s_root);
+ struct vfsmount *lower_mnt = ecryptfs_dentry_to_lower_mnt(sb->s_root);
+ char *tmp_page;
+ char *path;
+ int rc = 0;
+
+ tmp_page = (char *)__get_free_page(GFP_KERNEL);
+ if (!tmp_page) {
+ rc = -ENOMEM;
+ goto out;
+ }
+ path = d_path(lower_root_dentry, lower_mnt, tmp_page, PAGE_SIZE);
+ if (IS_ERR(path)) {
+ rc = PTR_ERR(path);
+ goto out;
+ }
+ seq_printf(m, ",dir=%s", path);
+ free_page((unsigned long)tmp_page);
+out:
+ return rc;
+}
+
+struct super_operations ecryptfs_sops = {
+ .alloc_inode = ecryptfs_alloc_inode,
+ .destroy_inode = ecryptfs_destroy_inode,
+ .drop_inode = generic_delete_inode,
+ .put_super = ecryptfs_put_super,
+ .statfs = ecryptfs_statfs,
+ .remount_fs = NULL,
+ .clear_inode = ecryptfs_clear_inode,
+ .umount_begin = ecryptfs_umount_begin,
+ .show_options = ecryptfs_show_options
+};
diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c
index 87e1d03e8267..e8c7765419e8 100644
--- a/fs/lockd/clntlock.c
+++ b/fs/lockd/clntlock.c
@@ -144,42 +144,12 @@ u32 nlmclnt_grant(const struct sockaddr_in *addr, const struct nlm_lock *lock)
*/
/*
- * Someone has sent us an SM_NOTIFY. Ensure we bind to the new port number,
- * that we mark locks for reclaiming, and that we bump the pseudo NSM state.
- */
-static void nlmclnt_prepare_reclaim(struct nlm_host *host)
-{
- down_write(&host->h_rwsem);
- host->h_monitored = 0;
- host->h_state++;
- host->h_nextrebind = 0;
- nlm_rebind_host(host);
-
- /*
- * Mark the locks for reclaiming.
- */
- list_splice_init(&host->h_granted, &host->h_reclaim);
-
- dprintk("NLM: reclaiming locks for host %s\n", host->h_name);
-}
-
-static void nlmclnt_finish_reclaim(struct nlm_host *host)
-{
- host->h_reclaiming = 0;
- up_write(&host->h_rwsem);
- dprintk("NLM: done reclaiming locks for host %s", host->h_name);
-}
-
-/*
* Reclaim all locks on server host. We do this by spawning a separate
* reclaimer thread.
*/
void
-nlmclnt_recovery(struct nlm_host *host, u32 newstate)
+nlmclnt_recovery(struct nlm_host *host)
{
- if (host->h_nsmstate == newstate)
- return;
- host->h_nsmstate = newstate;
if (!host->h_reclaiming++) {
nlm_get_host(host);
__module_get(THIS_MODULE);
@@ -199,18 +169,30 @@ reclaimer(void *ptr)
daemonize("%s-reclaim", host->h_name);
allow_signal(SIGKILL);
+ down_write(&host->h_rwsem);
+
/* This one ensures that our parent doesn't terminate while the
* reclaim is in progress */
lock_kernel();
lockd_up(0); /* note: this cannot fail as lockd is already running */
- nlmclnt_prepare_reclaim(host);
- /* First, reclaim all locks that have been marked. */
+ dprintk("lockd: reclaiming locks for host %s", host->h_name);
+
restart:
nsmstate = host->h_nsmstate;
+
+ /* Force a portmap getport - the peer's lockd will
+ * most likely end up on a different port.
+ */
+ host->h_nextrebind = jiffies;
+ nlm_rebind_host(host);
+
+ /* First, reclaim all locks that have been granted. */
+ list_splice_init(&host->h_granted, &host->h_reclaim);
list_for_each_entry_safe(fl, next, &host->h_reclaim, fl_u.nfs_fl.list) {
list_del_init(&fl->fl_u.nfs_fl.list);
+ /* Why are we leaking memory here? --okir */
if (signalled())
continue;
if (nlmclnt_reclaim(host, fl) != 0)
@@ -218,11 +200,13 @@ restart:
list_add_tail(&fl->fl_u.nfs_fl.list, &host->h_granted);
if (host->h_nsmstate != nsmstate) {
/* Argh! The server rebooted again! */
- list_splice_init(&host->h_granted, &host->h_reclaim);
goto restart;
}
}
- nlmclnt_finish_reclaim(host);
+
+ host->h_reclaiming = 0;
+ up_write(&host->h_rwsem);
+ dprintk("NLM: done reclaiming locks for host %s", host->h_name);
/* Now, wake up all processes that sleep on a blocked lock */
list_for_each_entry(block, &nlm_blocked, b_list) {
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index 0116729cec5f..3d84f600b633 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -36,14 +36,14 @@ static const struct rpc_call_ops nlmclnt_cancel_ops;
/*
* Cookie counter for NLM requests
*/
-static u32 nlm_cookie = 0x1234;
+static atomic_t nlm_cookie = ATOMIC_INIT(0x1234);
-static inline void nlmclnt_next_cookie(struct nlm_cookie *c)
+void nlmclnt_next_cookie(struct nlm_cookie *c)
{
- memcpy(c->data, &nlm_cookie, 4);
- memset(c->data+4, 0, 4);
+ u32 cookie = atomic_inc_return(&nlm_cookie);
+
+ memcpy(c->data, &cookie, 4);
c->len=4;
- nlm_cookie++;
}
static struct nlm_lockowner *nlm_get_lockowner(struct nlm_lockowner *lockowner)
@@ -153,6 +153,7 @@ nlmclnt_proc(struct inode *inode, int cmd, struct file_lock *fl)
{
struct rpc_clnt *client = NFS_CLIENT(inode);
struct sockaddr_in addr;
+ struct nfs_server *nfssrv = NFS_SERVER(inode);
struct nlm_host *host;
struct nlm_rqst *call;
sigset_t oldset;
@@ -166,7 +167,9 @@ nlmclnt_proc(struct inode *inode, int cmd, struct file_lock *fl)
}
rpc_peeraddr(client, (struct sockaddr *) &addr, sizeof(addr));
- host = nlmclnt_lookup_host(&addr, client->cl_xprt->prot, vers);
+ host = nlmclnt_lookup_host(&addr, client->cl_xprt->prot, vers,
+ nfssrv->nfs_client->cl_hostname,
+ strlen(nfssrv->nfs_client->cl_hostname));
if (host == NULL)
return -ENOLCK;
@@ -499,7 +502,7 @@ nlmclnt_lock(struct nlm_rqst *req, struct file_lock *fl)
unsigned char fl_flags = fl->fl_flags;
int status = -ENOLCK;
- if (!host->h_monitored && nsm_monitor(host) < 0) {
+ if (nsm_monitor(host) < 0) {
printk(KERN_NOTICE "lockd: failed to monitor %s\n",
host->h_name);
goto out;
diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index a0d0b58ce7a4..fb24a9730345 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -27,46 +27,60 @@
#define NLM_HOST_EXPIRE ((nrhosts > NLM_HOST_MAX)? 300 * HZ : 120 * HZ)
#define NLM_HOST_COLLECT ((nrhosts > NLM_HOST_MAX)? 120 * HZ : 60 * HZ)
-static struct nlm_host * nlm_hosts[NLM_HOST_NRHASH];
+static struct hlist_head nlm_hosts[NLM_HOST_NRHASH];
static unsigned long next_gc;
static int nrhosts;
static DEFINE_MUTEX(nlm_host_mutex);
static void nlm_gc_hosts(void);
+static struct nsm_handle * __nsm_find(const struct sockaddr_in *,
+ const char *, int, int);
/*
* Find an NLM server handle in the cache. If there is none, create it.
*/
struct nlm_host *
-nlmclnt_lookup_host(struct sockaddr_in *sin, int proto, int version)
+nlmclnt_lookup_host(const struct sockaddr_in *sin, int proto, int version,
+ const char *hostname, int hostname_len)
{
- return nlm_lookup_host(0, sin, proto, version);
+ return nlm_lookup_host(0, sin, proto, version,
+ hostname, hostname_len);
}
/*
* Find an NLM client handle in the cache. If there is none, create it.
*/
struct nlm_host *
-nlmsvc_lookup_host(struct svc_rqst *rqstp)
+nlmsvc_lookup_host(struct svc_rqst *rqstp,
+ const char *hostname, int hostname_len)
{
return nlm_lookup_host(1, &rqstp->rq_addr,
- rqstp->rq_prot, rqstp->rq_vers);
+ rqstp->rq_prot, rqstp->rq_vers,
+ hostname, hostname_len);
}
/*
* Common host lookup routine for server & client
*/
struct nlm_host *
-nlm_lookup_host(int server, struct sockaddr_in *sin,
- int proto, int version)
+nlm_lookup_host(int server, const struct sockaddr_in *sin,
+ int proto, int version,
+ const char *hostname,
+ int hostname_len)
{
- struct nlm_host *host, **hp;
- u32 addr;
+ struct hlist_head *chain;
+ struct hlist_node *pos;
+ struct nlm_host *host;
+ struct nsm_handle *nsm = NULL;
int hash;
- dprintk("lockd: nlm_lookup_host(%08x, p=%d, v=%d)\n",
- (unsigned)(sin? ntohl(sin->sin_addr.s_addr) : 0), proto, version);
+ dprintk("lockd: nlm_lookup_host(%u.%u.%u.%u, p=%d, v=%d, my role=%s, name=%.*s)\n",
+ NIPQUAD(sin->sin_addr.s_addr), proto, version,
+ server? "server" : "client",
+ hostname_len,
+ hostname? hostname : "<none>");
+
hash = NLM_ADDRHASH(sin->sin_addr.s_addr);
@@ -76,7 +90,22 @@ nlm_lookup_host(int server, struct sockaddr_in *sin,
if (time_after_eq(jiffies, next_gc))
nlm_gc_hosts();
- for (hp = &nlm_hosts[hash]; (host = *hp) != 0; hp = &host->h_next) {
+ /* We may keep several nlm_host objects for a peer, because each
+ * nlm_host is identified by
+ * (address, protocol, version, server/client)
+ * We could probably simplify this a little by putting all those
+ * different NLM rpc_clients into one single nlm_host object.
+ * This would allow us to have one nlm_host per address.
+ */
+ chain = &nlm_hosts[hash];
+ hlist_for_each_entry(host, pos, chain, h_hash) {
+ if (!nlm_cmp_addr(&host->h_addr, sin))
+ continue;
+
+ /* See if we have an NSM handle for this client */
+ if (!nsm)
+ nsm = host->h_nsmhandle;
+
if (host->h_proto != proto)
continue;
if (host->h_version != version)
@@ -84,28 +113,30 @@ nlm_lookup_host(int server, struct sockaddr_in *sin,
if (host->h_server != server)
continue;
- if (nlm_cmp_addr(&host->h_addr, sin)) {
- if (hp != nlm_hosts + hash) {
- *hp = host->h_next;
- host->h_next = nlm_hosts[hash];
- nlm_hosts[hash] = host;
- }
- nlm_get_host(host);
- mutex_unlock(&nlm_host_mutex);
- return host;
- }
- }
+ /* Move to head of hash chain. */
+ hlist_del(&host->h_hash);
+ hlist_add_head(&host->h_hash, chain);
- /* Ooops, no host found, create it */
- dprintk("lockd: creating host entry\n");
+ nlm_get_host(host);
+ goto out;
+ }
+ if (nsm)
+ atomic_inc(&nsm->sm_count);
- host = kzalloc(sizeof(*host), GFP_KERNEL);
- if (!host)
- goto nohost;
+ host = NULL;
- addr = sin->sin_addr.s_addr;
- sprintf(host->h_name, "%u.%u.%u.%u", NIPQUAD(addr));
+ /* Sadly, the host isn't in our hash table yet. See if
+ * we have an NSM handle for it. If not, create one.
+ */
+ if (!nsm && !(nsm = nsm_find(sin, hostname, hostname_len)))
+ goto out;
+ host = kzalloc(sizeof(*host), GFP_KERNEL);
+ if (!host) {
+ nsm_release(nsm);
+ goto out;
+ }
+ host->h_name = nsm->sm_name;
host->h_addr = *sin;
host->h_addr.sin_port = 0; /* ouch! */
host->h_version = version;
@@ -119,9 +150,9 @@ nlm_lookup_host(int server, struct sockaddr_in *sin,
init_rwsem(&host->h_rwsem);
host->h_state = 0; /* pseudo NSM state */
host->h_nsmstate = 0; /* real NSM state */
+ host->h_nsmhandle = nsm;
host->h_server = server;
- host->h_next = nlm_hosts[hash];
- nlm_hosts[hash] = host;
+ hlist_add_head(&host->h_hash, chain);
INIT_LIST_HEAD(&host->h_lockowners);
spin_lock_init(&host->h_lock);
INIT_LIST_HEAD(&host->h_granted);
@@ -130,35 +161,39 @@ nlm_lookup_host(int server, struct sockaddr_in *sin,
if (++nrhosts > NLM_HOST_MAX)
next_gc = 0;
-nohost:
+out:
mutex_unlock(&nlm_host_mutex);
return host;
}
-struct nlm_host *
-nlm_find_client(void)
+/*
+ * Destroy a host
+ */
+static void
+nlm_destroy_host(struct nlm_host *host)
{
- /* find a nlm_host for a client for which h_killed == 0.
- * and return it
+ struct rpc_clnt *clnt;
+
+ BUG_ON(!list_empty(&host->h_lockowners));
+ BUG_ON(atomic_read(&host->h_count));
+
+ /*
+ * Release NSM handle and unmonitor host.
*/
- int hash;
- mutex_lock(&nlm_host_mutex);
- for (hash = 0 ; hash < NLM_HOST_NRHASH; hash++) {
- struct nlm_host *host, **hp;
- for (hp = &nlm_hosts[hash]; (host = *hp) != 0; hp = &host->h_next) {
- if (host->h_server &&
- host->h_killed == 0) {
- nlm_get_host(host);
- mutex_unlock(&nlm_host_mutex);
- return host;
- }
+ nsm_unmonitor(host);
+
+ if ((clnt = host->h_rpcclnt) != NULL) {
+ if (atomic_read(&clnt->cl_users)) {
+ printk(KERN_WARNING
+ "lockd: active RPC handle\n");
+ clnt->cl_dead = 1;
+ } else {
+ rpc_destroy_client(host->h_rpcclnt);
}
}
- mutex_unlock(&nlm_host_mutex);
- return NULL;
+ kfree(host);
}
-
/*
* Create the NLM RPC client for an NLM peer
*/
@@ -260,22 +295,82 @@ void nlm_release_host(struct nlm_host *host)
}
/*
+ * We were notified that the host indicated by address &sin
+ * has rebooted.
+ * Release all resources held by that peer.
+ */
+void nlm_host_rebooted(const struct sockaddr_in *sin,
+ const char *hostname, int hostname_len,
+ u32 new_state)
+{
+ struct hlist_head *chain;
+ struct hlist_node *pos;
+ struct nsm_handle *nsm;
+ struct nlm_host *host;
+
+ dprintk("lockd: nlm_host_rebooted(%s, %u.%u.%u.%u)\n",
+ hostname, NIPQUAD(sin->sin_addr));
+
+ /* Find the NSM handle for this peer */
+ if (!(nsm = __nsm_find(sin, hostname, hostname_len, 0)))
+ return;
+
+ /* When reclaiming locks on this peer, make sure that
+ * we set up a new notification */
+ nsm->sm_monitored = 0;
+
+ /* Mark all hosts tied to this NSM state as having rebooted.
+ * We run the loop repeatedly, because we drop the host table
+ * lock for this.
+ * To avoid processing a host several times, we match the nsmstate.
+ */
+again: mutex_lock(&nlm_host_mutex);
+ for (chain = nlm_hosts; chain < nlm_hosts + NLM_HOST_NRHASH; ++chain) {
+ hlist_for_each_entry(host, pos, chain, h_hash) {
+ if (host->h_nsmhandle == nsm
+ && host->h_nsmstate != new_state) {
+ host->h_nsmstate = new_state;
+ host->h_state++;
+
+ nlm_get_host(host);
+ mutex_unlock(&nlm_host_mutex);
+
+ if (host->h_server) {
+ /* We're server for this guy, just ditch
+ * all the locks he held. */
+ nlmsvc_free_host_resources(host);
+ } else {
+ /* He's the server, initiate lock recovery. */
+ nlmclnt_recovery(host);
+ }
+
+ nlm_release_host(host);
+ goto again;
+ }
+ }
+ }
+
+ mutex_unlock(&nlm_host_mutex);
+}
+
+/*
* Shut down the hosts module.
* Note that this routine is called only at server shutdown time.
*/
void
nlm_shutdown_hosts(void)
{
+ struct hlist_head *chain;
+ struct hlist_node *pos;
struct nlm_host *host;
- int i;
dprintk("lockd: shutting down host module\n");
mutex_lock(&nlm_host_mutex);
/* First, make all hosts eligible for gc */
dprintk("lockd: nuking all hosts...\n");
- for (i = 0; i < NLM_HOST_NRHASH; i++) {
- for (host = nlm_hosts[i]; host; host = host->h_next)
+ for (chain = nlm_hosts; chain < nlm_hosts + NLM_HOST_NRHASH; ++chain) {
+ hlist_for_each_entry(host, pos, chain, h_hash)
host->h_expires = jiffies - 1;
}
@@ -287,8 +382,8 @@ nlm_shutdown_hosts(void)
if (nrhosts) {
printk(KERN_WARNING "lockd: couldn't shutdown host module!\n");
dprintk("lockd: %d hosts left:\n", nrhosts);
- for (i = 0; i < NLM_HOST_NRHASH; i++) {
- for (host = nlm_hosts[i]; host; host = host->h_next) {
+ for (chain = nlm_hosts; chain < nlm_hosts + NLM_HOST_NRHASH; ++chain) {
+ hlist_for_each_entry(host, pos, chain, h_hash) {
dprintk(" %s (cnt %d use %d exp %ld)\n",
host->h_name, atomic_read(&host->h_count),
host->h_inuse, host->h_expires);
@@ -305,45 +400,32 @@ nlm_shutdown_hosts(void)
static void
nlm_gc_hosts(void)
{
- struct nlm_host **q, *host;
- struct rpc_clnt *clnt;
- int i;
+ struct hlist_head *chain;
+ struct hlist_node *pos, *next;
+ struct nlm_host *host;
dprintk("lockd: host garbage collection\n");
- for (i = 0; i < NLM_HOST_NRHASH; i++) {
- for (host = nlm_hosts[i]; host; host = host->h_next)
+ for (chain = nlm_hosts; chain < nlm_hosts + NLM_HOST_NRHASH; ++chain) {
+ hlist_for_each_entry(host, pos, chain, h_hash)
host->h_inuse = 0;
}
/* Mark all hosts that hold locks, blocks or shares */
nlmsvc_mark_resources();
- for (i = 0; i < NLM_HOST_NRHASH; i++) {
- q = &nlm_hosts[i];
- while ((host = *q) != NULL) {
+ for (chain = nlm_hosts; chain < nlm_hosts + NLM_HOST_NRHASH; ++chain) {
+ hlist_for_each_entry_safe(host, pos, next, chain, h_hash) {
if (atomic_read(&host->h_count) || host->h_inuse
|| time_before(jiffies, host->h_expires)) {
dprintk("nlm_gc_hosts skipping %s (cnt %d use %d exp %ld)\n",
host->h_name, atomic_read(&host->h_count),
host->h_inuse, host->h_expires);
- q = &host->h_next;
continue;
}
dprintk("lockd: delete host %s\n", host->h_name);
- *q = host->h_next;
- /* Don't unmonitor hosts that have been invalidated */
- if (host->h_monitored && !host->h_killed)
- nsm_unmonitor(host);
- if ((clnt = host->h_rpcclnt) != NULL) {
- if (atomic_read(&clnt->cl_users)) {
- printk(KERN_WARNING
- "lockd: active RPC handle\n");
- clnt->cl_dead = 1;
- } else {
- rpc_destroy_client(host->h_rpcclnt);
- }
- }
- kfree(host);
+ hlist_del_init(&host->h_hash);
+
+ nlm_destroy_host(host);
nrhosts--;
}
}
@@ -351,3 +433,88 @@ nlm_gc_hosts(void)
next_gc = jiffies + NLM_HOST_COLLECT;
}
+
+/*
+ * Manage NSM handles
+ */
+static LIST_HEAD(nsm_handles);
+static DEFINE_MUTEX(nsm_mutex);
+
+static struct nsm_handle *
+__nsm_find(const struct sockaddr_in *sin,
+ const char *hostname, int hostname_len,
+ int create)
+{
+ struct nsm_handle *nsm = NULL;
+ struct list_head *pos;
+
+ if (!sin)
+ return NULL;
+
+ if (hostname && memchr(hostname, '/', hostname_len) != NULL) {
+ if (printk_ratelimit()) {
+ printk(KERN_WARNING "Invalid hostname \"%.*s\" "
+ "in NFS lock request\n",
+ hostname_len, hostname);
+ }
+ return NULL;
+ }
+
+ mutex_lock(&nsm_mutex);
+ list_for_each(pos, &nsm_handles) {
+ nsm = list_entry(pos, struct nsm_handle, sm_link);
+
+ if (hostname && nsm_use_hostnames) {
+ if (strlen(nsm->sm_name) != hostname_len
+ || memcmp(nsm->sm_name, hostname, hostname_len))
+ continue;
+ } else if (!nlm_cmp_addr(&nsm->sm_addr, sin))
+ continue;
+ atomic_inc(&nsm->sm_count);
+ goto out;
+ }
+
+ if (!create) {
+ nsm = NULL;
+ goto out;
+ }
+
+ nsm = kzalloc(sizeof(*nsm) + hostname_len + 1, GFP_KERNEL);
+ if (nsm != NULL) {
+ nsm->sm_addr = *sin;
+ nsm->sm_name = (char *) (nsm + 1);
+ memcpy(nsm->sm_name, hostname, hostname_len);
+ nsm->sm_name[hostname_len] = '\0';
+ atomic_set(&nsm->sm_count, 1);
+
+ list_add(&nsm->sm_link, &nsm_handles);
+ }
+
+out:
+ mutex_unlock(&nsm_mutex);
+ return nsm;
+}
+
+struct nsm_handle *
+nsm_find(const struct sockaddr_in *sin, const char *hostname, int hostname_len)
+{
+ return __nsm_find(sin, hostname, hostname_len, 1);
+}
+
+/*
+ * Release an NSM handle
+ */
+void
+nsm_release(struct nsm_handle *nsm)
+{
+ if (!nsm)
+ return;
+ if (atomic_dec_and_test(&nsm->sm_count)) {
+ mutex_lock(&nsm_mutex);
+ if (atomic_read(&nsm->sm_count) == 0) {
+ list_del(&nsm->sm_link);
+ kfree(nsm);
+ }
+ mutex_unlock(&nsm_mutex);
+ }
+}
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index a816b920d431..e0179f8c327f 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -24,13 +24,13 @@ static struct rpc_program nsm_program;
/*
* Local NSM state
*/
-u32 nsm_local_state;
+int nsm_local_state;
/*
* Common procedure for SM_MON/SM_UNMON calls
*/
static int
-nsm_mon_unmon(struct nlm_host *host, u32 proc, struct nsm_res *res)
+nsm_mon_unmon(struct nsm_handle *nsm, u32 proc, struct nsm_res *res)
{
struct rpc_clnt *clnt;
int status;
@@ -46,10 +46,11 @@ nsm_mon_unmon(struct nlm_host *host, u32 proc, struct nsm_res *res)
goto out;
}
- args.addr = host->h_addr.sin_addr.s_addr;
- args.proto= (host->h_proto<<1) | host->h_server;
+ memset(&args, 0, sizeof(args));
+ args.mon_name = nsm->sm_name;
+ args.addr = nsm->sm_addr.sin_addr.s_addr;
args.prog = NLM_PROGRAM;
- args.vers = host->h_version;
+ args.vers = 3;
args.proc = NLMPROC_NSM_NOTIFY;
memset(res, 0, sizeof(*res));
@@ -70,17 +71,22 @@ nsm_mon_unmon(struct nlm_host *host, u32 proc, struct nsm_res *res)
int
nsm_monitor(struct nlm_host *host)
{
+ struct nsm_handle *nsm = host->h_nsmhandle;
struct nsm_res res;
int status;
dprintk("lockd: nsm_monitor(%s)\n", host->h_name);
+ BUG_ON(nsm == NULL);
- status = nsm_mon_unmon(host, SM_MON, &res);
+ if (nsm->sm_monitored)
+ return 0;
+
+ status = nsm_mon_unmon(nsm, SM_MON, &res);
if (status < 0 || res.status != 0)
printk(KERN_NOTICE "lockd: cannot monitor %s\n", host->h_name);
else
- host->h_monitored = 1;
+ nsm->sm_monitored = 1;
return status;
}
@@ -90,16 +96,26 @@ nsm_monitor(struct nlm_host *host)
int
nsm_unmonitor(struct nlm_host *host)
{
+ struct nsm_handle *nsm = host->h_nsmhandle;
struct nsm_res res;
- int status;
-
- dprintk("lockd: nsm_unmonitor(%s)\n", host->h_name);
-
- status = nsm_mon_unmon(host, SM_UNMON, &res);
- if (status < 0)
- printk(KERN_NOTICE "lockd: cannot unmonitor %s\n", host->h_name);
- else
- host->h_monitored = 0;
+ int status = 0;
+
+ if (nsm == NULL)
+ return 0;
+ host->h_nsmhandle = NULL;
+
+ if (atomic_read(&nsm->sm_count) == 1
+ && nsm->sm_monitored && !nsm->sm_sticky) {
+ dprintk("lockd: nsm_unmonitor(%s)\n", host->h_name);
+
+ status = nsm_mon_unmon(nsm, SM_UNMON, &res);
+ if (status < 0)
+ printk(KERN_NOTICE "lockd: cannot unmonitor %s\n",
+ host->h_name);
+ else
+ nsm->sm_monitored = 0;
+ }
+ nsm_release(nsm);
return status;
}
@@ -135,7 +151,7 @@ nsm_create(void)
static u32 *
xdr_encode_common(struct rpc_rqst *rqstp, u32 *p, struct nsm_args *argp)
{
- char buffer[20];
+ char buffer[20], *name;
/*
* Use the dotted-quad IP address of the remote host as
@@ -143,8 +159,13 @@ xdr_encode_common(struct rpc_rqst *rqstp, u32 *p, struct nsm_args *argp)
* hostname first for whatever remote hostname it receives,
* so this works alright.
*/
- sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(argp->addr));
- if (!(p = xdr_encode_string(p, buffer))
+ if (nsm_use_hostnames) {
+ name = argp->mon_name;
+ } else {
+ sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(argp->addr));
+ name = buffer;
+ }
+ if (!(p = xdr_encode_string(p, name))
|| !(p = xdr_encode_string(p, utsname()->nodename)))
return ERR_PTR(-EIO);
*p++ = htonl(argp->prog);
@@ -160,9 +181,11 @@ xdr_encode_mon(struct rpc_rqst *rqstp, u32 *p, struct nsm_args *argp)
p = xdr_encode_common(rqstp, p, argp);
if (IS_ERR(p))
return PTR_ERR(p);
+
+ /* Surprise - there may even be room for an IPv6 address now */
*p++ = argp->addr;
- *p++ = argp->vers;
- *p++ = argp->proto;
+ *p++ = 0;
+ *p++ = 0;
*p++ = 0;
rqstp->rq_slen = xdr_adjust_iovec(rqstp->rq_svec, p);
return 0;
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index 3cc369e5693f..634139232aaf 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -33,6 +33,7 @@
#include <linux/sunrpc/svcsock.h>
#include <net/ip.h>
#include <linux/lockd/lockd.h>
+#include <linux/lockd/sm_inter.h>
#include <linux/nfs.h>
#define NLMDBG_FACILITY NLMDBG_SVC
@@ -61,6 +62,7 @@ static DECLARE_WAIT_QUEUE_HEAD(lockd_exit);
static unsigned long nlm_grace_period;
static unsigned long nlm_timeout = LOCKD_DFLT_TIMEO;
static int nlm_udpport, nlm_tcpport;
+int nsm_use_hostnames = 0;
/*
* Constants needed for the sysctl interface.
@@ -395,6 +397,22 @@ static ctl_table nlm_sysctls[] = {
.extra1 = (int *) &nlm_port_min,
.extra2 = (int *) &nlm_port_max,
},
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "nsm_use_hostnames",
+ .data = &nsm_use_hostnames,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "nsm_local_state",
+ .data = &nsm_local_state,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
{ .ctl_name = 0 }
};
@@ -483,6 +501,7 @@ module_param_call(nlm_udpport, param_set_port, param_get_int,
&nlm_udpport, 0644);
module_param_call(nlm_tcpport, param_set_port, param_get_int,
&nlm_tcpport, 0644);
+module_param(nsm_use_hostnames, bool, 0644);
/*
* Initialising and terminating the module.
diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index a2dd9ccb9b32..fa370f6eb07b 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -38,8 +38,8 @@ nlm4svc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp,
return nlm_lck_denied_nolocks;
/* Obtain host handle */
- if (!(host = nlmsvc_lookup_host(rqstp))
- || (argp->monitor && !host->h_monitored && nsm_monitor(host) < 0))
+ if (!(host = nlmsvc_lookup_host(rqstp, lock->caller, lock->len))
+ || (argp->monitor && nsm_monitor(host) < 0))
goto no_locks;
*hostp = host;
@@ -260,7 +260,9 @@ static int nlm4svc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_args *a
struct nlm_rqst *call;
int stat;
- host = nlmsvc_lookup_host(rqstp);
+ host = nlmsvc_lookup_host(rqstp,
+ argp->lock.caller,
+ argp->lock.len);
if (host == NULL)
return rpc_system_err;
@@ -420,10 +422,6 @@ nlm4svc_proc_sm_notify(struct svc_rqst *rqstp, struct nlm_reboot *argp,
void *resp)
{
struct sockaddr_in saddr = rqstp->rq_addr;
- int vers = argp->vers;
- int prot = argp->proto >> 1;
-
- struct nlm_host *host;
dprintk("lockd: SM_NOTIFY called\n");
if (saddr.sin_addr.s_addr != htonl(INADDR_LOOPBACK)
@@ -438,21 +436,10 @@ nlm4svc_proc_sm_notify(struct svc_rqst *rqstp, struct nlm_reboot *argp,
/* Obtain the host pointer for this NFS server and try to
* reclaim all locks we hold on this server.
*/
+ memset(&saddr, 0, sizeof(saddr));
saddr.sin_addr.s_addr = argp->addr;
+ nlm_host_rebooted(&saddr, argp->mon, argp->len, argp->state);
- if ((argp->proto & 1)==0) {
- if ((host = nlmclnt_lookup_host(&saddr, prot, vers)) != NULL) {
- nlmclnt_recovery(host, argp->state);
- nlm_release_host(host);
- }
- } else {
- /* If we run on an NFS server, delete all locks held by the client */
-
- if ((host = nlm_lookup_host(1, &saddr, prot, vers)) != NULL) {
- nlmsvc_free_host_resources(host);
- nlm_release_host(host);
- }
- }
return rpc_success;
}
@@ -468,7 +455,7 @@ nlm4svc_proc_granted_res(struct svc_rqst *rqstp, struct nlm_res *argp,
dprintk("lockd: GRANTED_RES called\n");
- nlmsvc_grant_reply(rqstp, &argp->cookie, argp->status);
+ nlmsvc_grant_reply(&argp->cookie, argp->status);
return rpc_success;
}
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index 93c00ee7189d..814c6064c9e0 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -40,7 +40,7 @@
static void nlmsvc_release_block(struct nlm_block *block);
static void nlmsvc_insert_block(struct nlm_block *block, unsigned long);
-static int nlmsvc_remove_block(struct nlm_block *block);
+static void nlmsvc_remove_block(struct nlm_block *block);
static int nlmsvc_setgrantargs(struct nlm_rqst *call, struct nlm_lock *lock);
static void nlmsvc_freegrantargs(struct nlm_rqst *call);
@@ -49,7 +49,7 @@ static const struct rpc_call_ops nlmsvc_grant_ops;
/*
* The list of blocked locks to retry
*/
-static struct nlm_block * nlm_blocked;
+static LIST_HEAD(nlm_blocked);
/*
* Insert a blocked lock into the global list
@@ -57,48 +57,44 @@ static struct nlm_block * nlm_blocked;
static void
nlmsvc_insert_block(struct nlm_block *block, unsigned long when)
{
- struct nlm_block **bp, *b;
+ struct nlm_block *b;
+ struct list_head *pos;
dprintk("lockd: nlmsvc_insert_block(%p, %ld)\n", block, when);
- kref_get(&block->b_count);
- if (block->b_queued)
- nlmsvc_remove_block(block);
- bp = &nlm_blocked;
+ if (list_empty(&block->b_list)) {
+ kref_get(&block->b_count);
+ } else {
+ list_del_init(&block->b_list);
+ }
+
+ pos = &nlm_blocked;
if (when != NLM_NEVER) {
if ((when += jiffies) == NLM_NEVER)
when ++;
- while ((b = *bp) && time_before_eq(b->b_when,when) && b->b_when != NLM_NEVER)
- bp = &b->b_next;
- } else
- while ((b = *bp) != 0)
- bp = &b->b_next;
+ list_for_each(pos, &nlm_blocked) {
+ b = list_entry(pos, struct nlm_block, b_list);
+ if (time_after(b->b_when,when) || b->b_when == NLM_NEVER)
+ break;
+ }
+ /* On normal exit from the loop, pos == &nlm_blocked,
+ * so we will be adding to the end of the list - good
+ */
+ }
- block->b_queued = 1;
+ list_add_tail(&block->b_list, pos);
block->b_when = when;
- block->b_next = b;
- *bp = block;
}
/*
* Remove a block from the global list
*/
-static int
+static inline void
nlmsvc_remove_block(struct nlm_block *block)
{
- struct nlm_block **bp, *b;
-
- if (!block->b_queued)
- return 1;
- for (bp = &nlm_blocked; (b = *bp) != 0; bp = &b->b_next) {
- if (b == block) {
- *bp = block->b_next;
- block->b_queued = 0;
- nlmsvc_release_block(block);
- return 1;
- }
+ if (!list_empty(&block->b_list)) {
+ list_del_init(&block->b_list);
+ nlmsvc_release_block(block);
}
-
- return 0;
}
/*
@@ -107,14 +103,14 @@ nlmsvc_remove_block(struct nlm_block *block)
static struct nlm_block *
nlmsvc_lookup_block(struct nlm_file *file, struct nlm_lock *lock)
{
- struct nlm_block **head, *block;
+ struct nlm_block *block;
struct file_lock *fl;
dprintk("lockd: nlmsvc_lookup_block f=%p pd=%d %Ld-%Ld ty=%d\n",
file, lock->fl.fl_pid,
(long long)lock->fl.fl_start,
(long long)lock->fl.fl_end, lock->fl.fl_type);
- for (head = &nlm_blocked; (block = *head) != 0; head = &block->b_next) {
+ list_for_each_entry(block, &nlm_blocked, b_list) {
fl = &block->b_call->a_args.lock.fl;
dprintk("lockd: check f=%p pd=%d %Ld-%Ld ty=%d cookie=%s\n",
block->b_file, fl->fl_pid,
@@ -143,20 +139,20 @@ static inline int nlm_cookie_match(struct nlm_cookie *a, struct nlm_cookie *b)
* Find a block with a given NLM cookie.
*/
static inline struct nlm_block *
-nlmsvc_find_block(struct nlm_cookie *cookie, struct sockaddr_in *sin)
+nlmsvc_find_block(struct nlm_cookie *cookie)
{
struct nlm_block *block;
- for (block = nlm_blocked; block; block = block->b_next) {
- dprintk("cookie: head of blocked queue %p, block %p\n",
- nlm_blocked, block);
- if (nlm_cookie_match(&block->b_call->a_args.cookie,cookie)
- && nlm_cmp_addr(sin, &block->b_host->h_addr))
- break;
+ list_for_each_entry(block, &nlm_blocked, b_list) {
+ if (nlm_cookie_match(&block->b_call->a_args.cookie,cookie))
+ goto found;
}
- if (block != NULL)
- kref_get(&block->b_count);
+ return NULL;
+
+found:
+ dprintk("nlmsvc_find_block(%s): block=%p\n", nlmdbg_cookie2a(cookie), block);
+ kref_get(&block->b_count);
return block;
}
@@ -169,6 +165,11 @@ nlmsvc_find_block(struct nlm_cookie *cookie, struct sockaddr_in *sin)
* request, but (as I found out later) that's because some implementations
* do just this. Never mind the standards comittees, they support our
* logging industries.
+ *
+ * 10 years later: I hope we can safely ignore these old and broken
+ * clients by now. Let's fix this so we can uniquely identify an incoming
+ * GRANTED_RES message by cookie, without having to rely on the client's IP
+ * address. --okir
*/
static inline struct nlm_block *
nlmsvc_create_block(struct svc_rqst *rqstp, struct nlm_file *file,
@@ -179,7 +180,7 @@ nlmsvc_create_block(struct svc_rqst *rqstp, struct nlm_file *file,
struct nlm_rqst *call = NULL;
/* Create host handle for callback */
- host = nlmsvc_lookup_host(rqstp);
+ host = nlmsvc_lookup_host(rqstp, lock->caller, lock->len);
if (host == NULL)
return NULL;
@@ -192,6 +193,8 @@ nlmsvc_create_block(struct svc_rqst *rqstp, struct nlm_file *file,
if (block == NULL)
goto failed;
kref_init(&block->b_count);
+ INIT_LIST_HEAD(&block->b_list);
+ INIT_LIST_HEAD(&block->b_flist);
if (!nlmsvc_setgrantargs(call, lock))
goto failed_free;
@@ -199,7 +202,7 @@ nlmsvc_create_block(struct svc_rqst *rqstp, struct nlm_file *file,
/* Set notifier function for VFS, and init args */
call->a_args.lock.fl.fl_flags |= FL_SLEEP;
call->a_args.lock.fl.fl_lmops = &nlmsvc_lock_operations;
- call->a_args.cookie = *cookie; /* see above */
+ nlmclnt_next_cookie(&call->a_args.cookie);
dprintk("lockd: created block %p...\n", block);
@@ -210,8 +213,7 @@ nlmsvc_create_block(struct svc_rqst *rqstp, struct nlm_file *file,
file->f_count++;
/* Add to file's list of blocks */
- block->b_fnext = file->f_blocks;
- file->f_blocks = block;
+ list_add(&block->b_flist, &file->f_blocks);
/* Set up RPC arguments for callback */
block->b_call = call;
@@ -248,19 +250,13 @@ static void nlmsvc_free_block(struct kref *kref)
{
struct nlm_block *block = container_of(kref, struct nlm_block, b_count);
struct nlm_file *file = block->b_file;
- struct nlm_block **bp;
dprintk("lockd: freeing block %p...\n", block);
- down(&file->f_sema);
/* Remove block from file's list of blocks */
- for (bp = &file->f_blocks; *bp; bp = &(*bp)->b_fnext) {
- if (*bp == block) {
- *bp = block->b_fnext;
- break;
- }
- }
- up(&file->f_sema);
+ mutex_lock(&file->f_mutex);
+ list_del_init(&block->b_flist);
+ mutex_unlock(&file->f_mutex);
nlmsvc_freegrantargs(block->b_call);
nlm_release_call(block->b_call);
@@ -274,47 +270,32 @@ static void nlmsvc_release_block(struct nlm_block *block)
kref_put(&block->b_count, nlmsvc_free_block);
}
-static void nlmsvc_act_mark(struct nlm_host *host, struct nlm_file *file)
-{
- struct nlm_block *block;
-
- down(&file->f_sema);
- for (block = file->f_blocks; block != NULL; block = block->b_fnext)
- block->b_host->h_inuse = 1;
- up(&file->f_sema);
-}
-
-static void nlmsvc_act_unlock(struct nlm_host *host, struct nlm_file *file)
+/*
+ * Loop over all blocks and delete blocks held by
+ * a matching host.
+ */
+void nlmsvc_traverse_blocks(struct nlm_host *host,
+ struct nlm_file *file,
+ nlm_host_match_fn_t match)
{
- struct nlm_block *block;
+ struct nlm_block *block, *next;
restart:
- down(&file->f_sema);
- for (block = file->f_blocks; block != NULL; block = block->b_fnext) {
- if (host != NULL && host != block->b_host)
+ mutex_lock(&file->f_mutex);
+ list_for_each_entry_safe(block, next, &file->f_blocks, b_flist) {
+ if (!match(block->b_host, host))
continue;
- if (!block->b_queued)
+ /* Do not destroy blocks that are not on
+ * the global retry list - why? */
+ if (list_empty(&block->b_list))
continue;
kref_get(&block->b_count);
- up(&file->f_sema);
+ mutex_unlock(&file->f_mutex);
nlmsvc_unlink_block(block);
nlmsvc_release_block(block);
goto restart;
}
- up(&file->f_sema);
-}
-
-/*
- * Loop over all blocks and perform the action specified.
- * (NLM_ACT_CHECK handled by nlmsvc_inspect_file).
- */
-void
-nlmsvc_traverse_blocks(struct nlm_host *host, struct nlm_file *file, int action)
-{
- if (action == NLM_ACT_MARK)
- nlmsvc_act_mark(host, file);
- else
- nlmsvc_act_unlock(host, file);
+ mutex_unlock(&file->f_mutex);
}
/*
@@ -373,7 +354,7 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
lock->fl.fl_flags &= ~FL_SLEEP;
again:
/* Lock file against concurrent access */
- down(&file->f_sema);
+ mutex_lock(&file->f_mutex);
/* Get existing block (in case client is busy-waiting) */
block = nlmsvc_lookup_block(file, lock);
if (block == NULL) {
@@ -411,10 +392,10 @@ again:
/* If we don't have a block, create and initialize it. Then
* retry because we may have slept in kmalloc. */
- /* We have to release f_sema as nlmsvc_create_block may try to
+ /* We have to release f_mutex as nlmsvc_create_block may try to
* to claim it while doing host garbage collection */
if (newblock == NULL) {
- up(&file->f_sema);
+ mutex_unlock(&file->f_mutex);
dprintk("lockd: blocking on this lock (allocating).\n");
if (!(newblock = nlmsvc_create_block(rqstp, file, lock, cookie)))
return nlm_lck_denied_nolocks;
@@ -424,7 +405,7 @@ again:
/* Append to list of blocked */
nlmsvc_insert_block(newblock, NLM_NEVER);
out:
- up(&file->f_sema);
+ mutex_unlock(&file->f_mutex);
nlmsvc_release_block(newblock);
nlmsvc_release_block(block);
dprintk("lockd: nlmsvc_lock returned %u\n", ret);
@@ -451,6 +432,7 @@ nlmsvc_testlock(struct nlm_file *file, struct nlm_lock *lock,
(long long)conflock->fl.fl_start,
(long long)conflock->fl.fl_end);
conflock->caller = "somehost"; /* FIXME */
+ conflock->len = strlen(conflock->caller);
conflock->oh.len = 0; /* don't return OH info */
conflock->svid = conflock->fl.fl_pid;
return nlm_lck_denied;
@@ -507,9 +489,9 @@ nlmsvc_cancel_blocked(struct nlm_file *file, struct nlm_lock *lock)
(long long)lock->fl.fl_start,
(long long)lock->fl.fl_end);
- down(&file->f_sema);
+ mutex_lock(&file->f_mutex);
block = nlmsvc_lookup_block(file, lock);
- up(&file->f_sema);
+ mutex_unlock(&file->f_mutex);
if (block != NULL) {
status = nlmsvc_unlink_block(block);
nlmsvc_release_block(block);
@@ -527,10 +509,10 @@ nlmsvc_cancel_blocked(struct nlm_file *file, struct nlm_lock *lock)
static void
nlmsvc_notify_blocked(struct file_lock *fl)
{
- struct nlm_block **bp, *block;
+ struct nlm_block *block;
dprintk("lockd: VFS unblock notification for block %p\n", fl);
- for (bp = &nlm_blocked; (block = *bp) != 0; bp = &block->b_next) {
+ list_for_each_entry(block, &nlm_blocked, b_list) {
if (nlm_compare_locks(&block->b_call->a_args.lock.fl, fl)) {
nlmsvc_insert_block(block, 0);
svc_wake_up(block->b_daemon);
@@ -663,17 +645,14 @@ static const struct rpc_call_ops nlmsvc_grant_ops = {
* block.
*/
void
-nlmsvc_grant_reply(struct svc_rqst *rqstp, struct nlm_cookie *cookie, u32 status)
+nlmsvc_grant_reply(struct nlm_cookie *cookie, u32 status)
{
struct nlm_block *block;
- struct nlm_file *file;
- dprintk("grant_reply: looking for cookie %x, host (%08x), s=%d \n",
- *(unsigned int *)(cookie->data),
- ntohl(rqstp->rq_addr.sin_addr.s_addr), status);
- if (!(block = nlmsvc_find_block(cookie, &rqstp->rq_addr)))
+ dprintk("grant_reply: looking for cookie %x, s=%d \n",
+ *(unsigned int *)(cookie->data), status);
+ if (!(block = nlmsvc_find_block(cookie)))
return;
- file = block->b_file;
if (block) {
if (status == NLM_LCK_DENIED_GRACE_PERIOD) {
@@ -696,16 +675,19 @@ nlmsvc_grant_reply(struct svc_rqst *rqstp, struct nlm_cookie *cookie, u32 status
unsigned long
nlmsvc_retry_blocked(void)
{
- struct nlm_block *block;
+ unsigned long timeout = MAX_SCHEDULE_TIMEOUT;
+ struct nlm_block *block;
+
+ while (!list_empty(&nlm_blocked)) {
+ block = list_entry(nlm_blocked.next, struct nlm_block, b_list);
- dprintk("nlmsvc_retry_blocked(%p, when=%ld)\n",
- nlm_blocked,
- nlm_blocked? nlm_blocked->b_when : 0);
- while ((block = nlm_blocked) != 0) {
if (block->b_when == NLM_NEVER)
break;
- if (time_after(block->b_when,jiffies))
+ if (time_after(block->b_when,jiffies)) {
+ timeout = block->b_when - jiffies;
break;
+ }
+
dprintk("nlmsvc_retry_blocked(%p, when=%ld)\n",
block, block->b_when);
kref_get(&block->b_count);
@@ -713,8 +695,5 @@ nlmsvc_retry_blocked(void)
nlmsvc_release_block(block);
}
- if ((block = nlm_blocked) && block->b_when != NLM_NEVER)
- return (block->b_when - jiffies);
-
- return MAX_SCHEDULE_TIMEOUT;
+ return timeout;
}
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index dbb66a3b5cd9..75b2c81bcb93 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -66,8 +66,8 @@ nlmsvc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp,
return nlm_lck_denied_nolocks;
/* Obtain host handle */
- if (!(host = nlmsvc_lookup_host(rqstp))
- || (argp->monitor && !host->h_monitored && nsm_monitor(host) < 0))
+ if (!(host = nlmsvc_lookup_host(rqstp, lock->caller, lock->len))
+ || (argp->monitor && nsm_monitor(host) < 0))
goto no_locks;
*hostp = host;
@@ -287,7 +287,9 @@ static int nlmsvc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_args *ar
struct nlm_rqst *call;
int stat;
- host = nlmsvc_lookup_host(rqstp);
+ host = nlmsvc_lookup_host(rqstp,
+ argp->lock.caller,
+ argp->lock.len);
if (host == NULL)
return rpc_system_err;
@@ -449,9 +451,6 @@ nlmsvc_proc_sm_notify(struct svc_rqst *rqstp, struct nlm_reboot *argp,
void *resp)
{
struct sockaddr_in saddr = rqstp->rq_addr;
- int vers = argp->vers;
- int prot = argp->proto >> 1;
- struct nlm_host *host;
dprintk("lockd: SM_NOTIFY called\n");
if (saddr.sin_addr.s_addr != htonl(INADDR_LOOPBACK)
@@ -466,19 +465,9 @@ nlmsvc_proc_sm_notify(struct svc_rqst *rqstp, struct nlm_reboot *argp,
/* Obtain the host pointer for this NFS server and try to
* reclaim all locks we hold on this server.
*/
+ memset(&saddr, 0, sizeof(saddr));
saddr.sin_addr.s_addr = argp->addr;
- if ((argp->proto & 1)==0) {
- if ((host = nlmclnt_lookup_host(&saddr, prot, vers)) != NULL) {
- nlmclnt_recovery(host, argp->state);
- nlm_release_host(host);
- }
- } else {
- /* If we run on an NFS server, delete all locks held by the client */
- if ((host = nlm_lookup_host(1, &saddr, prot, vers)) != NULL) {
- nlmsvc_free_host_resources(host);
- nlm_release_host(host);
- }
- }
+ nlm_host_rebooted(&saddr, argp->mon, argp->len, argp->state);
return rpc_success;
}
@@ -495,7 +484,7 @@ nlmsvc_proc_granted_res(struct svc_rqst *rqstp, struct nlm_res *argp,
dprintk("lockd: GRANTED_RES called\n");
- nlmsvc_grant_reply(rqstp, &argp->cookie, argp->status);
+ nlmsvc_grant_reply(&argp->cookie, argp->status);
return rpc_success;
}
diff --git a/fs/lockd/svcshare.c b/fs/lockd/svcshare.c
index 27288c83da96..b9926ce8782e 100644
--- a/fs/lockd/svcshare.c
+++ b/fs/lockd/svcshare.c
@@ -85,24 +85,20 @@ nlmsvc_unshare_file(struct nlm_host *host, struct nlm_file *file,
}
/*
- * Traverse all shares for a given file (and host).
- * NLM_ACT_CHECK is handled by nlmsvc_inspect_file.
+ * Traverse all shares for a given file, and delete
+ * those owned by the given (type of) host
*/
-void
-nlmsvc_traverse_shares(struct nlm_host *host, struct nlm_file *file, int action)
+void nlmsvc_traverse_shares(struct nlm_host *host, struct nlm_file *file,
+ nlm_host_match_fn_t match)
{
struct nlm_share *share, **shpp;
shpp = &file->f_shares;
while ((share = *shpp) != NULL) {
- if (action == NLM_ACT_MARK)
- share->s_host->h_inuse = 1;
- else if (action == NLM_ACT_UNLOCK) {
- if (host == NULL || host == share->s_host) {
- *shpp = share->s_next;
- kfree(share);
- continue;
- }
+ if (match(share->s_host, host)) {
+ *shpp = share->s_next;
+ kfree(share);
+ continue;
}
shpp = &share->s_next;
}
diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c
index a92dd98f8401..514f5f20701e 100644
--- a/fs/lockd/svcsubs.c
+++ b/fs/lockd/svcsubs.c
@@ -25,9 +25,9 @@
/*
* Global file hash table
*/
-#define FILE_HASH_BITS 5
+#define FILE_HASH_BITS 7
#define FILE_NRHASH (1<<FILE_HASH_BITS)
-static struct nlm_file * nlm_files[FILE_NRHASH];
+static struct hlist_head nlm_files[FILE_NRHASH];
static DEFINE_MUTEX(nlm_file_mutex);
#ifdef NFSD_DEBUG
@@ -82,6 +82,7 @@ u32
nlm_lookup_file(struct svc_rqst *rqstp, struct nlm_file **result,
struct nfs_fh *f)
{
+ struct hlist_node *pos;
struct nlm_file *file;
unsigned int hash;
u32 nfserr;
@@ -93,7 +94,7 @@ nlm_lookup_file(struct svc_rqst *rqstp, struct nlm_file **result,
/* Lock file table */
mutex_lock(&nlm_file_mutex);
- for (file = nlm_files[hash]; file; file = file->f_next)
+ hlist_for_each_entry(file, pos, &nlm_files[hash], f_list)
if (!nfs_compare_fh(&file->f_handle, f))
goto found;
@@ -105,8 +106,9 @@ nlm_lookup_file(struct svc_rqst *rqstp, struct nlm_file **result,
goto out_unlock;
memcpy(&file->f_handle, f, sizeof(struct nfs_fh));
- file->f_hash = hash;
- init_MUTEX(&file->f_sema);
+ mutex_init(&file->f_mutex);
+ INIT_HLIST_NODE(&file->f_list);
+ INIT_LIST_HEAD(&file->f_blocks);
/* Open the file. Note that this must not sleep for too long, else
* we would lock up lockd:-) So no NFS re-exports, folks.
@@ -115,12 +117,11 @@ nlm_lookup_file(struct svc_rqst *rqstp, struct nlm_file **result,
* the file.
*/
if ((nfserr = nlmsvc_ops->fopen(rqstp, f, &file->f_file)) != 0) {
- dprintk("lockd: open failed (nfserr %d)\n", ntohl(nfserr));
+ dprintk("lockd: open failed (error %d)\n", nfserr);
goto out_free;
}
- file->f_next = nlm_files[hash];
- nlm_files[hash] = file;
+ hlist_add_head(&file->f_list, &nlm_files[hash]);
found:
dprintk("lockd: found file %p (count %d)\n", file, file->f_count);
@@ -149,22 +150,14 @@ out_free:
static inline void
nlm_delete_file(struct nlm_file *file)
{
- struct nlm_file **fp, *f;
-
nlm_debug_print_file("closing file", file);
-
- fp = nlm_files + file->f_hash;
- while ((f = *fp) != NULL) {
- if (f == file) {
- *fp = file->f_next;
- nlmsvc_ops->fclose(file->f_file);
- kfree(file);
- return;
- }
- fp = &f->f_next;
+ if (!hlist_unhashed(&file->f_list)) {
+ hlist_del(&file->f_list);
+ nlmsvc_ops->fclose(file->f_file);
+ kfree(file);
+ } else {
+ printk(KERN_WARNING "lockd: attempt to release unknown file!\n");
}
-
- printk(KERN_WARNING "lockd: attempt to release unknown file!\n");
}
/*
@@ -172,7 +165,8 @@ nlm_delete_file(struct nlm_file *file)
* action.
*/
static int
-nlm_traverse_locks(struct nlm_host *host, struct nlm_file *file, int action)
+nlm_traverse_locks(struct nlm_host *host, struct nlm_file *file,
+ nlm_host_match_fn_t match)
{
struct inode *inode = nlmsvc_file_inode(file);
struct file_lock *fl;
@@ -186,17 +180,11 @@ again:
/* update current lock count */
file->f_locks++;
+
lockhost = (struct nlm_host *) fl->fl_owner;
- if (action == NLM_ACT_MARK)
- lockhost->h_inuse = 1;
- else if (action == NLM_ACT_CHECK)
- return 1;
- else if (action == NLM_ACT_UNLOCK) {
+ if (match(lockhost, host)) {
struct file_lock lock = *fl;
- if (host && lockhost != host)
- continue;
-
lock.fl_type = F_UNLCK;
lock.fl_start = 0;
lock.fl_end = OFFSET_MAX;
@@ -213,53 +201,66 @@ again:
}
/*
- * Operate on a single file
+ * Inspect a single file
*/
static inline int
-nlm_inspect_file(struct nlm_host *host, struct nlm_file *file, int action)
+nlm_inspect_file(struct nlm_host *host, struct nlm_file *file, nlm_host_match_fn_t match)
{
- if (action == NLM_ACT_CHECK) {
- /* Fast path for mark and sweep garbage collection */
- if (file->f_count || file->f_blocks || file->f_shares)
+ nlmsvc_traverse_blocks(host, file, match);
+ nlmsvc_traverse_shares(host, file, match);
+ return nlm_traverse_locks(host, file, match);
+}
+
+/*
+ * Quick check whether there are still any locks, blocks or
+ * shares on a given file.
+ */
+static inline int
+nlm_file_inuse(struct nlm_file *file)
+{
+ struct inode *inode = nlmsvc_file_inode(file);
+ struct file_lock *fl;
+
+ if (file->f_count || !list_empty(&file->f_blocks) || file->f_shares)
+ return 1;
+
+ for (fl = inode->i_flock; fl; fl = fl->fl_next) {
+ if (fl->fl_lmops == &nlmsvc_lock_operations)
return 1;
- } else {
- nlmsvc_traverse_blocks(host, file, action);
- nlmsvc_traverse_shares(host, file, action);
}
- return nlm_traverse_locks(host, file, action);
+ file->f_locks = 0;
+ return 0;
}
/*
* Loop over all files in the file table.
*/
static int
-nlm_traverse_files(struct nlm_host *host, int action)
+nlm_traverse_files(struct nlm_host *host, nlm_host_match_fn_t match)
{
- struct nlm_file *file, **fp;
+ struct hlist_node *pos, *next;
+ struct nlm_file *file;
int i, ret = 0;
mutex_lock(&nlm_file_mutex);
for (i = 0; i < FILE_NRHASH; i++) {
- fp = nlm_files + i;
- while ((file = *fp) != NULL) {
+ hlist_for_each_entry_safe(file, pos, next, &nlm_files[i], f_list) {
file->f_count++;
mutex_unlock(&nlm_file_mutex);
/* Traverse locks, blocks and shares of this file
* and update file->f_locks count */
- if (nlm_inspect_file(host, file, action))
+ if (nlm_inspect_file(host, file, match))
ret = 1;
mutex_lock(&nlm_file_mutex);
file->f_count--;
/* No more references to this file. Let go of it. */
- if (!file->f_blocks && !file->f_locks
+ if (list_empty(&file->f_blocks) && !file->f_locks
&& !file->f_shares && !file->f_count) {
- *fp = file->f_next;
+ hlist_del(&file->f_list);
nlmsvc_ops->fclose(file->f_file);
kfree(file);
- } else {
- fp = &file->f_next;
}
}
}
@@ -286,23 +287,54 @@ nlm_release_file(struct nlm_file *file)
mutex_lock(&nlm_file_mutex);
/* If there are no more locks etc, delete the file */
- if(--file->f_count == 0) {
- if(!nlm_inspect_file(NULL, file, NLM_ACT_CHECK))
- nlm_delete_file(file);
- }
+ if (--file->f_count == 0 && !nlm_file_inuse(file))
+ nlm_delete_file(file);
mutex_unlock(&nlm_file_mutex);
}
/*
+ * Helpers function for resource traversal
+ *
+ * nlmsvc_mark_host:
+ * used by the garbage collector; simply sets h_inuse.
+ * Always returns 0.
+ *
+ * nlmsvc_same_host:
+ * returns 1 iff the two hosts match. Used to release
+ * all resources bound to a specific host.
+ *
+ * nlmsvc_is_client:
+ * returns 1 iff the host is a client.
+ * Used by nlmsvc_invalidate_all
+ */
+static int
+nlmsvc_mark_host(struct nlm_host *host, struct nlm_host *dummy)
+{
+ host->h_inuse = 1;
+ return 0;
+}
+
+static int
+nlmsvc_same_host(struct nlm_host *host, struct nlm_host *other)
+{
+ return host == other;
+}
+
+static int
+nlmsvc_is_client(struct nlm_host *host, struct nlm_host *dummy)
+{
+ return host->h_server;
+}
+
+/*
* Mark all hosts that still hold resources
*/
void
nlmsvc_mark_resources(void)
{
dprintk("lockd: nlmsvc_mark_resources\n");
-
- nlm_traverse_files(NULL, NLM_ACT_MARK);
+ nlm_traverse_files(NULL, nlmsvc_mark_host);
}
/*
@@ -313,23 +345,25 @@ nlmsvc_free_host_resources(struct nlm_host *host)
{
dprintk("lockd: nlmsvc_free_host_resources\n");
- if (nlm_traverse_files(host, NLM_ACT_UNLOCK))
+ if (nlm_traverse_files(host, nlmsvc_same_host)) {
printk(KERN_WARNING
- "lockd: couldn't remove all locks held by %s",
+ "lockd: couldn't remove all locks held by %s\n",
host->h_name);
+ BUG();
+ }
}
/*
- * delete all hosts structs for clients
+ * Remove all locks held for clients
*/
void
nlmsvc_invalidate_all(void)
{
- struct nlm_host *host;
- while ((host = nlm_find_client()) != NULL) {
- nlmsvc_free_host_resources(host);
- host->h_expires = 0;
- host->h_killed = 1;
- nlm_release_host(host);
- }
+ /* Release all locks held by NFS clients.
+ * Previously, the code would call
+ * nlmsvc_free_host_resources for each client in
+ * turn, which is about as inefficient as it gets.
+ * Now we just do it once in nlm_traverse_files.
+ */
+ nlm_traverse_files(NULL, nlmsvc_is_client);
}
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index cfe141e5d759..e13fa23bd108 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -319,12 +319,25 @@ svc_expkey_update(struct svc_expkey *new, struct svc_expkey *old)
static struct cache_head *export_table[EXPORT_HASHMAX];
+static void nfsd4_fslocs_free(struct nfsd4_fs_locations *fsloc)
+{
+ int i;
+
+ for (i = 0; i < fsloc->locations_count; i++) {
+ kfree(fsloc->locations[i].path);
+ kfree(fsloc->locations[i].hosts);
+ }
+ kfree(fsloc->locations);
+}
+
static void svc_export_put(struct kref *ref)
{
struct svc_export *exp = container_of(ref, struct svc_export, h.ref);
dput(exp->ex_dentry);
mntput(exp->ex_mnt);
auth_domain_put(exp->ex_client);
+ kfree(exp->ex_path);
+ nfsd4_fslocs_free(&exp->ex_fslocs);
kfree(exp);
}
@@ -386,6 +399,69 @@ static int check_export(struct inode *inode, int flags)
}
+#ifdef CONFIG_NFSD_V4
+
+static int
+fsloc_parse(char **mesg, char *buf, struct nfsd4_fs_locations *fsloc)
+{
+ int len;
+ int migrated, i, err;
+
+ len = qword_get(mesg, buf, PAGE_SIZE);
+ if (len != 5 || memcmp(buf, "fsloc", 5))
+ return 0;
+
+ /* listsize */
+ err = get_int(mesg, &fsloc->locations_count);
+ if (err)
+ return err;
+ if (fsloc->locations_count > MAX_FS_LOCATIONS)
+ return -EINVAL;
+ if (fsloc->locations_count == 0)
+ return 0;
+
+ fsloc->locations = kzalloc(fsloc->locations_count
+ * sizeof(struct nfsd4_fs_location), GFP_KERNEL);
+ if (!fsloc->locations)
+ return -ENOMEM;
+ for (i=0; i < fsloc->locations_count; i++) {
+ /* colon separated host list */
+ err = -EINVAL;
+ len = qword_get(mesg, buf, PAGE_SIZE);
+ if (len <= 0)
+ goto out_free_all;
+ err = -ENOMEM;
+ fsloc->locations[i].hosts = kstrdup(buf, GFP_KERNEL);
+ if (!fsloc->locations[i].hosts)
+ goto out_free_all;
+ err = -EINVAL;
+ /* slash separated path component list */
+ len = qword_get(mesg, buf, PAGE_SIZE);
+ if (len <= 0)
+ goto out_free_all;
+ err = -ENOMEM;
+ fsloc->locations[i].path = kstrdup(buf, GFP_KERNEL);
+ if (!fsloc->locations[i].path)
+ goto out_free_all;
+ }
+ /* migrated */
+ err = get_int(mesg, &migrated);
+ if (err)
+ goto out_free_all;
+ err = -EINVAL;
+ if (migrated < 0 || migrated > 1)
+ goto out_free_all;
+ fsloc->migrated = migrated;
+ return 0;
+out_free_all:
+ nfsd4_fslocs_free(fsloc);
+ return err;
+}
+
+#else /* CONFIG_NFSD_V4 */
+static inline int fsloc_parse(char **mesg, char *buf, struct nfsd4_fs_locations *fsloc) { return 0; }
+#endif
+
static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
{
/* client path expiry [flags anonuid anongid fsid] */
@@ -398,6 +474,7 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
int an_int;
nd.dentry = NULL;
+ exp.ex_path = NULL;
if (mesg[mlen-1] != '\n')
return -EINVAL;
@@ -428,6 +505,10 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
exp.ex_client = dom;
exp.ex_mnt = nd.mnt;
exp.ex_dentry = nd.dentry;
+ exp.ex_path = kstrdup(buf, GFP_KERNEL);
+ err = -ENOMEM;
+ if (!exp.ex_path)
+ goto out;
/* expiry */
err = -EINVAL;
@@ -435,6 +516,11 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
if (exp.h.expiry_time == 0)
goto out;
+ /* fs locations */
+ exp.ex_fslocs.locations = NULL;
+ exp.ex_fslocs.locations_count = 0;
+ exp.ex_fslocs.migrated = 0;
+
/* flags */
err = get_int(&mesg, &an_int);
if (err == -ENOENT)
@@ -460,6 +546,10 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
err = check_export(nd.dentry->d_inode, exp.ex_flags);
if (err) goto out;
+
+ err = fsloc_parse(&mesg, buf, &exp.ex_fslocs);
+ if (err)
+ goto out;
}
expp = svc_export_lookup(&exp);
@@ -473,6 +563,7 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
else
exp_put(expp);
out:
+ kfree(exp.ex_path);
if (nd.dentry)
path_release(&nd);
out_no_path:
@@ -482,7 +573,8 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
return err;
}
-static void exp_flags(struct seq_file *m, int flag, int fsid, uid_t anonu, uid_t anong);
+static void exp_flags(struct seq_file *m, int flag, int fsid,
+ uid_t anonu, uid_t anong, struct nfsd4_fs_locations *fslocs);
static int svc_export_show(struct seq_file *m,
struct cache_detail *cd,
@@ -501,8 +593,8 @@ static int svc_export_show(struct seq_file *m,
seq_putc(m, '(');
if (test_bit(CACHE_VALID, &h->flags) &&
!test_bit(CACHE_NEGATIVE, &h->flags))
- exp_flags(m, exp->ex_flags, exp->ex_fsid,
- exp->ex_anon_uid, exp->ex_anon_gid);
+ exp_flags(m, exp->ex_flags, exp->ex_fsid,
+ exp->ex_anon_uid, exp->ex_anon_gid, &exp->ex_fslocs);
seq_puts(m, ")\n");
return 0;
}
@@ -524,6 +616,10 @@ static void svc_export_init(struct cache_head *cnew, struct cache_head *citem)
new->ex_client = item->ex_client;
new->ex_dentry = dget(item->ex_dentry);
new->ex_mnt = mntget(item->ex_mnt);
+ new->ex_path = NULL;
+ new->ex_fslocs.locations = NULL;
+ new->ex_fslocs.locations_count = 0;
+ new->ex_fslocs.migrated = 0;
}
static void export_update(struct cache_head *cnew, struct cache_head *citem)
@@ -535,6 +631,14 @@ static void export_update(struct cache_head *cnew, struct cache_head *citem)
new->ex_anon_uid = item->ex_anon_uid;
new->ex_anon_gid = item->ex_anon_gid;
new->ex_fsid = item->ex_fsid;
+ new->ex_path = item->ex_path;
+ item->ex_path = NULL;
+ new->ex_fslocs.locations = item->ex_fslocs.locations;
+ item->ex_fslocs.locations = NULL;
+ new->ex_fslocs.locations_count = item->ex_fslocs.locations_count;
+ item->ex_fslocs.locations_count = 0;
+ new->ex_fslocs.migrated = item->ex_fslocs.migrated;
+ item->ex_fslocs.migrated = 0;
}
static struct cache_head *svc_export_alloc(void)
@@ -1048,30 +1152,21 @@ int
exp_pseudoroot(struct auth_domain *clp, struct svc_fh *fhp,
struct cache_req *creq)
{
- struct svc_expkey *fsid_key;
struct svc_export *exp;
int rv;
u32 fsidv[2];
mk_fsid_v1(fsidv, 0);
- fsid_key = exp_find_key(clp, 1, fsidv, creq);
- if (IS_ERR(fsid_key) && PTR_ERR(fsid_key) == -EAGAIN)
+ exp = exp_find(clp, 1, fsidv, creq);
+ if (IS_ERR(exp) && PTR_ERR(exp) == -EAGAIN)
return nfserr_dropit;
- if (!fsid_key || IS_ERR(fsid_key))
- return nfserr_perm;
-
- exp = exp_get_by_name(clp, fsid_key->ek_mnt, fsid_key->ek_dentry, creq);
if (exp == NULL)
- rv = nfserr_perm;
+ return nfserr_perm;
else if (IS_ERR(exp))
- rv = nfserrno(PTR_ERR(exp));
- else {
- rv = fh_compose(fhp, exp,
- fsid_key->ek_dentry, NULL);
- exp_put(exp);
- }
- cache_put(&fsid_key->h, &svc_expkey_cache);
+ return nfserrno(PTR_ERR(exp));
+ rv = fh_compose(fhp, exp, exp->ex_dentry, NULL);
+ exp_put(exp);
return rv;
}
@@ -1158,7 +1253,8 @@ static struct flags {
{ 0, {"", ""}}
};
-static void exp_flags(struct seq_file *m, int flag, int fsid, uid_t anonu, uid_t anong)
+static void exp_flags(struct seq_file *m, int flag, int fsid,
+ uid_t anonu, uid_t anong, struct nfsd4_fs_locations *fsloc)
{
int first = 0;
struct flags *flg;
@@ -1174,6 +1270,21 @@ static void exp_flags(struct seq_file *m, int flag, int fsid, uid_t anonu, uid_t
seq_printf(m, "%sanonuid=%d", first++?",":"", anonu);
if (anong != (gid_t)-2 && anong != (0x10000-2))
seq_printf(m, "%sanongid=%d", first++?",":"", anong);
+ if (fsloc && fsloc->locations_count > 0) {
+ char *loctype = (fsloc->migrated) ? "refer" : "replicas";
+ int i;
+
+ seq_printf(m, "%s%s=", first++?",":"", loctype);
+ seq_escape(m, fsloc->locations[0].path, ",;@ \t\n\\");
+ seq_putc(m, '@');
+ seq_escape(m, fsloc->locations[0].hosts, ",;@ \t\n\\");
+ for (i = 1; i < fsloc->locations_count; i++) {
+ seq_putc(m, ';');
+ seq_escape(m, fsloc->locations[i].path, ",;@ \t\n\\");
+ seq_putc(m, '@');
+ seq_escape(m, fsloc->locations[i].hosts, ",;@ \t\n\\");
+ }
+ }
}
static int e_show(struct seq_file *m, void *p)
diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
index fe56b38364cc..9187755661df 100644
--- a/fs/nfsd/nfs2acl.c
+++ b/fs/nfsd/nfs2acl.c
@@ -241,7 +241,7 @@ static int nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, u32 *p,
rqstp->rq_res.page_len = w;
while (w > 0) {
- if (!svc_take_res_page(rqstp))
+ if (!rqstp->rq_respages[rqstp->rq_resused++])
return 0;
w -= PAGE_SIZE;
}
@@ -333,4 +333,5 @@ struct svc_version nfsd_acl_version2 = {
.vs_proc = nfsd_acl_procedures2,
.vs_dispatch = nfsd_dispatch,
.vs_xdrsize = NFS3_SVC_XDRSIZE,
+ .vs_hidden = 1,
};
diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c
index 16e10c170aed..d4bdc00c1169 100644
--- a/fs/nfsd/nfs3acl.c
+++ b/fs/nfsd/nfs3acl.c
@@ -185,7 +185,7 @@ static int nfs3svc_encode_getaclres(struct svc_rqst *rqstp, u32 *p,
rqstp->rq_res.page_len = w;
while (w > 0) {
- if (!svc_take_res_page(rqstp))
+ if (!rqstp->rq_respages[rqstp->rq_resused++])
return 0;
w -= PAGE_SIZE;
}
@@ -263,5 +263,6 @@ struct svc_version nfsd_acl_version3 = {
.vs_proc = nfsd_acl_procedures3,
.vs_dispatch = nfsd_dispatch,
.vs_xdrsize = NFS3_SVC_XDRSIZE,
+ .vs_hidden = 1,
};
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index f61142afea44..a5ebc7dbb384 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -160,6 +160,7 @@ nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp,
struct nfsd3_readres *resp)
{
int nfserr;
+ u32 max_blocksize = svc_max_payload(rqstp);
dprintk("nfsd: READ(3) %s %lu bytes at %lu\n",
SVCFH_fmt(&argp->fh),
@@ -172,15 +173,15 @@ nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp,
*/
resp->count = argp->count;
- if (NFSSVC_MAXBLKSIZE < resp->count)
- resp->count = NFSSVC_MAXBLKSIZE;
+ if (max_blocksize < resp->count)
+ resp->count = max_blocksize;
svc_reserve(rqstp, ((1 + NFS3_POST_OP_ATTR_WORDS + 3)<<2) + resp->count +4);
fh_copy(&resp->fh, &argp->fh);
nfserr = nfsd_read(rqstp, &resp->fh, NULL,
argp->offset,
- argp->vec, argp->vlen,
+ rqstp->rq_vec, argp->vlen,
&resp->count);
if (nfserr == 0) {
struct inode *inode = resp->fh.fh_dentry->d_inode;
@@ -210,7 +211,7 @@ nfsd3_proc_write(struct svc_rqst *rqstp, struct nfsd3_writeargs *argp,
resp->committed = argp->stable;
nfserr = nfsd_write(rqstp, &resp->fh, NULL,
argp->offset,
- argp->vec, argp->vlen,
+ rqstp->rq_vec, argp->vlen,
argp->len,
&resp->committed);
resp->count = argp->count;
@@ -538,15 +539,16 @@ nfsd3_proc_fsinfo(struct svc_rqst * rqstp, struct nfsd_fhandle *argp,
struct nfsd3_fsinfores *resp)
{
int nfserr;
+ u32 max_blocksize = svc_max_payload(rqstp);
dprintk("nfsd: FSINFO(3) %s\n",
SVCFH_fmt(&argp->fh));
- resp->f_rtmax = NFSSVC_MAXBLKSIZE;
- resp->f_rtpref = NFSSVC_MAXBLKSIZE;
+ resp->f_rtmax = max_blocksize;
+ resp->f_rtpref = max_blocksize;
resp->f_rtmult = PAGE_SIZE;
- resp->f_wtmax = NFSSVC_MAXBLKSIZE;
- resp->f_wtpref = NFSSVC_MAXBLKSIZE;
+ resp->f_wtmax = max_blocksize;
+ resp->f_wtpref = max_blocksize;
resp->f_wtmult = PAGE_SIZE;
resp->f_dtpref = PAGE_SIZE;
resp->f_maxfilesize = ~(u32) 0;
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index 243d94b9653a..247d518248bf 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -330,6 +330,7 @@ nfs3svc_decode_readargs(struct svc_rqst *rqstp, u32 *p,
{
unsigned int len;
int v,pn;
+ u32 max_blocksize = svc_max_payload(rqstp);
if (!(p = decode_fh(p, &args->fh))
|| !(p = xdr_decode_hyper(p, &args->offset)))
@@ -337,17 +338,16 @@ nfs3svc_decode_readargs(struct svc_rqst *rqstp, u32 *p,
len = args->count = ntohl(*p++);
- if (len > NFSSVC_MAXBLKSIZE)
- len = NFSSVC_MAXBLKSIZE;
+ if (len > max_blocksize)
+ len = max_blocksize;
/* set up the kvec */
v=0;
while (len > 0) {
- pn = rqstp->rq_resused;
- svc_take_page(rqstp);
- args->vec[v].iov_base = page_address(rqstp->rq_respages[pn]);
- args->vec[v].iov_len = len < PAGE_SIZE? len : PAGE_SIZE;
- len -= args->vec[v].iov_len;
+ pn = rqstp->rq_resused++;
+ rqstp->rq_vec[v].iov_base = page_address(rqstp->rq_respages[pn]);
+ rqstp->rq_vec[v].iov_len = len < PAGE_SIZE? len : PAGE_SIZE;
+ len -= rqstp->rq_vec[v].iov_len;
v++;
}
args->vlen = v;
@@ -359,6 +359,7 @@ nfs3svc_decode_writeargs(struct svc_rqst *rqstp, u32 *p,
struct nfsd3_writeargs *args)
{
unsigned int len, v, hdr;
+ u32 max_blocksize = svc_max_payload(rqstp);
if (!(p = decode_fh(p, &args->fh))
|| !(p = xdr_decode_hyper(p, &args->offset)))
@@ -373,22 +374,22 @@ nfs3svc_decode_writeargs(struct svc_rqst *rqstp, u32 *p,
rqstp->rq_arg.len - hdr < len)
return 0;
- args->vec[0].iov_base = (void*)p;
- args->vec[0].iov_len = rqstp->rq_arg.head[0].iov_len - hdr;
+ rqstp->rq_vec[0].iov_base = (void*)p;
+ rqstp->rq_vec[0].iov_len = rqstp->rq_arg.head[0].iov_len - hdr;
- if (len > NFSSVC_MAXBLKSIZE)
- len = NFSSVC_MAXBLKSIZE;
+ if (len > max_blocksize)
+ len = max_blocksize;
v= 0;
- while (len > args->vec[v].iov_len) {
- len -= args->vec[v].iov_len;
+ while (len > rqstp->rq_vec[v].iov_len) {
+ len -= rqstp->rq_vec[v].iov_len;
v++;
- args->vec[v].iov_base = page_address(rqstp->rq_argpages[v]);
- args->vec[v].iov_len = PAGE_SIZE;
+ rqstp->rq_vec[v].iov_base = page_address(rqstp->rq_pages[v]);
+ rqstp->rq_vec[v].iov_len = PAGE_SIZE;
}
- args->vec[v].iov_len = len;
+ rqstp->rq_vec[v].iov_len = len;
args->vlen = v+1;
- return args->count == args->len && args->vec[0].iov_len > 0;
+ return args->count == args->len && rqstp->rq_vec[0].iov_len > 0;
}
int
@@ -446,11 +447,11 @@ nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, u32 *p,
* This page appears in the rq_res.pages list, but as pages_len is always
* 0, it won't get in the way
*/
- svc_take_page(rqstp);
len = ntohl(*p++);
if (len == 0 || len > NFS3_MAXPATHLEN || len >= PAGE_SIZE)
return 0;
- args->tname = new = page_address(rqstp->rq_respages[rqstp->rq_resused-1]);
+ args->tname = new =
+ page_address(rqstp->rq_respages[rqstp->rq_resused++]);
args->tlen = len;
/* first copy and check from the first page */
old = (char*)p;
@@ -522,8 +523,8 @@ nfs3svc_decode_readlinkargs(struct svc_rqst *rqstp, u32 *p,
{
if (!(p = decode_fh(p, &args->fh)))
return 0;
- svc_take_page(rqstp);
- args->buffer = page_address(rqstp->rq_respages[rqstp->rq_resused-1]);
+ args->buffer =
+ page_address(rqstp->rq_respages[rqstp->rq_resused++]);
return xdr_argsize_check(rqstp, p);
}
@@ -554,8 +555,8 @@ nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, u32 *p,
if (args->count > PAGE_SIZE)
args->count = PAGE_SIZE;
- svc_take_page(rqstp);
- args->buffer = page_address(rqstp->rq_respages[rqstp->rq_resused-1]);
+ args->buffer =
+ page_address(rqstp->rq_respages[rqstp->rq_resused++]);
return xdr_argsize_check(rqstp, p);
}
@@ -565,6 +566,7 @@ nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, u32 *p,
struct nfsd3_readdirargs *args)
{
int len, pn;
+ u32 max_blocksize = svc_max_payload(rqstp);
if (!(p = decode_fh(p, &args->fh)))
return 0;
@@ -573,13 +575,12 @@ nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, u32 *p,
args->dircount = ntohl(*p++);
args->count = ntohl(*p++);
- len = (args->count > NFSSVC_MAXBLKSIZE) ? NFSSVC_MAXBLKSIZE :
+ len = (args->count > max_blocksize) ? max_blocksize :
args->count;
args->count = len;
while (len > 0) {
- pn = rqstp->rq_resused;
- svc_take_page(rqstp);
+ pn = rqstp->rq_resused++;
if (!args->buffer)
args->buffer = page_address(rqstp->rq_respages[pn]);
len -= PAGE_SIZE;
@@ -668,7 +669,6 @@ nfs3svc_encode_readlinkres(struct svc_rqst *rqstp, u32 *p,
rqstp->rq_res.page_len = resp->len;
if (resp->len & 3) {
/* need to pad the tail */
- rqstp->rq_restailpage = 0;
rqstp->rq_res.tail[0].iov_base = p;
*p = 0;
rqstp->rq_res.tail[0].iov_len = 4 - (resp->len&3);
@@ -693,7 +693,6 @@ nfs3svc_encode_readres(struct svc_rqst *rqstp, u32 *p,
rqstp->rq_res.page_len = resp->count;
if (resp->count & 3) {
/* need to pad the tail */
- rqstp->rq_restailpage = 0;
rqstp->rq_res.tail[0].iov_base = p;
*p = 0;
rqstp->rq_res.tail[0].iov_len = 4 - (resp->count & 3);
@@ -768,7 +767,6 @@ nfs3svc_encode_readdirres(struct svc_rqst *rqstp, u32 *p,
rqstp->rq_res.page_len = (resp->count) << 2;
/* add the 'tail' to the end of the 'head' page - page 0. */
- rqstp->rq_restailpage = 0;
rqstp->rq_res.tail[0].iov_base = p;
*p++ = 0; /* no more entries */
*p++ = htonl(resp->common.err == nfserr_eof);
diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c
index edb107e61b91..5d94555cdc83 100644
--- a/fs/nfsd/nfs4acl.c
+++ b/fs/nfsd/nfs4acl.c
@@ -63,6 +63,8 @@
#define NFS4_INHERITANCE_FLAGS (NFS4_ACE_FILE_INHERIT_ACE \
| NFS4_ACE_DIRECTORY_INHERIT_ACE | NFS4_ACE_INHERIT_ONLY_ACE)
+#define NFS4_SUPPORTED_FLAGS (NFS4_INHERITANCE_FLAGS | NFS4_ACE_IDENTIFIER_GROUP)
+
#define MASK_EQUAL(mask1, mask2) \
( ((mask1) & NFS4_ACE_MASK_ALL) == ((mask2) & NFS4_ACE_MASK_ALL) )
@@ -96,24 +98,26 @@ deny_mask(u32 allow_mask, unsigned int flags)
/* XXX: modify functions to return NFS errors; they're only ever
* used by nfs code, after all.... */
-static int
-mode_from_nfs4(u32 perm, unsigned short *mode, unsigned int flags)
+/* We only map from NFSv4 to POSIX ACLs when setting ACLs, when we err on the
+ * side of being more restrictive, so the mode bit mapping below is
+ * pessimistic. An optimistic version would be needed to handle DENY's,
+ * but we espect to coalesce all ALLOWs and DENYs before mapping to mode
+ * bits. */
+
+static void
+low_mode_from_nfs4(u32 perm, unsigned short *mode, unsigned int flags)
{
- u32 ignore = 0;
+ u32 write_mode = NFS4_WRITE_MODE;
- if (!(flags & NFS4_ACL_DIR))
- ignore |= NFS4_ACE_DELETE_CHILD; /* ignore it */
- perm |= ignore;
+ if (flags & NFS4_ACL_DIR)
+ write_mode |= NFS4_ACE_DELETE_CHILD;
*mode = 0;
if ((perm & NFS4_READ_MODE) == NFS4_READ_MODE)
*mode |= ACL_READ;
- if ((perm & NFS4_WRITE_MODE) == NFS4_WRITE_MODE)
+ if ((perm & write_mode) == write_mode)
*mode |= ACL_WRITE;
if ((perm & NFS4_EXECUTE_MODE) == NFS4_EXECUTE_MODE)
*mode |= ACL_EXECUTE;
- if (!MASK_EQUAL(perm, ignore|mask_from_posix(*mode, flags)))
- return -EINVAL;
- return 0;
}
struct ace_container {
@@ -338,38 +342,6 @@ sort_pacl(struct posix_acl *pacl)
return;
}
-static int
-write_pace(struct nfs4_ace *ace, struct posix_acl *pacl,
- struct posix_acl_entry **pace, short tag, unsigned int flags)
-{
- struct posix_acl_entry *this = *pace;
-
- if (*pace == pacl->a_entries + pacl->a_count)
- return -EINVAL; /* fell off the end */
- (*pace)++;
- this->e_tag = tag;
- if (tag == ACL_USER_OBJ)
- flags |= NFS4_ACL_OWNER;
- if (mode_from_nfs4(ace->access_mask, &this->e_perm, flags))
- return -EINVAL;
- this->e_id = (tag == ACL_USER || tag == ACL_GROUP ?
- ace->who : ACL_UNDEFINED_ID);
- return 0;
-}
-
-static struct nfs4_ace *
-get_next_v4_ace(struct list_head **p, struct list_head *head)
-{
- struct nfs4_ace *ace;
-
- *p = (*p)->next;
- if (*p == head)
- return NULL;
- ace = list_entry(*p, struct nfs4_ace, l_ace);
-
- return ace;
-}
-
int
nfs4_acl_nfsv4_to_posix(struct nfs4_acl *acl, struct posix_acl **pacl,
struct posix_acl **dpacl, unsigned int flags)
@@ -385,42 +357,23 @@ nfs4_acl_nfsv4_to_posix(struct nfs4_acl *acl, struct posix_acl **pacl,
goto out;
error = nfs4_acl_split(acl, dacl);
- if (error < 0)
+ if (error)
goto out_acl;
- if (pacl != NULL) {
- if (acl->naces == 0) {
- error = -ENODATA;
- goto try_dpacl;
- }
-
- *pacl = _nfsv4_to_posix_one(acl, flags);
- if (IS_ERR(*pacl)) {
- error = PTR_ERR(*pacl);
- *pacl = NULL;
- goto out_acl;
- }
+ *pacl = _nfsv4_to_posix_one(acl, flags);
+ if (IS_ERR(*pacl)) {
+ error = PTR_ERR(*pacl);
+ *pacl = NULL;
+ goto out_acl;
}
-try_dpacl:
- if (dpacl != NULL) {
- if (dacl->naces == 0) {
- if (pacl == NULL || *pacl == NULL)
- error = -ENODATA;
- goto out_acl;
- }
-
- error = 0;
- *dpacl = _nfsv4_to_posix_one(dacl, flags);
- if (IS_ERR(*dpacl)) {
- error = PTR_ERR(*dpacl);
- *dpacl = NULL;
- goto out_acl;
- }
+ *dpacl = _nfsv4_to_posix_one(dacl, flags);
+ if (IS_ERR(*dpacl)) {
+ error = PTR_ERR(*dpacl);
+ *dpacl = NULL;
}
-
out_acl:
- if (error && pacl) {
+ if (error) {
posix_acl_release(*pacl);
*pacl = NULL;
}
@@ -429,349 +382,311 @@ out:
return error;
}
+/*
+ * While processing the NFSv4 ACE, this maintains bitmasks representing
+ * which permission bits have been allowed and which denied to a given
+ * entity: */
+struct posix_ace_state {
+ u32 allow;
+ u32 deny;
+};
+
+struct posix_user_ace_state {
+ uid_t uid;
+ struct posix_ace_state perms;
+};
+
+struct posix_ace_state_array {
+ int n;
+ struct posix_user_ace_state aces[];
+};
+
+/*
+ * While processing the NFSv4 ACE, this maintains the partial permissions
+ * calculated so far: */
+
+struct posix_acl_state {
+ struct posix_ace_state owner;
+ struct posix_ace_state group;
+ struct posix_ace_state other;
+ struct posix_ace_state everyone;
+ struct posix_ace_state mask; /* Deny unused in this case */
+ struct posix_ace_state_array *users;
+ struct posix_ace_state_array *groups;
+};
+
static int
-same_who(struct nfs4_ace *a, struct nfs4_ace *b)
+init_state(struct posix_acl_state *state, int cnt)
{
- return a->whotype == b->whotype &&
- (a->whotype != NFS4_ACL_WHO_NAMED || a->who == b->who);
+ int alloc;
+
+ memset(state, 0, sizeof(struct posix_acl_state));
+ /*
+ * In the worst case, each individual acl could be for a distinct
+ * named user or group, but we don't no which, so we allocate
+ * enough space for either:
+ */
+ alloc = sizeof(struct posix_ace_state_array)
+ + cnt*sizeof(struct posix_ace_state);
+ state->users = kzalloc(alloc, GFP_KERNEL);
+ if (!state->users)
+ return -ENOMEM;
+ state->groups = kzalloc(alloc, GFP_KERNEL);
+ if (!state->groups) {
+ kfree(state->users);
+ return -ENOMEM;
+ }
+ return 0;
}
-static int
-complementary_ace_pair(struct nfs4_ace *allow, struct nfs4_ace *deny,
- unsigned int flags)
-{
- int ignore = 0;
- if (!(flags & NFS4_ACL_DIR))
- ignore |= NFS4_ACE_DELETE_CHILD;
- return MASK_EQUAL(ignore|deny_mask(allow->access_mask, flags),
- ignore|deny->access_mask) &&
- allow->type == NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE &&
- deny->type == NFS4_ACE_ACCESS_DENIED_ACE_TYPE &&
- allow->flag == deny->flag &&
- same_who(allow, deny);
+static void
+free_state(struct posix_acl_state *state) {
+ kfree(state->users);
+ kfree(state->groups);
}
-static inline int
-user_obj_from_v4(struct nfs4_acl *n4acl, struct list_head **p,
- struct posix_acl *pacl, struct posix_acl_entry **pace,
- unsigned int flags)
+static inline void add_to_mask(struct posix_acl_state *state, struct posix_ace_state *astate)
{
- int error = -EINVAL;
- struct nfs4_ace *ace, *ace2;
-
- ace = get_next_v4_ace(p, &n4acl->ace_head);
- if (ace == NULL)
- goto out;
- if (ace2type(ace) != ACL_USER_OBJ)
- goto out;
- error = write_pace(ace, pacl, pace, ACL_USER_OBJ, flags);
- if (error < 0)
- goto out;
- error = -EINVAL;
- ace2 = get_next_v4_ace(p, &n4acl->ace_head);
- if (ace2 == NULL)
- goto out;
- if (!complementary_ace_pair(ace, ace2, flags))
- goto out;
- error = 0;
-out:
- return error;
+ state->mask.allow |= astate->allow;
}
-static inline int
-users_from_v4(struct nfs4_acl *n4acl, struct list_head **p,
- struct nfs4_ace **mask_ace,
- struct posix_acl *pacl, struct posix_acl_entry **pace,
- unsigned int flags)
-{
- int error = -EINVAL;
- struct nfs4_ace *ace, *ace2;
+/*
+ * Certain bits (SYNCHRONIZE, DELETE, WRITE_OWNER, READ/WRITE_NAMED_ATTRS,
+ * READ_ATTRIBUTES, READ_ACL) are currently unenforceable and don't translate
+ * to traditional read/write/execute permissions.
+ *
+ * It's problematic to reject acls that use certain mode bits, because it
+ * places the burden on users to learn the rules about which bits one
+ * particular server sets, without giving the user a lot of help--we return an
+ * error that could mean any number of different things. To make matters
+ * worse, the problematic bits might be introduced by some application that's
+ * automatically mapping from some other acl model.
+ *
+ * So wherever possible we accept anything, possibly erring on the side of
+ * denying more permissions than necessary.
+ *
+ * However we do reject *explicit* DENY's of a few bits representing
+ * permissions we could never deny:
+ */
- ace = get_next_v4_ace(p, &n4acl->ace_head);
- if (ace == NULL)
- goto out;
- while (ace2type(ace) == ACL_USER) {
- if (ace->type != NFS4_ACE_ACCESS_DENIED_ACE_TYPE)
- goto out;
- if (*mask_ace &&
- !MASK_EQUAL(ace->access_mask, (*mask_ace)->access_mask))
- goto out;
- *mask_ace = ace;
- ace = get_next_v4_ace(p, &n4acl->ace_head);
- if (ace == NULL)
- goto out;
- if (ace->type != NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE)
- goto out;
- error = write_pace(ace, pacl, pace, ACL_USER, flags);
- if (error < 0)
- goto out;
- error = -EINVAL;
- ace2 = get_next_v4_ace(p, &n4acl->ace_head);
- if (ace2 == NULL)
- goto out;
- if (!complementary_ace_pair(ace, ace2, flags))
- goto out;
- if ((*mask_ace)->flag != ace2->flag ||
- !same_who(*mask_ace, ace2))
- goto out;
- ace = get_next_v4_ace(p, &n4acl->ace_head);
- if (ace == NULL)
- goto out;
- }
- error = 0;
-out:
- return error;
+static inline int check_deny(u32 mask, int isowner)
+{
+ if (mask & (NFS4_ACE_READ_ATTRIBUTES | NFS4_ACE_READ_ACL))
+ return -EINVAL;
+ if (!isowner)
+ return 0;
+ if (mask & (NFS4_ACE_WRITE_ATTRIBUTES | NFS4_ACE_WRITE_ACL))
+ return -EINVAL;
+ return 0;
}
-static inline int
-group_obj_and_groups_from_v4(struct nfs4_acl *n4acl, struct list_head **p,
- struct nfs4_ace **mask_ace,
- struct posix_acl *pacl, struct posix_acl_entry **pace,
- unsigned int flags)
+static struct posix_acl *
+posix_state_to_acl(struct posix_acl_state *state, unsigned int flags)
{
- int error = -EINVAL;
- struct nfs4_ace *ace, *ace2;
- struct ace_container *ac;
- struct list_head group_l;
-
- INIT_LIST_HEAD(&group_l);
- ace = list_entry(*p, struct nfs4_ace, l_ace);
-
- /* group owner (mask and allow aces) */
+ struct posix_acl_entry *pace;
+ struct posix_acl *pacl;
+ int nace;
+ int i, error = 0;
- if (pacl->a_count != 3) {
- /* then the group owner should be preceded by mask */
- if (ace->type != NFS4_ACE_ACCESS_DENIED_ACE_TYPE)
- goto out;
- if (*mask_ace &&
- !MASK_EQUAL(ace->access_mask, (*mask_ace)->access_mask))
- goto out;
- *mask_ace = ace;
- ace = get_next_v4_ace(p, &n4acl->ace_head);
- if (ace == NULL)
- goto out;
+ nace = 4 + state->users->n + state->groups->n;
+ pacl = posix_acl_alloc(nace, GFP_KERNEL);
+ if (!pacl)
+ return ERR_PTR(-ENOMEM);
- if ((*mask_ace)->flag != ace->flag || !same_who(*mask_ace, ace))
- goto out;
+ pace = pacl->a_entries;
+ pace->e_tag = ACL_USER_OBJ;
+ error = check_deny(state->owner.deny, 1);
+ if (error)
+ goto out_err;
+ low_mode_from_nfs4(state->owner.allow, &pace->e_perm, flags);
+ pace->e_id = ACL_UNDEFINED_ID;
+
+ for (i=0; i < state->users->n; i++) {
+ pace++;
+ pace->e_tag = ACL_USER;
+ error = check_deny(state->users->aces[i].perms.deny, 0);
+ if (error)
+ goto out_err;
+ low_mode_from_nfs4(state->users->aces[i].perms.allow,
+ &pace->e_perm, flags);
+ pace->e_id = state->users->aces[i].uid;
+ add_to_mask(state, &state->users->aces[i].perms);
}
- if (ace2type(ace) != ACL_GROUP_OBJ)
- goto out;
-
- ac = kmalloc(sizeof(*ac), GFP_KERNEL);
- error = -ENOMEM;
- if (ac == NULL)
- goto out;
- ac->ace = ace;
- list_add_tail(&ac->ace_l, &group_l);
-
- error = -EINVAL;
- if (ace->type != NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE)
- goto out;
-
- error = write_pace(ace, pacl, pace, ACL_GROUP_OBJ, flags);
- if (error < 0)
- goto out;
-
- error = -EINVAL;
- ace = get_next_v4_ace(p, &n4acl->ace_head);
- if (ace == NULL)
- goto out;
-
- /* groups (mask and allow aces) */
-
- while (ace2type(ace) == ACL_GROUP) {
- if (*mask_ace == NULL)
- goto out;
-
- if (ace->type != NFS4_ACE_ACCESS_DENIED_ACE_TYPE ||
- !MASK_EQUAL(ace->access_mask, (*mask_ace)->access_mask))
- goto out;
- *mask_ace = ace;
+ pace++;
+ pace->e_tag = ACL_GROUP_OBJ;
+ error = check_deny(state->group.deny, 0);
+ if (error)
+ goto out_err;
+ low_mode_from_nfs4(state->group.allow, &pace->e_perm, flags);
+ pace->e_id = ACL_UNDEFINED_ID;
+ add_to_mask(state, &state->group);
+
+ for (i=0; i < state->groups->n; i++) {
+ pace++;
+ pace->e_tag = ACL_GROUP;
+ error = check_deny(state->groups->aces[i].perms.deny, 0);
+ if (error)
+ goto out_err;
+ low_mode_from_nfs4(state->groups->aces[i].perms.allow,
+ &pace->e_perm, flags);
+ pace->e_id = state->groups->aces[i].uid;
+ add_to_mask(state, &state->groups->aces[i].perms);
+ }
- ace = get_next_v4_ace(p, &n4acl->ace_head);
- if (ace == NULL)
- goto out;
- ac = kmalloc(sizeof(*ac), GFP_KERNEL);
- error = -ENOMEM;
- if (ac == NULL)
- goto out;
- error = -EINVAL;
- if (ace->type != NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE ||
- !same_who(ace, *mask_ace))
- goto out;
+ pace++;
+ pace->e_tag = ACL_MASK;
+ low_mode_from_nfs4(state->mask.allow, &pace->e_perm, flags);
+ pace->e_id = ACL_UNDEFINED_ID;
- ac->ace = ace;
- list_add_tail(&ac->ace_l, &group_l);
+ pace++;
+ pace->e_tag = ACL_OTHER;
+ error = check_deny(state->other.deny, 0);
+ if (error)
+ goto out_err;
+ low_mode_from_nfs4(state->other.allow, &pace->e_perm, flags);
+ pace->e_id = ACL_UNDEFINED_ID;
- error = write_pace(ace, pacl, pace, ACL_GROUP, flags);
- if (error < 0)
- goto out;
- error = -EINVAL;
- ace = get_next_v4_ace(p, &n4acl->ace_head);
- if (ace == NULL)
- goto out;
- }
+ return pacl;
+out_err:
+ posix_acl_release(pacl);
+ return ERR_PTR(error);
+}
- /* group owner (deny ace) */
+static inline void allow_bits(struct posix_ace_state *astate, u32 mask)
+{
+ /* Allow all bits in the mask not already denied: */
+ astate->allow |= mask & ~astate->deny;
+}
- if (ace2type(ace) != ACL_GROUP_OBJ)
- goto out;
- ac = list_entry(group_l.next, struct ace_container, ace_l);
- ace2 = ac->ace;
- if (!complementary_ace_pair(ace2, ace, flags))
- goto out;
- list_del(group_l.next);
- kfree(ac);
+static inline void deny_bits(struct posix_ace_state *astate, u32 mask)
+{
+ /* Deny all bits in the mask not already allowed: */
+ astate->deny |= mask & ~astate->allow;
+}
- /* groups (deny aces) */
+static int find_uid(struct posix_acl_state *state, struct posix_ace_state_array *a, uid_t uid)
+{
+ int i;
- while (!list_empty(&group_l)) {
- ace = get_next_v4_ace(p, &n4acl->ace_head);
- if (ace == NULL)
- goto out;
- if (ace2type(ace) != ACL_GROUP)
- goto out;
- ac = list_entry(group_l.next, struct ace_container, ace_l);
- ace2 = ac->ace;
- if (!complementary_ace_pair(ace2, ace, flags))
- goto out;
- list_del(group_l.next);
- kfree(ac);
- }
+ for (i = 0; i < a->n; i++)
+ if (a->aces[i].uid == uid)
+ return i;
+ /* Not found: */
+ a->n++;
+ a->aces[i].uid = uid;
+ a->aces[i].perms.allow = state->everyone.allow;
+ a->aces[i].perms.deny = state->everyone.deny;
- ace = get_next_v4_ace(p, &n4acl->ace_head);
- if (ace == NULL)
- goto out;
- if (ace2type(ace) != ACL_OTHER)
- goto out;
- error = 0;
-out:
- while (!list_empty(&group_l)) {
- ac = list_entry(group_l.next, struct ace_container, ace_l);
- list_del(group_l.next);
- kfree(ac);
- }
- return error;
+ return i;
}
-static inline int
-mask_from_v4(struct nfs4_acl *n4acl, struct list_head **p,
- struct nfs4_ace **mask_ace,
- struct posix_acl *pacl, struct posix_acl_entry **pace,
- unsigned int flags)
+static void deny_bits_array(struct posix_ace_state_array *a, u32 mask)
{
- int error = -EINVAL;
- struct nfs4_ace *ace;
+ int i;
- ace = list_entry(*p, struct nfs4_ace, l_ace);
- if (pacl->a_count != 3) {
- if (*mask_ace == NULL)
- goto out;
- (*mask_ace)->access_mask = deny_mask((*mask_ace)->access_mask, flags);
- write_pace(*mask_ace, pacl, pace, ACL_MASK, flags);
- }
- error = 0;
-out:
- return error;
+ for (i=0; i < a->n; i++)
+ deny_bits(&a->aces[i].perms, mask);
}
-static inline int
-other_from_v4(struct nfs4_acl *n4acl, struct list_head **p,
- struct posix_acl *pacl, struct posix_acl_entry **pace,
- unsigned int flags)
+static void allow_bits_array(struct posix_ace_state_array *a, u32 mask)
{
- int error = -EINVAL;
- struct nfs4_ace *ace, *ace2;
+ int i;
- ace = list_entry(*p, struct nfs4_ace, l_ace);
- if (ace->type != NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE)
- goto out;
- error = write_pace(ace, pacl, pace, ACL_OTHER, flags);
- if (error < 0)
- goto out;
- error = -EINVAL;
- ace2 = get_next_v4_ace(p, &n4acl->ace_head);
- if (ace2 == NULL)
- goto out;
- if (!complementary_ace_pair(ace, ace2, flags))
- goto out;
- error = 0;
-out:
- return error;
+ for (i=0; i < a->n; i++)
+ allow_bits(&a->aces[i].perms, mask);
}
-static int
-calculate_posix_ace_count(struct nfs4_acl *n4acl)
+static void process_one_v4_ace(struct posix_acl_state *state,
+ struct nfs4_ace *ace)
{
- if (n4acl->naces == 6) /* owner, owner group, and other only */
- return 3;
- else { /* Otherwise there must be a mask entry. */
- /* Also, the remaining entries are for named users and
- * groups, and come in threes (mask, allow, deny): */
- if (n4acl->naces < 7)
- return -EINVAL;
- if ((n4acl->naces - 7) % 3)
- return -EINVAL;
- return 4 + (n4acl->naces - 7)/3;
+ u32 mask = ace->access_mask;
+ int i;
+
+ switch (ace2type(ace)) {
+ case ACL_USER_OBJ:
+ if (ace->type == NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE) {
+ allow_bits(&state->owner, mask);
+ } else {
+ deny_bits(&state->owner, mask);
+ }
+ break;
+ case ACL_USER:
+ i = find_uid(state, state->users, ace->who);
+ if (ace->type == NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE) {
+ allow_bits(&state->users->aces[i].perms, mask);
+ } else {
+ deny_bits(&state->users->aces[i].perms, mask);
+ mask = state->users->aces[i].perms.deny;
+ deny_bits(&state->owner, mask);
+ }
+ break;
+ case ACL_GROUP_OBJ:
+ if (ace->type == NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE) {
+ allow_bits(&state->group, mask);
+ } else {
+ deny_bits(&state->group, mask);
+ mask = state->group.deny;
+ deny_bits(&state->owner, mask);
+ deny_bits(&state->everyone, mask);
+ deny_bits_array(state->users, mask);
+ deny_bits_array(state->groups, mask);
+ }
+ break;
+ case ACL_GROUP:
+ i = find_uid(state, state->groups, ace->who);
+ if (ace->type == NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE) {
+ allow_bits(&state->groups->aces[i].perms, mask);
+ } else {
+ deny_bits(&state->groups->aces[i].perms, mask);
+ mask = state->groups->aces[i].perms.deny;
+ deny_bits(&state->owner, mask);
+ deny_bits(&state->group, mask);
+ deny_bits(&state->everyone, mask);
+ deny_bits_array(state->users, mask);
+ deny_bits_array(state->groups, mask);
+ }
+ break;
+ case ACL_OTHER:
+ if (ace->type == NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE) {
+ allow_bits(&state->owner, mask);
+ allow_bits(&state->group, mask);
+ allow_bits(&state->other, mask);
+ allow_bits(&state->everyone, mask);
+ allow_bits_array(state->users, mask);
+ allow_bits_array(state->groups, mask);
+ } else {
+ deny_bits(&state->owner, mask);
+ deny_bits(&state->group, mask);
+ deny_bits(&state->other, mask);
+ deny_bits(&state->everyone, mask);
+ deny_bits_array(state->users, mask);
+ deny_bits_array(state->groups, mask);
+ }
}
}
-
static struct posix_acl *
_nfsv4_to_posix_one(struct nfs4_acl *n4acl, unsigned int flags)
{
+ struct posix_acl_state state;
struct posix_acl *pacl;
- int error = -EINVAL, nace = 0;
- struct list_head *p;
- struct nfs4_ace *mask_ace = NULL;
- struct posix_acl_entry *pace;
-
- nace = calculate_posix_ace_count(n4acl);
- if (nace < 0)
- goto out_err;
-
- pacl = posix_acl_alloc(nace, GFP_KERNEL);
- error = -ENOMEM;
- if (pacl == NULL)
- goto out_err;
-
- pace = &pacl->a_entries[0];
- p = &n4acl->ace_head;
-
- error = user_obj_from_v4(n4acl, &p, pacl, &pace, flags);
- if (error)
- goto out_acl;
-
- error = users_from_v4(n4acl, &p, &mask_ace, pacl, &pace, flags);
- if (error)
- goto out_acl;
+ struct nfs4_ace *ace;
+ int ret;
- error = group_obj_and_groups_from_v4(n4acl, &p, &mask_ace, pacl, &pace,
- flags);
- if (error)
- goto out_acl;
+ ret = init_state(&state, n4acl->naces);
+ if (ret)
+ return ERR_PTR(ret);
- error = mask_from_v4(n4acl, &p, &mask_ace, pacl, &pace, flags);
- if (error)
- goto out_acl;
- error = other_from_v4(n4acl, &p, pacl, &pace, flags);
- if (error)
- goto out_acl;
+ list_for_each_entry(ace, &n4acl->ace_head, l_ace)
+ process_one_v4_ace(&state, ace);
- error = -EINVAL;
- if (p->next != &n4acl->ace_head)
- goto out_acl;
- if (pace != pacl->a_entries + pacl->a_count)
- goto out_acl;
+ pacl = posix_state_to_acl(&state, flags);
- sort_pacl(pacl);
+ free_state(&state);
- return pacl;
-out_acl:
- posix_acl_release(pacl);
-out_err:
- pacl = ERR_PTR(error);
+ if (!IS_ERR(pacl))
+ sort_pacl(pacl);
return pacl;
}
@@ -785,22 +700,41 @@ nfs4_acl_split(struct nfs4_acl *acl, struct nfs4_acl *dacl)
list_for_each_safe(h, n, &acl->ace_head) {
ace = list_entry(h, struct nfs4_ace, l_ace);
- if ((ace->flag & NFS4_INHERITANCE_FLAGS)
- != NFS4_INHERITANCE_FLAGS)
- continue;
+ if (ace->type != NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE &&
+ ace->type != NFS4_ACE_ACCESS_DENIED_ACE_TYPE)
+ return -EINVAL;
- error = nfs4_acl_add_ace(dacl, ace->type, ace->flag,
- ace->access_mask, ace->whotype, ace->who);
- if (error < 0)
- goto out;
+ if (ace->flag & ~NFS4_SUPPORTED_FLAGS)
+ return -EINVAL;
- list_del(h);
- kfree(ace);
- acl->naces--;
+ switch (ace->flag & NFS4_INHERITANCE_FLAGS) {
+ case 0:
+ /* Leave this ace in the effective acl: */
+ continue;
+ case NFS4_INHERITANCE_FLAGS:
+ /* Add this ace to the default acl and remove it
+ * from the effective acl: */
+ error = nfs4_acl_add_ace(dacl, ace->type, ace->flag,
+ ace->access_mask, ace->whotype, ace->who);
+ if (error)
+ return error;
+ list_del(h);
+ kfree(ace);
+ acl->naces--;
+ break;
+ case NFS4_INHERITANCE_FLAGS & ~NFS4_ACE_INHERIT_ONLY_ACE:
+ /* Add this ace to the default, but leave it in
+ * the effective acl as well: */
+ error = nfs4_acl_add_ace(dacl, ace->type, ace->flag,
+ ace->access_mask, ace->whotype, ace->who);
+ if (error)
+ return error;
+ break;
+ default:
+ return -EINVAL;
+ }
}
-
-out:
- return error;
+ return 0;
}
static short
@@ -930,23 +864,6 @@ nfs4_acl_write_who(int who, char *p)
return -1;
}
-static inline int
-match_who(struct nfs4_ace *ace, uid_t owner, gid_t group, uid_t who)
-{
- switch (ace->whotype) {
- case NFS4_ACL_WHO_NAMED:
- return who == ace->who;
- case NFS4_ACL_WHO_OWNER:
- return who == owner;
- case NFS4_ACL_WHO_GROUP:
- return who == group;
- case NFS4_ACL_WHO_EVERYONE:
- return 1;
- default:
- return 0;
- }
-}
-
EXPORT_SYMBOL(nfs4_acl_new);
EXPORT_SYMBOL(nfs4_acl_free);
EXPORT_SYMBOL(nfs4_acl_add_ace);
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 15ded7a30a72..8333db12caca 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -646,7 +646,7 @@ nfsd4_write(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_writ
*p++ = nfssvc_boot.tv_usec;
status = nfsd_write(rqstp, current_fh, filp, write->wr_offset,
- write->wr_vec, write->wr_vlen, write->wr_buflen,
+ rqstp->rq_vec, write->wr_vlen, write->wr_buflen,
&write->wr_how_written);
if (filp)
fput(filp);
@@ -802,13 +802,29 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
* SETCLIENTID_CONFIRM, PUTFH and PUTROOTFH
* require a valid current filehandle
*/
- if ((!current_fh->fh_dentry) &&
- !((op->opnum == OP_PUTFH) || (op->opnum == OP_PUTROOTFH) ||
- (op->opnum == OP_SETCLIENTID) ||
- (op->opnum == OP_SETCLIENTID_CONFIRM) ||
- (op->opnum == OP_RENEW) || (op->opnum == OP_RESTOREFH) ||
- (op->opnum == OP_RELEASE_LOCKOWNER))) {
- op->status = nfserr_nofilehandle;
+ if (!current_fh->fh_dentry) {
+ if (!((op->opnum == OP_PUTFH) ||
+ (op->opnum == OP_PUTROOTFH) ||
+ (op->opnum == OP_SETCLIENTID) ||
+ (op->opnum == OP_SETCLIENTID_CONFIRM) ||
+ (op->opnum == OP_RENEW) ||
+ (op->opnum == OP_RESTOREFH) ||
+ (op->opnum == OP_RELEASE_LOCKOWNER))) {
+ op->status = nfserr_nofilehandle;
+ goto encode_op;
+ }
+ }
+ /* Check must be done at start of each operation, except
+ * for GETATTR and ops not listed as returning NFS4ERR_MOVED
+ */
+ else if (current_fh->fh_export->ex_fslocs.migrated &&
+ !((op->opnum == OP_GETATTR) ||
+ (op->opnum == OP_PUTROOTFH) ||
+ (op->opnum == OP_PUTPUBFH) ||
+ (op->opnum == OP_RENEW) ||
+ (op->opnum == OP_SETCLIENTID) ||
+ (op->opnum == OP_RELEASE_LOCKOWNER))) {
+ op->status = nfserr_moved;
goto encode_op;
}
switch (op->opnum) {
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 5be00436b5b8..41fc241b729a 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -60,6 +60,14 @@
#define NFSDDBG_FACILITY NFSDDBG_XDR
+/*
+ * As per referral draft, the fsid for a referral MUST be different from the fsid of the containing
+ * directory in order to indicate to the client that a filesystem boundary is present
+ * We use a fixed fsid for a referral
+ */
+#define NFS4_REFERRAL_FSID_MAJOR 0x8000000ULL
+#define NFS4_REFERRAL_FSID_MINOR 0x8000000ULL
+
static int
check_filename(char *str, int len, int err)
{
@@ -926,26 +934,26 @@ nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write)
printk(KERN_NOTICE "xdr error! (%s:%d)\n", __FILE__, __LINE__);
goto xdr_error;
}
- write->wr_vec[0].iov_base = p;
- write->wr_vec[0].iov_len = avail;
+ argp->rqstp->rq_vec[0].iov_base = p;
+ argp->rqstp->rq_vec[0].iov_len = avail;
v = 0;
len = write->wr_buflen;
- while (len > write->wr_vec[v].iov_len) {
- len -= write->wr_vec[v].iov_len;
+ while (len > argp->rqstp->rq_vec[v].iov_len) {
+ len -= argp->rqstp->rq_vec[v].iov_len;
v++;
- write->wr_vec[v].iov_base = page_address(argp->pagelist[0]);
+ argp->rqstp->rq_vec[v].iov_base = page_address(argp->pagelist[0]);
argp->pagelist++;
if (argp->pagelen >= PAGE_SIZE) {
- write->wr_vec[v].iov_len = PAGE_SIZE;
+ argp->rqstp->rq_vec[v].iov_len = PAGE_SIZE;
argp->pagelen -= PAGE_SIZE;
} else {
- write->wr_vec[v].iov_len = argp->pagelen;
+ argp->rqstp->rq_vec[v].iov_len = argp->pagelen;
argp->pagelen -= len;
}
}
- argp->end = (u32*) (write->wr_vec[v].iov_base + write->wr_vec[v].iov_len);
- argp->p = (u32*) (write->wr_vec[v].iov_base + (XDR_QUADLEN(len) << 2));
- write->wr_vec[v].iov_len = len;
+ argp->end = (u32*) (argp->rqstp->rq_vec[v].iov_base + argp->rqstp->rq_vec[v].iov_len);
+ argp->p = (u32*) (argp->rqstp->rq_vec[v].iov_base + (XDR_QUADLEN(len) << 2));
+ argp->rqstp->rq_vec[v].iov_len = len;
write->wr_vlen = v+1;
DECODE_TAIL;
@@ -1223,6 +1231,119 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
stateowner->so_replay.rp_buflen); \
} } while (0);
+/* Encode as an array of strings the string given with components
+ * seperated @sep.
+ */
+static int nfsd4_encode_components(char sep, char *components,
+ u32 **pp, int *buflen)
+{
+ u32 *p = *pp;
+ u32 *countp = p;
+ int strlen, count=0;
+ char *str, *end;
+
+ dprintk("nfsd4_encode_components(%s)\n", components);
+ if ((*buflen -= 4) < 0)
+ return nfserr_resource;
+ WRITE32(0); /* We will fill this in with @count later */
+ end = str = components;
+ while (*end) {
+ for (; *end && (*end != sep); end++)
+ ; /* Point to end of component */
+ strlen = end - str;
+ if (strlen) {
+ if ((*buflen -= ((XDR_QUADLEN(strlen) << 2) + 4)) < 0)
+ return nfserr_resource;
+ WRITE32(strlen);
+ WRITEMEM(str, strlen);
+ count++;
+ }
+ else
+ end++;
+ str = end;
+ }
+ *pp = p;
+ p = countp;
+ WRITE32(count);
+ return 0;
+}
+
+/*
+ * encode a location element of a fs_locations structure
+ */
+static int nfsd4_encode_fs_location4(struct nfsd4_fs_location *location,
+ u32 **pp, int *buflen)
+{
+ int status;
+ u32 *p = *pp;
+
+ status = nfsd4_encode_components(':', location->hosts, &p, buflen);
+ if (status)
+ return status;
+ status = nfsd4_encode_components('/', location->path, &p, buflen);
+ if (status)
+ return status;
+ *pp = p;
+ return 0;
+}
+
+/*
+ * Return the path to an export point in the pseudo filesystem namespace
+ * Returned string is safe to use as long as the caller holds a reference
+ * to @exp.
+ */
+static char *nfsd4_path(struct svc_rqst *rqstp, struct svc_export *exp)
+{
+ struct svc_fh tmp_fh;
+ char *path, *rootpath;
+ int stat;
+
+ fh_init(&tmp_fh, NFS4_FHSIZE);
+ stat = exp_pseudoroot(rqstp->rq_client, &tmp_fh, &rqstp->rq_chandle);
+ if (stat)
+ return ERR_PTR(stat);
+ rootpath = tmp_fh.fh_export->ex_path;
+
+ path = exp->ex_path;
+
+ if (strncmp(path, rootpath, strlen(rootpath))) {
+ printk("nfsd: fs_locations failed;"
+ "%s is not contained in %s\n", path, rootpath);
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+
+ return path + strlen(rootpath);
+}
+
+/*
+ * encode a fs_locations structure
+ */
+static int nfsd4_encode_fs_locations(struct svc_rqst *rqstp,
+ struct svc_export *exp,
+ u32 **pp, int *buflen)
+{
+ int status, i;
+ u32 *p = *pp;
+ struct nfsd4_fs_locations *fslocs = &exp->ex_fslocs;
+ char *root = nfsd4_path(rqstp, exp);
+
+ if (IS_ERR(root))
+ return PTR_ERR(root);
+ status = nfsd4_encode_components('/', root, &p, buflen);
+ if (status)
+ return status;
+ if ((*buflen -= 4) < 0)
+ return nfserr_resource;
+ WRITE32(fslocs->locations_count);
+ for (i=0; i<fslocs->locations_count; i++) {
+ status = nfsd4_encode_fs_location4(&fslocs->locations[i],
+ &p, buflen);
+ if (status)
+ return status;
+ }
+ *pp = p;
+ return 0;
+}
static u32 nfs4_ftypes[16] = {
NF4BAD, NF4FIFO, NF4CHR, NF4BAD,
@@ -1272,6 +1393,25 @@ nfsd4_encode_aclname(struct svc_rqst *rqstp, int whotype, uid_t id, int group,
return nfsd4_encode_name(rqstp, whotype, id, group, p, buflen);
}
+#define WORD0_ABSENT_FS_ATTRS (FATTR4_WORD0_FS_LOCATIONS | FATTR4_WORD0_FSID | \
+ FATTR4_WORD0_RDATTR_ERROR)
+#define WORD1_ABSENT_FS_ATTRS FATTR4_WORD1_MOUNTED_ON_FILEID
+
+static int fattr_handle_absent_fs(u32 *bmval0, u32 *bmval1, u32 *rdattr_err)
+{
+ /* As per referral draft: */
+ if (*bmval0 & ~WORD0_ABSENT_FS_ATTRS ||
+ *bmval1 & ~WORD1_ABSENT_FS_ATTRS) {
+ if (*bmval0 & FATTR4_WORD0_RDATTR_ERROR ||
+ *bmval0 & FATTR4_WORD0_FS_LOCATIONS)
+ *rdattr_err = NFSERR_MOVED;
+ else
+ return nfserr_moved;
+ }
+ *bmval0 &= WORD0_ABSENT_FS_ATTRS;
+ *bmval1 &= WORD1_ABSENT_FS_ATTRS;
+ return 0;
+}
/*
* Note: @fhp can be NULL; in this case, we might have to compose the filehandle
@@ -1294,6 +1434,7 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
u32 *attrlenp;
u32 dummy;
u64 dummy64;
+ u32 rdattr_err = 0;
u32 *p = buffer;
int status;
int aclsupport = 0;
@@ -1303,6 +1444,12 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
BUG_ON(bmval0 & ~NFSD_SUPPORTED_ATTRS_WORD0);
BUG_ON(bmval1 & ~NFSD_SUPPORTED_ATTRS_WORD1);
+ if (exp->ex_fslocs.migrated) {
+ status = fattr_handle_absent_fs(&bmval0, &bmval1, &rdattr_err);
+ if (status)
+ goto out;
+ }
+
status = vfs_getattr(exp->ex_mnt, dentry, &stat);
if (status)
goto out_nfserr;
@@ -1334,6 +1481,11 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
goto out_nfserr;
}
}
+ if (bmval0 & FATTR4_WORD0_FS_LOCATIONS) {
+ if (exp->ex_fslocs.locations == NULL) {
+ bmval0 &= ~FATTR4_WORD0_FS_LOCATIONS;
+ }
+ }
if ((buflen -= 16) < 0)
goto out_resource;
@@ -1343,12 +1495,15 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
attrlenp = p++; /* to be backfilled later */
if (bmval0 & FATTR4_WORD0_SUPPORTED_ATTRS) {
+ u32 word0 = NFSD_SUPPORTED_ATTRS_WORD0;
if ((buflen -= 12) < 0)
goto out_resource;
+ if (!aclsupport)
+ word0 &= ~FATTR4_WORD0_ACL;
+ if (!exp->ex_fslocs.locations)
+ word0 &= ~FATTR4_WORD0_FS_LOCATIONS;
WRITE32(2);
- WRITE32(aclsupport ?
- NFSD_SUPPORTED_ATTRS_WORD0 :
- NFSD_SUPPORTED_ATTRS_WORD0 & ~FATTR4_WORD0_ACL);
+ WRITE32(word0);
WRITE32(NFSD_SUPPORTED_ATTRS_WORD1);
}
if (bmval0 & FATTR4_WORD0_TYPE) {
@@ -1402,7 +1557,10 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
if (bmval0 & FATTR4_WORD0_FSID) {
if ((buflen -= 16) < 0)
goto out_resource;
- if (is_fsid(fhp, rqstp->rq_reffh)) {
+ if (exp->ex_fslocs.migrated) {
+ WRITE64(NFS4_REFERRAL_FSID_MAJOR);
+ WRITE64(NFS4_REFERRAL_FSID_MINOR);
+ } else if (is_fsid(fhp, rqstp->rq_reffh)) {
WRITE64((u64)exp->ex_fsid);
WRITE64((u64)0);
} else {
@@ -1425,7 +1583,7 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
if (bmval0 & FATTR4_WORD0_RDATTR_ERROR) {
if ((buflen -= 4) < 0)
goto out_resource;
- WRITE32(0);
+ WRITE32(rdattr_err);
}
if (bmval0 & FATTR4_WORD0_ACL) {
struct nfs4_ace *ace;
@@ -1513,6 +1671,13 @@ out_acl:
goto out_resource;
WRITE64((u64) statfs.f_files);
}
+ if (bmval0 & FATTR4_WORD0_FS_LOCATIONS) {
+ status = nfsd4_encode_fs_locations(rqstp, exp, &p, &buflen);
+ if (status == nfserr_resource)
+ goto out_resource;
+ if (status)
+ goto out;
+ }
if (bmval0 & FATTR4_WORD0_HOMOGENEOUS) {
if ((buflen -= 4) < 0)
goto out_resource;
@@ -1536,12 +1701,12 @@ out_acl:
if (bmval0 & FATTR4_WORD0_MAXREAD) {
if ((buflen -= 8) < 0)
goto out_resource;
- WRITE64((u64) NFSSVC_MAXBLKSIZE);
+ WRITE64((u64) svc_max_payload(rqstp));
}
if (bmval0 & FATTR4_WORD0_MAXWRITE) {
if ((buflen -= 8) < 0)
goto out_resource;
- WRITE64((u64) NFSSVC_MAXBLKSIZE);
+ WRITE64((u64) svc_max_payload(rqstp));
}
if (bmval1 & FATTR4_WORD1_MODE) {
if ((buflen -= 4) < 0)
@@ -1845,7 +2010,6 @@ nfsd4_encode_getattr(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_ge
nfserr = nfsd4_encode_fattr(fhp, fhp->fh_export, fhp->fh_dentry,
resp->p, &buflen, getattr->ga_bmval,
resp->rqstp);
-
if (!nfserr)
resp->p += buflen;
return nfserr;
@@ -2039,7 +2203,8 @@ nfsd4_encode_open_downgrade(struct nfsd4_compoundres *resp, int nfserr, struct n
}
static int
-nfsd4_encode_read(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_read *read)
+nfsd4_encode_read(struct nfsd4_compoundres *resp, int nfserr,
+ struct nfsd4_read *read)
{
u32 eof;
int v, pn;
@@ -2054,31 +2219,33 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_read
RESERVE_SPACE(8); /* eof flag and byte count */
- maxcount = NFSSVC_MAXBLKSIZE;
+ maxcount = svc_max_payload(resp->rqstp);
if (maxcount > read->rd_length)
maxcount = read->rd_length;
len = maxcount;
v = 0;
while (len > 0) {
- pn = resp->rqstp->rq_resused;
- svc_take_page(resp->rqstp);
- read->rd_iov[v].iov_base = page_address(resp->rqstp->rq_respages[pn]);
- read->rd_iov[v].iov_len = len < PAGE_SIZE ? len : PAGE_SIZE;
+ pn = resp->rqstp->rq_resused++;
+ resp->rqstp->rq_vec[v].iov_base =
+ page_address(resp->rqstp->rq_respages[pn]);
+ resp->rqstp->rq_vec[v].iov_len =
+ len < PAGE_SIZE ? len : PAGE_SIZE;
v++;
len -= PAGE_SIZE;
}
read->rd_vlen = v;
nfserr = nfsd_read(read->rd_rqstp, read->rd_fhp, read->rd_filp,
- read->rd_offset, read->rd_iov, read->rd_vlen,
+ read->rd_offset, resp->rqstp->rq_vec, read->rd_vlen,
&maxcount);
if (nfserr == nfserr_symlink)
nfserr = nfserr_inval;
if (nfserr)
return nfserr;
- eof = (read->rd_offset + maxcount >= read->rd_fhp->fh_dentry->d_inode->i_size);
+ eof = (read->rd_offset + maxcount >=
+ read->rd_fhp->fh_dentry->d_inode->i_size);
WRITE32(eof);
WRITE32(maxcount);
@@ -2088,7 +2255,6 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_read
resp->xbuf->page_len = maxcount;
/* Use rest of head for padding and remaining ops: */
- resp->rqstp->rq_restailpage = 0;
resp->xbuf->tail[0].iov_base = p;
resp->xbuf->tail[0].iov_len = 0;
if (maxcount&3) {
@@ -2113,8 +2279,7 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_r
if (resp->xbuf->page_len)
return nfserr_resource;
- svc_take_page(resp->rqstp);
- page = page_address(resp->rqstp->rq_respages[resp->rqstp->rq_resused-1]);
+ page = page_address(resp->rqstp->rq_respages[resp->rqstp->rq_resused++]);
maxcount = PAGE_SIZE;
RESERVE_SPACE(4);
@@ -2138,7 +2303,6 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_r
resp->xbuf->page_len = maxcount;
/* Use rest of head for padding and remaining ops: */
- resp->rqstp->rq_restailpage = 0;
resp->xbuf->tail[0].iov_base = p;
resp->xbuf->tail[0].iov_len = 0;
if (maxcount&3) {
@@ -2189,8 +2353,7 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_re
goto err_no_verf;
}
- svc_take_page(resp->rqstp);
- page = page_address(resp->rqstp->rq_respages[resp->rqstp->rq_resused-1]);
+ page = page_address(resp->rqstp->rq_respages[resp->rqstp->rq_resused++]);
readdir->common.err = 0;
readdir->buflen = maxcount;
readdir->buffer = page;
@@ -2215,10 +2378,10 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_re
p = readdir->buffer;
*p++ = 0; /* no more entries */
*p++ = htonl(readdir->common.err == nfserr_eof);
- resp->xbuf->page_len = ((char*)p) - (char*)page_address(resp->rqstp->rq_respages[resp->rqstp->rq_resused-1]);
+ resp->xbuf->page_len = ((char*)p) - (char*)page_address(
+ resp->rqstp->rq_respages[resp->rqstp->rq_resused-1]);
/* Use rest of head for padding and remaining ops: */
- resp->rqstp->rq_restailpage = 0;
resp->xbuf->tail[0].iov_base = tailbase;
resp->xbuf->tail[0].iov_len = 0;
resp->p = resp->xbuf->tail[0].iov_base;
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 5c6a477c20ec..39aed901514b 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -57,6 +57,7 @@ enum {
NFSD_Pool_Threads,
NFSD_Versions,
NFSD_Ports,
+ NFSD_MaxBlkSize,
/*
* The below MUST come last. Otherwise we leave a hole in nfsd_files[]
* with !CONFIG_NFSD_V4 and simple_fill_super() goes oops
@@ -82,6 +83,7 @@ static ssize_t write_threads(struct file *file, char *buf, size_t size);
static ssize_t write_pool_threads(struct file *file, char *buf, size_t size);
static ssize_t write_versions(struct file *file, char *buf, size_t size);
static ssize_t write_ports(struct file *file, char *buf, size_t size);
+static ssize_t write_maxblksize(struct file *file, char *buf, size_t size);
#ifdef CONFIG_NFSD_V4
static ssize_t write_leasetime(struct file *file, char *buf, size_t size);
static ssize_t write_recoverydir(struct file *file, char *buf, size_t size);
@@ -100,6 +102,7 @@ static ssize_t (*write_op[])(struct file *, char *, size_t) = {
[NFSD_Pool_Threads] = write_pool_threads,
[NFSD_Versions] = write_versions,
[NFSD_Ports] = write_ports,
+ [NFSD_MaxBlkSize] = write_maxblksize,
#ifdef CONFIG_NFSD_V4
[NFSD_Leasetime] = write_leasetime,
[NFSD_RecoveryDir] = write_recoverydir,
@@ -523,18 +526,20 @@ static ssize_t write_ports(struct file *file, char *buf, size_t size)
err = nfsd_create_serv();
if (!err) {
int proto = 0;
- err = lockd_up(proto);
- if (!err) {
- err = svc_addsock(nfsd_serv, fd, buf, &proto);
- if (err)
- lockd_down();
+ err = svc_addsock(nfsd_serv, fd, buf, &proto);
+ if (err >= 0) {
+ err = lockd_up(proto);
+ if (err < 0)
+ svc_sock_names(buf+strlen(buf)+1, nfsd_serv, buf);
}
/* Decrease the count, but don't shutdown the
* the service
*/
+ lock_kernel();
nfsd_serv->sv_nrthreads--;
+ unlock_kernel();
}
- return err;
+ return err < 0 ? err : 0;
}
if (buf[0] == '-') {
char *toclose = kstrdup(buf+1, GFP_KERNEL);
@@ -545,12 +550,43 @@ static ssize_t write_ports(struct file *file, char *buf, size_t size)
if (nfsd_serv)
len = svc_sock_names(buf, nfsd_serv, toclose);
unlock_kernel();
+ if (len >= 0)
+ lockd_down();
kfree(toclose);
return len;
}
return -EINVAL;
}
+int nfsd_max_blksize;
+
+static ssize_t write_maxblksize(struct file *file, char *buf, size_t size)
+{
+ char *mesg = buf;
+ if (size > 0) {
+ int bsize;
+ int rv = get_int(&mesg, &bsize);
+ if (rv)
+ return rv;
+ /* force bsize into allowed range and
+ * required alignment.
+ */
+ if (bsize < 1024)
+ bsize = 1024;
+ if (bsize > NFSSVC_MAXBLKSIZE)
+ bsize = NFSSVC_MAXBLKSIZE;
+ bsize &= ~(1024-1);
+ lock_kernel();
+ if (nfsd_serv && nfsd_serv->sv_nrthreads) {
+ unlock_kernel();
+ return -EBUSY;
+ }
+ nfsd_max_blksize = bsize;
+ unlock_kernel();
+ }
+ return sprintf(buf, "%d\n", nfsd_max_blksize);
+}
+
#ifdef CONFIG_NFSD_V4
extern time_t nfs4_leasetime(void);
@@ -616,6 +652,7 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent)
[NFSD_Pool_Threads] = {"pool_threads", &transaction_ops, S_IWUSR|S_IRUSR},
[NFSD_Versions] = {"versions", &transaction_ops, S_IWUSR|S_IRUSR},
[NFSD_Ports] = {"portlist", &transaction_ops, S_IWUSR|S_IRUGO},
+ [NFSD_MaxBlkSize] = {"max_block_size", &transaction_ops, S_IWUSR|S_IRUGO},
#ifdef CONFIG_NFSD_V4
[NFSD_Leasetime] = {"nfsv4leasetime", &transaction_ops, S_IWUSR|S_IRUSR},
[NFSD_RecoveryDir] = {"nfsv4recoverydir", &transaction_ops, S_IWUSR|S_IRUSR},
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index 06cd0db0f32b..9ee1dab5d44a 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -146,20 +146,20 @@ nfsd_proc_read(struct svc_rqst *rqstp, struct nfsd_readargs *argp,
* status, 17 words for fattr, and 1 word for the byte count.
*/
- if (NFSSVC_MAXBLKSIZE < argp->count) {
+ if (NFSSVC_MAXBLKSIZE_V2 < argp->count) {
printk(KERN_NOTICE
"oversized read request from %u.%u.%u.%u:%d (%d bytes)\n",
NIPQUAD(rqstp->rq_addr.sin_addr.s_addr),
ntohs(rqstp->rq_addr.sin_port),
argp->count);
- argp->count = NFSSVC_MAXBLKSIZE;
+ argp->count = NFSSVC_MAXBLKSIZE_V2;
}
svc_reserve(rqstp, (19<<2) + argp->count + 4);
resp->count = argp->count;
nfserr = nfsd_read(rqstp, fh_copy(&resp->fh, &argp->fh), NULL,
argp->offset,
- argp->vec, argp->vlen,
+ rqstp->rq_vec, argp->vlen,
&resp->count);
if (nfserr) return nfserr;
@@ -185,7 +185,7 @@ nfsd_proc_write(struct svc_rqst *rqstp, struct nfsd_writeargs *argp,
nfserr = nfsd_write(rqstp, fh_copy(&resp->fh, &argp->fh), NULL,
argp->offset,
- argp->vec, argp->vlen,
+ rqstp->rq_vec, argp->vlen,
argp->len,
&stable);
return nfsd_return_attrs(nfserr, resp);
@@ -225,7 +225,7 @@ nfsd_proc_create(struct svc_rqst *rqstp, struct nfsd_createargs *argp,
nfserr = nfserr_exist;
if (isdotent(argp->name, argp->len))
goto done;
- fh_lock(dirfhp);
+ fh_lock_nested(dirfhp, I_MUTEX_PARENT);
dchild = lookup_one_len(argp->name, dirfhp->fh_dentry, argp->len);
if (IS_ERR(dchild)) {
nfserr = nfserrno(PTR_ERR(dchild));
@@ -553,7 +553,7 @@ static struct svc_procedure nfsd_procedures2[18] = {
PROC(none, void, void, none, RC_NOCACHE, ST),
PROC(lookup, diropargs, diropres, fhandle, RC_NOCACHE, ST+FH+AT),
PROC(readlink, readlinkargs, readlinkres, none, RC_NOCACHE, ST+1+NFS_MAXPATHLEN/4),
- PROC(read, readargs, readres, fhandle, RC_NOCACHE, ST+AT+1+NFSSVC_MAXBLKSIZE/4),
+ PROC(read, readargs, readres, fhandle, RC_NOCACHE, ST+AT+1+NFSSVC_MAXBLKSIZE_V2/4),
PROC(none, void, void, none, RC_NOCACHE, ST),
PROC(write, writeargs, attrstat, fhandle, RC_REPLBUFF, ST+AT),
PROC(create, createargs, diropres, fhandle, RC_REPLBUFF, ST+FH+AT),
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 19443056ec30..6fa6340a5fb8 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -198,9 +198,26 @@ int nfsd_create_serv(void)
unlock_kernel();
return 0;
}
+ if (nfsd_max_blksize == 0) {
+ /* choose a suitable default */
+ struct sysinfo i;
+ si_meminfo(&i);
+ /* Aim for 1/4096 of memory per thread
+ * This gives 1MB on 4Gig machines
+ * But only uses 32K on 128M machines.
+ * Bottom out at 8K on 32M and smaller.
+ * Of course, this is only a default.
+ */
+ nfsd_max_blksize = NFSSVC_MAXBLKSIZE;
+ i.totalram <<= PAGE_SHIFT - 12;
+ while (nfsd_max_blksize > i.totalram &&
+ nfsd_max_blksize >= 8*1024*2)
+ nfsd_max_blksize /= 2;
+ }
atomic_set(&nfsd_busy, 0);
- nfsd_serv = svc_create_pooled(&nfsd_program, NFSD_BUFSIZE,
+ nfsd_serv = svc_create_pooled(&nfsd_program,
+ NFSD_BUFSIZE - NFSSVC_MAXBLKSIZE + nfsd_max_blksize,
nfsd_last_thread,
nfsd, SIG_NOCLEAN, THIS_MODULE);
if (nfsd_serv == NULL)
diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c
index 3f14a17eaa6e..1135c0d14557 100644
--- a/fs/nfsd/nfsxdr.c
+++ b/fs/nfsd/nfsxdr.c
@@ -254,19 +254,18 @@ nfssvc_decode_readargs(struct svc_rqst *rqstp, u32 *p,
len = args->count = ntohl(*p++);
p++; /* totalcount - unused */
- if (len > NFSSVC_MAXBLKSIZE)
- len = NFSSVC_MAXBLKSIZE;
+ if (len > NFSSVC_MAXBLKSIZE_V2)
+ len = NFSSVC_MAXBLKSIZE_V2;
/* set up somewhere to store response.
* We take pages, put them on reslist and include in iovec
*/
v=0;
while (len > 0) {
- pn=rqstp->rq_resused;
- svc_take_page(rqstp);
- args->vec[v].iov_base = page_address(rqstp->rq_respages[pn]);
- args->vec[v].iov_len = len < PAGE_SIZE?len:PAGE_SIZE;
- len -= args->vec[v].iov_len;
+ pn = rqstp->rq_resused++;
+ rqstp->rq_vec[v].iov_base = page_address(rqstp->rq_respages[pn]);
+ rqstp->rq_vec[v].iov_len = len < PAGE_SIZE?len:PAGE_SIZE;
+ len -= rqstp->rq_vec[v].iov_len;
v++;
}
args->vlen = v;
@@ -286,21 +285,21 @@ nfssvc_decode_writeargs(struct svc_rqst *rqstp, u32 *p,
args->offset = ntohl(*p++); /* offset */
p++; /* totalcount */
len = args->len = ntohl(*p++);
- args->vec[0].iov_base = (void*)p;
- args->vec[0].iov_len = rqstp->rq_arg.head[0].iov_len -
+ rqstp->rq_vec[0].iov_base = (void*)p;
+ rqstp->rq_vec[0].iov_len = rqstp->rq_arg.head[0].iov_len -
(((void*)p) - rqstp->rq_arg.head[0].iov_base);
- if (len > NFSSVC_MAXBLKSIZE)
- len = NFSSVC_MAXBLKSIZE;
+ if (len > NFSSVC_MAXBLKSIZE_V2)
+ len = NFSSVC_MAXBLKSIZE_V2;
v = 0;
- while (len > args->vec[v].iov_len) {
- len -= args->vec[v].iov_len;
+ while (len > rqstp->rq_vec[v].iov_len) {
+ len -= rqstp->rq_vec[v].iov_len;
v++;
- args->vec[v].iov_base = page_address(rqstp->rq_argpages[v]);
- args->vec[v].iov_len = PAGE_SIZE;
+ rqstp->rq_vec[v].iov_base = page_address(rqstp->rq_pages[v]);
+ rqstp->rq_vec[v].iov_len = PAGE_SIZE;
}
- args->vec[v].iov_len = len;
+ rqstp->rq_vec[v].iov_len = len;
args->vlen = v+1;
- return args->vec[0].iov_len > 0;
+ return rqstp->rq_vec[0].iov_len > 0;
}
int
@@ -333,8 +332,7 @@ nfssvc_decode_readlinkargs(struct svc_rqst *rqstp, u32 *p, struct nfsd_readlinka
{
if (!(p = decode_fh(p, &args->fh)))
return 0;
- svc_take_page(rqstp);
- args->buffer = page_address(rqstp->rq_respages[rqstp->rq_resused-1]);
+ args->buffer = page_address(rqstp->rq_respages[rqstp->rq_resused++]);
return xdr_argsize_check(rqstp, p);
}
@@ -375,8 +373,7 @@ nfssvc_decode_readdirargs(struct svc_rqst *rqstp, u32 *p,
if (args->count > PAGE_SIZE)
args->count = PAGE_SIZE;
- svc_take_page(rqstp);
- args->buffer = page_address(rqstp->rq_respages[rqstp->rq_resused-1]);
+ args->buffer = page_address(rqstp->rq_respages[rqstp->rq_resused++]);
return xdr_argsize_check(rqstp, p);
}
@@ -416,7 +413,6 @@ nfssvc_encode_readlinkres(struct svc_rqst *rqstp, u32 *p,
rqstp->rq_res.page_len = resp->len;
if (resp->len & 3) {
/* need to pad the tail */
- rqstp->rq_restailpage = 0;
rqstp->rq_res.tail[0].iov_base = p;
*p = 0;
rqstp->rq_res.tail[0].iov_len = 4 - (resp->len&3);
@@ -436,7 +432,6 @@ nfssvc_encode_readres(struct svc_rqst *rqstp, u32 *p,
rqstp->rq_res.page_len = resp->count;
if (resp->count & 3) {
/* need to pad the tail */
- rqstp->rq_restailpage = 0;
rqstp->rq_res.tail[0].iov_base = p;
*p = 0;
rqstp->rq_res.tail[0].iov_len = 4 - (resp->count&3);
@@ -463,7 +458,7 @@ nfssvc_encode_statfsres(struct svc_rqst *rqstp, u32 *p,
{
struct kstatfs *stat = &resp->stats;
- *p++ = htonl(NFSSVC_MAXBLKSIZE); /* max transfer size */
+ *p++ = htonl(NFSSVC_MAXBLKSIZE_V2); /* max transfer size */
*p++ = htonl(stat->f_bsize);
*p++ = htonl(stat->f_blocks);
*p++ = htonl(stat->f_bfree);
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 443ebc52e382..1141bd29e4e3 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -54,6 +54,7 @@
#include <linux/nfsd_idmap.h>
#include <linux/security.h>
#endif /* CONFIG_NFSD_V4 */
+#include <linux/jhash.h>
#include <asm/uaccess.h>
@@ -81,10 +82,19 @@ struct raparms {
dev_t p_dev;
int p_set;
struct file_ra_state p_ra;
+ unsigned int p_hindex;
};
+struct raparm_hbucket {
+ struct raparms *pb_head;
+ spinlock_t pb_lock;
+} ____cacheline_aligned_in_smp;
+
static struct raparms * raparml;
-static struct raparms * raparm_cache;
+#define RAPARM_HASH_BITS 4
+#define RAPARM_HASH_SIZE (1<<RAPARM_HASH_BITS)
+#define RAPARM_HASH_MASK (RAPARM_HASH_SIZE-1)
+static struct raparm_hbucket raparm_hash[RAPARM_HASH_SIZE];
/*
* Called from nfsd_lookup and encode_dirent. Check if we have crossed
@@ -437,13 +447,11 @@ nfsd4_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp,
} else if (error < 0)
goto out_nfserr;
- if (pacl) {
- error = set_nfsv4_acl_one(dentry, pacl, POSIX_ACL_XATTR_ACCESS);
- if (error < 0)
- goto out_nfserr;
- }
+ error = set_nfsv4_acl_one(dentry, pacl, POSIX_ACL_XATTR_ACCESS);
+ if (error < 0)
+ goto out_nfserr;
- if (dpacl) {
+ if (S_ISDIR(inode->i_mode)) {
error = set_nfsv4_acl_one(dentry, dpacl, POSIX_ACL_XATTR_DEFAULT);
if (error < 0)
goto out_nfserr;
@@ -743,16 +751,20 @@ nfsd_sync_dir(struct dentry *dp)
* Obtain the readahead parameters for the file
* specified by (dev, ino).
*/
-static DEFINE_SPINLOCK(ra_lock);
static inline struct raparms *
nfsd_get_raparms(dev_t dev, ino_t ino)
{
struct raparms *ra, **rap, **frap = NULL;
int depth = 0;
+ unsigned int hash;
+ struct raparm_hbucket *rab;
- spin_lock(&ra_lock);
- for (rap = &raparm_cache; (ra = *rap); rap = &ra->p_next) {
+ hash = jhash_2words(dev, ino, 0xfeedbeef) & RAPARM_HASH_MASK;
+ rab = &raparm_hash[hash];
+
+ spin_lock(&rab->pb_lock);
+ for (rap = &rab->pb_head; (ra = *rap); rap = &ra->p_next) {
if (ra->p_ino == ino && ra->p_dev == dev)
goto found;
depth++;
@@ -761,7 +773,7 @@ nfsd_get_raparms(dev_t dev, ino_t ino)
}
depth = nfsdstats.ra_size*11/10;
if (!frap) {
- spin_unlock(&ra_lock);
+ spin_unlock(&rab->pb_lock);
return NULL;
}
rap = frap;
@@ -769,15 +781,16 @@ nfsd_get_raparms(dev_t dev, ino_t ino)
ra->p_dev = dev;
ra->p_ino = ino;
ra->p_set = 0;
+ ra->p_hindex = hash;
found:
- if (rap != &raparm_cache) {
+ if (rap != &rab->pb_head) {
*rap = ra->p_next;
- ra->p_next = raparm_cache;
- raparm_cache = ra;
+ ra->p_next = rab->pb_head;
+ rab->pb_head = ra;
}
ra->p_count++;
nfsdstats.ra_depth[depth*10/nfsdstats.ra_size]++;
- spin_unlock(&ra_lock);
+ spin_unlock(&rab->pb_lock);
return ra;
}
@@ -791,22 +804,26 @@ nfsd_read_actor(read_descriptor_t *desc, struct page *page, unsigned long offset
{
unsigned long count = desc->count;
struct svc_rqst *rqstp = desc->arg.data;
+ struct page **pp = rqstp->rq_respages + rqstp->rq_resused;
if (size > count)
size = count;
if (rqstp->rq_res.page_len == 0) {
get_page(page);
- rqstp->rq_respages[rqstp->rq_resused++] = page;
+ put_page(*pp);
+ *pp = page;
+ rqstp->rq_resused++;
rqstp->rq_res.page_base = offset;
rqstp->rq_res.page_len = size;
- } else if (page != rqstp->rq_respages[rqstp->rq_resused-1]) {
+ } else if (page != pp[-1]) {
get_page(page);
- rqstp->rq_respages[rqstp->rq_resused++] = page;
+ put_page(*pp);
+ *pp = page;
+ rqstp->rq_resused++;
rqstp->rq_res.page_len += size;
- } else {
+ } else
rqstp->rq_res.page_len += size;
- }
desc->count = count - size;
desc->written += size;
@@ -837,7 +854,7 @@ nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
file->f_ra = ra->p_ra;
if (file->f_op->sendfile && rqstp->rq_sendfile_ok) {
- svc_pushback_unused_pages(rqstp);
+ rqstp->rq_resused = 1;
err = file->f_op->sendfile(file, &offset, *count,
nfsd_read_actor, rqstp);
} else {
@@ -849,11 +866,12 @@ nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
/* Write back readahead params */
if (ra) {
- spin_lock(&ra_lock);
+ struct raparm_hbucket *rab = &raparm_hash[ra->p_hindex];
+ spin_lock(&rab->pb_lock);
ra->p_ra = file->f_ra;
ra->p_set = 1;
ra->p_count--;
- spin_unlock(&ra_lock);
+ spin_unlock(&rab->pb_lock);
}
if (err >= 0) {
@@ -1829,11 +1847,11 @@ nfsd_permission(struct svc_export *exp, struct dentry *dentry, int acc)
void
nfsd_racache_shutdown(void)
{
- if (!raparm_cache)
+ if (!raparml)
return;
dprintk("nfsd: freeing readahead buffers.\n");
kfree(raparml);
- raparm_cache = raparml = NULL;
+ raparml = NULL;
}
/*
* Initialize readahead param cache
@@ -1842,19 +1860,31 @@ int
nfsd_racache_init(int cache_size)
{
int i;
+ int j = 0;
+ int nperbucket;
+
- if (raparm_cache)
+ if (raparml)
return 0;
+ if (cache_size < 2*RAPARM_HASH_SIZE)
+ cache_size = 2*RAPARM_HASH_SIZE;
raparml = kmalloc(sizeof(struct raparms) * cache_size, GFP_KERNEL);
if (raparml != NULL) {
dprintk("nfsd: allocating %d readahead buffers.\n",
cache_size);
+ for (i = 0 ; i < RAPARM_HASH_SIZE ; i++) {
+ raparm_hash[i].pb_head = NULL;
+ spin_lock_init(&raparm_hash[i].pb_lock);
+ }
+ nperbucket = cache_size >> RAPARM_HASH_BITS;
memset(raparml, 0, sizeof(struct raparms) * cache_size);
for (i = 0; i < cache_size - 1; i++) {
- raparml[i].p_next = raparml + i + 1;
+ if (i % nperbucket == 0)
+ raparm_hash[j++].pb_head = raparml + i;
+ if (i % nperbucket < nperbucket-1)
+ raparml[i].p_next = raparml + i + 1;
}
- raparm_cache = raparml;
} else {
printk(KERN_WARNING
"nfsd: Could not allocate memory read-ahead cache.\n");
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 7e5a2f5ebeb0..9c69bcacad22 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -1780,7 +1780,7 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
err = -EDQUOT;
goto out_end_trans;
}
- if (!dir || !dir->i_nlink) {
+ if (!dir->i_nlink) {
err = -EPERM;
goto out_bad_inode;
}