diff options
Diffstat (limited to 'fs')
38 files changed, 8173 insertions, 935 deletions
diff --git a/fs/Kconfig b/fs/Kconfig index 68f4561423ff..674cfbb83a95 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -995,6 +995,18 @@ config AFFS_FS To compile this file system support as a module, choose M here: the module will be called affs. If unsure, say N. +config ECRYPT_FS + tristate "eCrypt filesystem layer support (EXPERIMENTAL)" + depends on EXPERIMENTAL && KEYS && CRYPTO + help + Encrypted filesystem that operates on the VFS layer. See + <file:Documentation/ecryptfs.txt> to learn more about + eCryptfs. Userspace components are required and can be + obtained from <http://ecryptfs.sf.net>. + + To compile this file system support as a module, choose M here: the + module will be called ecryptfs. + config HFS_FS tristate "Apple Macintosh file system support (EXPERIMENTAL)" depends on BLOCK && EXPERIMENTAL diff --git a/fs/Makefile b/fs/Makefile index 819b2a93bebe..fd24d67a7cdb 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -75,6 +75,7 @@ obj-$(CONFIG_BFS_FS) += bfs/ obj-$(CONFIG_ISO9660_FS) += isofs/ obj-$(CONFIG_HFSPLUS_FS) += hfsplus/ # Before hfs to find wrapped HFS+ obj-$(CONFIG_HFS_FS) += hfs/ +obj-$(CONFIG_ECRYPT_FS) += ecryptfs/ obj-$(CONFIG_VXFS_FS) += freevxfs/ obj-$(CONFIG_NFS_FS) += nfs/ obj-$(CONFIG_EXPORTFS) += exportfs/ diff --git a/fs/dcache.c b/fs/dcache.c index fc2faa44f8d1..2355bddad8de 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -291,9 +291,9 @@ struct dentry * dget_locked(struct dentry *dentry) * it can be unhashed only if it has no children, or if it is the root * of a filesystem. * - * If the inode has a DCACHE_DISCONNECTED alias, then prefer + * If the inode has an IS_ROOT, DCACHE_DISCONNECTED alias, then prefer * any other hashed alias over that one unless @want_discon is set, - * in which case only return a DCACHE_DISCONNECTED alias. + * in which case only return an IS_ROOT, DCACHE_DISCONNECTED alias. 
*/ static struct dentry * __d_find_alias(struct inode *inode, int want_discon) @@ -309,7 +309,8 @@ static struct dentry * __d_find_alias(struct inode *inode, int want_discon) prefetch(next); alias = list_entry(tmp, struct dentry, d_alias); if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) { - if (alias->d_flags & DCACHE_DISCONNECTED) + if (IS_ROOT(alias) && + (alias->d_flags & DCACHE_DISCONNECTED)) discon_alias = alias; else if (!want_discon) { __dget_locked(alias); @@ -1004,7 +1005,7 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry) { struct dentry *new = NULL; - if (inode) { + if (inode && S_ISDIR(inode->i_mode)) { spin_lock(&dcache_lock); new = __d_find_alias(inode, 1); if (new) { diff --git a/fs/ecryptfs/Makefile b/fs/ecryptfs/Makefile new file mode 100644 index 000000000000..ca6562451eeb --- /dev/null +++ b/fs/ecryptfs/Makefile @@ -0,0 +1,7 @@ +# +# Makefile for the Linux 2.6 eCryptfs +# + +obj-$(CONFIG_ECRYPT_FS) += ecryptfs.o + +ecryptfs-objs := dentry.o file.o inode.o main.o super.o mmap.o crypto.o keystore.o debug.o diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c new file mode 100644 index 000000000000..ed35a9712fa1 --- /dev/null +++ b/fs/ecryptfs/crypto.c @@ -0,0 +1,1659 @@ +/** + * eCryptfs: Linux filesystem encryption layer + * + * Copyright (C) 1997-2004 Erez Zadok + * Copyright (C) 2001-2004 Stony Brook University + * Copyright (C) 2004-2006 International Business Machines Corp. + * Author(s): Michael A. Halcrow <mahalcro@us.ibm.com> + * Michael C. Thompson <mcthomps@us.ibm.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA + * 02111-1307, USA. + */ + +#include <linux/fs.h> +#include <linux/mount.h> +#include <linux/pagemap.h> +#include <linux/random.h> +#include <linux/compiler.h> +#include <linux/key.h> +#include <linux/namei.h> +#include <linux/crypto.h> +#include <linux/file.h> +#include <linux/scatterlist.h> +#include "ecryptfs_kernel.h" + +static int +ecryptfs_decrypt_page_offset(struct ecryptfs_crypt_stat *crypt_stat, + struct page *dst_page, int dst_offset, + struct page *src_page, int src_offset, int size, + unsigned char *iv); +static int +ecryptfs_encrypt_page_offset(struct ecryptfs_crypt_stat *crypt_stat, + struct page *dst_page, int dst_offset, + struct page *src_page, int src_offset, int size, + unsigned char *iv); + +/** + * ecryptfs_to_hex + * @dst: Buffer to take hex character representation of contents of + * src; must be at least of size (src_size * 2) + 1, because + * sprintf() writes a terminating NUL after every pair + * @src: Buffer to be converted to a hex string representation + * @src_size: number of bytes to convert + * + * Writes two lowercase hex digits per source byte; the result is + * NUL-terminated when src_size is non-zero. + */ +void ecryptfs_to_hex(char *dst, char *src, size_t src_size) +{ + size_t x; + + for (x = 0; x < src_size; x++) + sprintf(&dst[x * 2], "%.2x", (unsigned char)src[x]); +} + +/** + * ecryptfs_from_hex + * @dst: Buffer to take the bytes from src hex; must be at least of + * size (src_size / 2) + * @src: Buffer to be converted from a hex string representation to raw value + * @dst_size: size of dst buffer, or number of hex character pairs to convert + */ +void ecryptfs_from_hex(char *dst, char *src, int dst_size) +{ + int x; + char tmp[3] = { 0, }; + + for (x = 0; x <
dst_size; x++) { + tmp[0] = src[x * 2]; + tmp[1] = src[x * 2 + 1]; + dst[x] = (unsigned char)simple_strtol(tmp, NULL, 16); + } +} + +/** + * ecryptfs_calculate_md5 - calculates the md5 of @src + * @dst: Pointer to 16 bytes of allocated memory + * @crypt_stat: Pointer to crypt_stat struct for the current inode + * @src: Data to be md5'd + * @len: Length of @src + * + * Uses the allocated crypto context that crypt_stat references to + * generate the MD5 sum of the contents of src. + */ +static int ecryptfs_calculate_md5(char *dst, + struct ecryptfs_crypt_stat *crypt_stat, + char *src, int len) +{ + int rc = 0; + struct scatterlist sg; + + mutex_lock(&crypt_stat->cs_md5_tfm_mutex); + sg_init_one(&sg, (u8 *)src, len); + if (!crypt_stat->md5_tfm) { + crypt_stat->md5_tfm = + crypto_alloc_tfm("md5", CRYPTO_TFM_REQ_MAY_SLEEP); + if (!crypt_stat->md5_tfm) { + rc = -ENOMEM; + ecryptfs_printk(KERN_ERR, "Error attempting to " + "allocate crypto context\n"); + goto out; + } + } + crypto_digest_init(crypt_stat->md5_tfm); + crypto_digest_update(crypt_stat->md5_tfm, &sg, 1); + crypto_digest_final(crypt_stat->md5_tfm, dst); + mutex_unlock(&crypt_stat->cs_md5_tfm_mutex); +out: + return rc; +} + +/** + * ecryptfs_derive_iv + * @iv: destination for the derived iv vale + * @crypt_stat: Pointer to crypt_stat struct for the current inode + * @offset: Offset of the page whose's iv we are to derive + * + * Generate the initialization vector from the given root IV and page + * offset. + * + * Returns zero on success; non-zero on error. 
+ */ +static int ecryptfs_derive_iv(char *iv, struct ecryptfs_crypt_stat *crypt_stat, + pgoff_t offset) +{ + int rc = 0; + char dst[MD5_DIGEST_SIZE]; + char src[ECRYPTFS_MAX_IV_BYTES + 16]; + + if (unlikely(ecryptfs_verbosity > 0)) { + ecryptfs_printk(KERN_DEBUG, "root iv:\n"); + ecryptfs_dump_hex(crypt_stat->root_iv, crypt_stat->iv_bytes); + } + /* The MD5 input is the root IV followed by a zero-filled 16-byte + * field holding the offset formatted as a decimal string; the + * first iv_bytes of the digest become the derived IV. + * + * TODO: It is probably secure to just cast the least + * significant bits of the root IV into an unsigned long and + * add the offset to that rather than go through all this + * hashing business. -Halcrow */ + memcpy(src, crypt_stat->root_iv, crypt_stat->iv_bytes); + memset((src + crypt_stat->iv_bytes), 0, 16); + snprintf((src + crypt_stat->iv_bytes), 16, "%ld", offset); + if (unlikely(ecryptfs_verbosity > 0)) { + ecryptfs_printk(KERN_DEBUG, "source:\n"); + ecryptfs_dump_hex(src, (crypt_stat->iv_bytes + 16)); + } + rc = ecryptfs_calculate_md5(dst, crypt_stat, src, + (crypt_stat->iv_bytes + 16)); + if (rc) { + ecryptfs_printk(KERN_WARNING, "Error attempting to compute " + "MD5 while generating IV for a page\n"); + goto out; + } + memcpy(iv, dst, crypt_stat->iv_bytes); + if (unlikely(ecryptfs_verbosity > 0)) { + ecryptfs_printk(KERN_DEBUG, "derived iv:\n"); + ecryptfs_dump_hex(iv, crypt_stat->iv_bytes); + } +out: + return rc; +} + +/** + * ecryptfs_init_crypt_stat + * @crypt_stat: Pointer to the crypt_stat struct to initialize. + * + * Zeroes the crypt_stat structure, initializes its three mutexes + * (cs_mutex, cs_tfm_mutex, cs_md5_tfm_mutex) and sets the + * ECRYPTFS_STRUCT_INITIALIZED flag. + */ +void +ecryptfs_init_crypt_stat(struct ecryptfs_crypt_stat *crypt_stat) +{ + memset((void *)crypt_stat, 0, sizeof(struct ecryptfs_crypt_stat)); + mutex_init(&crypt_stat->cs_mutex); + mutex_init(&crypt_stat->cs_tfm_mutex); + mutex_init(&crypt_stat->cs_md5_tfm_mutex); + ECRYPTFS_SET_FLAG(crypt_stat->flags, ECRYPTFS_STRUCT_INITIALIZED); +} + +/** + * ecryptfs_destruct_crypt_stat + * @crypt_stat: Pointer to the crypt_stat struct to tear down. + * + * Frees the cipher and MD5 crypto contexts, if allocated, and zeroes + * the crypt_stat struct.
+ */ +void ecryptfs_destruct_crypt_stat(struct ecryptfs_crypt_stat *crypt_stat) +{ + if (crypt_stat->tfm) + crypto_free_tfm(crypt_stat->tfm); + if (crypt_stat->md5_tfm) + crypto_free_tfm(crypt_stat->md5_tfm); + memset(crypt_stat, 0, sizeof(struct ecryptfs_crypt_stat)); +} + +void ecryptfs_destruct_mount_crypt_stat( + struct ecryptfs_mount_crypt_stat *mount_crypt_stat) +{ + if (mount_crypt_stat->global_auth_tok_key) + key_put(mount_crypt_stat->global_auth_tok_key); + if (mount_crypt_stat->global_key_tfm) + crypto_free_tfm(mount_crypt_stat->global_key_tfm); + memset(mount_crypt_stat, 0, sizeof(struct ecryptfs_mount_crypt_stat)); +} + +/** + * virt_to_scatterlist + * @addr: Virtual address + * @size: Size of data; should be an even multiple of the block size + * @sg: Pointer to scatterlist array; set to NULL to obtain only + * the number of scatterlist structs required in array + * @sg_size: Max array size; the walk stops after this many entries + * even when @sg is NULL + * + * Fills in a scatterlist array with page references for a passed + * virtual address. One entry is produced per page touched, each + * covering the bytes from the current address to the end of that + * page or to the end of the data, whichever comes first. + * + * Returns the number of scatterlist structs in array used; -ENOMEM + * if @sg_size entries are not enough to cover @size bytes + */ +int virt_to_scatterlist(const void *addr, int size, struct scatterlist *sg, + int sg_size) +{ + int i = 0; + struct page *pg; + int offset; + int remainder_of_page; + + while (size > 0 && i < sg_size) { + pg = virt_to_page(addr); + offset = offset_in_page(addr); + if (sg) { + sg[i].page = pg; + sg[i].offset = offset; + } + remainder_of_page = PAGE_CACHE_SIZE - offset; + if (size >= remainder_of_page) { + if (sg) + sg[i].length = remainder_of_page; + addr += remainder_of_page; + size -= remainder_of_page; + } else { + if (sg) + sg[i].length = size; + addr += size; + size = 0; + } + i++; + } + if (size > 0) + return -ENOMEM; + return i; +} + +/** + * encrypt_scatterlist + * @crypt_stat: Pointer to the crypt_stat struct holding the cipher + * context and key to encrypt with.
+ * @dest_sg: Destination of encrypted data + * @src_sg: Data to be encrypted + * @size: Length of data to be encrypted + * @iv: iv to use during encryption + * + * Returns the number of bytes encrypted; negative value on error + */ +static int encrypt_scatterlist(struct ecryptfs_crypt_stat *crypt_stat, + struct scatterlist *dest_sg, + struct scatterlist *src_sg, int size, + unsigned char *iv) +{ + int rc = 0; + + BUG_ON(!crypt_stat || !crypt_stat->tfm + || !ECRYPTFS_CHECK_FLAG(crypt_stat->flags, + ECRYPTFS_STRUCT_INITIALIZED)); + if (unlikely(ecryptfs_verbosity > 0)) { + ecryptfs_printk(KERN_DEBUG, "Key size [%d]; key:\n", + crypt_stat->key_size); + ecryptfs_dump_hex(crypt_stat->key, + crypt_stat->key_size); + } + /* Consider doing this once, when the file is opened */ + mutex_lock(&crypt_stat->cs_tfm_mutex); + rc = crypto_cipher_setkey(crypt_stat->tfm, crypt_stat->key, + crypt_stat->key_size); + if (rc) { + ecryptfs_printk(KERN_ERR, "Error setting key; rc = [%d]\n", + rc); + mutex_unlock(&crypt_stat->cs_tfm_mutex); + rc = -EINVAL; + goto out; + } + ecryptfs_printk(KERN_DEBUG, "Encrypting [%d] bytes.\n", size); + crypto_cipher_encrypt_iv(crypt_stat->tfm, dest_sg, src_sg, size, iv); + mutex_unlock(&crypt_stat->cs_tfm_mutex); +out: + return rc; +} + +static void +ecryptfs_extent_to_lwr_pg_idx_and_offset(unsigned long *lower_page_idx, + int *byte_offset, + struct ecryptfs_crypt_stat *crypt_stat, + unsigned long extent_num) +{ + unsigned long lower_extent_num; + int extents_occupied_by_headers_at_front; + int bytes_occupied_by_headers_at_front; + int extent_offset; + int extents_per_page; + + bytes_occupied_by_headers_at_front = + ( crypt_stat->header_extent_size + * crypt_stat->num_header_extents_at_front ); + extents_occupied_by_headers_at_front = + ( bytes_occupied_by_headers_at_front + / crypt_stat->extent_size ); + lower_extent_num = extents_occupied_by_headers_at_front + extent_num; + extents_per_page = PAGE_CACHE_SIZE / crypt_stat->extent_size; + 
(*lower_page_idx) = lower_extent_num / extents_per_page; + extent_offset = lower_extent_num % extents_per_page; + (*byte_offset) = extent_offset * crypt_stat->extent_size; + ecryptfs_printk(KERN_DEBUG, " * crypt_stat->header_extent_size = " + "[%d]\n", crypt_stat->header_extent_size); + ecryptfs_printk(KERN_DEBUG, " * crypt_stat->" + "num_header_extents_at_front = [%d]\n", + crypt_stat->num_header_extents_at_front); + ecryptfs_printk(KERN_DEBUG, " * extents_occupied_by_headers_at_" + "front = [%d]\n", extents_occupied_by_headers_at_front); + ecryptfs_printk(KERN_DEBUG, " * lower_extent_num = [0x%.16x]\n", + lower_extent_num); + ecryptfs_printk(KERN_DEBUG, " * extents_per_page = [%d]\n", + extents_per_page); + ecryptfs_printk(KERN_DEBUG, " * (*lower_page_idx) = [0x%.16x]\n", + (*lower_page_idx)); + ecryptfs_printk(KERN_DEBUG, " * extent_offset = [%d]\n", + extent_offset); + ecryptfs_printk(KERN_DEBUG, " * (*byte_offset) = [%d]\n", + (*byte_offset)); +} + +static int ecryptfs_write_out_page(struct ecryptfs_page_crypt_context *ctx, + struct page *lower_page, + struct inode *lower_inode, + int byte_offset_in_page, int bytes_to_write) +{ + int rc = 0; + + if (ctx->mode == ECRYPTFS_PREPARE_COMMIT_MODE) { + rc = ecryptfs_commit_lower_page(lower_page, lower_inode, + ctx->param.lower_file, + byte_offset_in_page, + bytes_to_write); + if (rc) { + ecryptfs_printk(KERN_ERR, "Error calling lower " + "commit; rc = [%d]\n", rc); + goto out; + } + } else { + rc = ecryptfs_writepage_and_release_lower_page(lower_page, + lower_inode, + ctx->param.wbc); + if (rc) { + ecryptfs_printk(KERN_ERR, "Error calling lower " + "writepage(); rc = [%d]\n", rc); + goto out; + } + } +out: + return rc; +} + +static int ecryptfs_read_in_page(struct ecryptfs_page_crypt_context *ctx, + struct page **lower_page, + struct inode *lower_inode, + unsigned long lower_page_idx, + int byte_offset_in_page) +{ + int rc = 0; + + if (ctx->mode == ECRYPTFS_PREPARE_COMMIT_MODE) { + /* TODO: Limit this to only the 
data extents that are + * needed */ + rc = ecryptfs_get_lower_page(lower_page, lower_inode, + ctx->param.lower_file, + lower_page_idx, + byte_offset_in_page, + (PAGE_CACHE_SIZE + - byte_offset_in_page)); + if (rc) { + ecryptfs_printk( + KERN_ERR, "Error attempting to grab, map, " + "and prepare_write lower page with index " + "[0x%.16x]; rc = [%d]\n", lower_page_idx, rc); + goto out; + } + } else { + rc = ecryptfs_grab_and_map_lower_page(lower_page, NULL, + lower_inode, + lower_page_idx); + if (rc) { + ecryptfs_printk( + KERN_ERR, "Error attempting to grab and map " + "lower page with index [0x%.16x]; rc = [%d]\n", + lower_page_idx, rc); + goto out; + } + } +out: + return rc; +} + +/** + * ecryptfs_encrypt_page + * @ctx: The context of the page + * + * Encrypt an eCryptfs page. This is done on a per-extent basis. Note + * that eCryptfs pages may straddle the lower pages -- for instance, + * if the file was created on a machine with an 8K page size + * (resulting in an 8K header), and then the file is copied onto a + * host with a 32K page size, then when reading page 0 of the eCryptfs + * file, 24K of page 0 of the lower file will be read and decrypted, + * and then 8K of page 1 of the lower file will be read and decrypted. + * + * The actual operations performed on each page depends on the + * contents of the ecryptfs_page_crypt_context struct. 
+ * + * Returns zero on success; negative on error + */ +int ecryptfs_encrypt_page(struct ecryptfs_page_crypt_context *ctx) +{ + char extent_iv[ECRYPTFS_MAX_IV_BYTES]; + unsigned long base_extent; + unsigned long extent_offset = 0; + unsigned long lower_page_idx = 0; + unsigned long prior_lower_page_idx = 0; + struct page *lower_page; + struct inode *lower_inode; + struct ecryptfs_inode_info *inode_info; + struct ecryptfs_crypt_stat *crypt_stat; + int rc = 0; + int lower_byte_offset = 0; + int orig_byte_offset = 0; + int num_extents_per_page; +#define ECRYPTFS_PAGE_STATE_UNREAD 0 +#define ECRYPTFS_PAGE_STATE_READ 1 +#define ECRYPTFS_PAGE_STATE_MODIFIED 2 +#define ECRYPTFS_PAGE_STATE_WRITTEN 3 + int page_state; + + lower_inode = ecryptfs_inode_to_lower(ctx->page->mapping->host); + inode_info = ecryptfs_inode_to_private(ctx->page->mapping->host); + crypt_stat = &inode_info->crypt_stat; + if (!ECRYPTFS_CHECK_FLAG(crypt_stat->flags, ECRYPTFS_ENCRYPTED)) { + rc = ecryptfs_copy_page_to_lower(ctx->page, lower_inode, + ctx->param.lower_file); + if (rc) + ecryptfs_printk(KERN_ERR, "Error attempting to copy " + "page at index [0x%.16x]\n", + ctx->page->index); + goto out; + } + num_extents_per_page = PAGE_CACHE_SIZE / crypt_stat->extent_size; + base_extent = (ctx->page->index * num_extents_per_page); + page_state = ECRYPTFS_PAGE_STATE_UNREAD; + while (extent_offset < num_extents_per_page) { + ecryptfs_extent_to_lwr_pg_idx_and_offset( + &lower_page_idx, &lower_byte_offset, crypt_stat, + (base_extent + extent_offset)); + if (prior_lower_page_idx != lower_page_idx + && page_state == ECRYPTFS_PAGE_STATE_MODIFIED) { + rc = ecryptfs_write_out_page(ctx, lower_page, + lower_inode, + orig_byte_offset, + (PAGE_CACHE_SIZE + - orig_byte_offset)); + if (rc) { + ecryptfs_printk(KERN_ERR, "Error attempting " + "to write out page; rc = [%d]" + "\n", rc); + goto out; + } + page_state = ECRYPTFS_PAGE_STATE_WRITTEN; + } + if (page_state == ECRYPTFS_PAGE_STATE_UNREAD + || page_state == 
ECRYPTFS_PAGE_STATE_WRITTEN) { + rc = ecryptfs_read_in_page(ctx, &lower_page, + lower_inode, lower_page_idx, + lower_byte_offset); + if (rc) { + ecryptfs_printk(KERN_ERR, "Error attempting " + "to read in lower page with " + "index [0x%.16x]; rc = [%d]\n", + lower_page_idx, rc); + goto out; + } + orig_byte_offset = lower_byte_offset; + prior_lower_page_idx = lower_page_idx; + page_state = ECRYPTFS_PAGE_STATE_READ; + } + BUG_ON(!(page_state == ECRYPTFS_PAGE_STATE_MODIFIED + || page_state == ECRYPTFS_PAGE_STATE_READ)); + rc = ecryptfs_derive_iv(extent_iv, crypt_stat, + (base_extent + extent_offset)); + if (rc) { + ecryptfs_printk(KERN_ERR, "Error attempting to " + "derive IV for extent [0x%.16x]; " + "rc = [%d]\n", + (base_extent + extent_offset), rc); + goto out; + } + if (unlikely(ecryptfs_verbosity > 0)) { + ecryptfs_printk(KERN_DEBUG, "Encrypting extent " + "with iv:\n"); + ecryptfs_dump_hex(extent_iv, crypt_stat->iv_bytes); + ecryptfs_printk(KERN_DEBUG, "First 8 bytes before " + "encryption:\n"); + ecryptfs_dump_hex((char *) + (page_address(ctx->page) + + (extent_offset + * crypt_stat->extent_size)), 8); + } + rc = ecryptfs_encrypt_page_offset( + crypt_stat, lower_page, lower_byte_offset, ctx->page, + (extent_offset * crypt_stat->extent_size), + crypt_stat->extent_size, extent_iv); + ecryptfs_printk(KERN_DEBUG, "Encrypt extent [0x%.16x]; " + "rc = [%d]\n", + (base_extent + extent_offset), rc); + if (unlikely(ecryptfs_verbosity > 0)) { + ecryptfs_printk(KERN_DEBUG, "First 8 bytes after " + "encryption:\n"); + ecryptfs_dump_hex((char *)(page_address(lower_page) + + lower_byte_offset), 8); + } + page_state = ECRYPTFS_PAGE_STATE_MODIFIED; + extent_offset++; + } + BUG_ON(orig_byte_offset != 0); + rc = ecryptfs_write_out_page(ctx, lower_page, lower_inode, 0, + (lower_byte_offset + + crypt_stat->extent_size)); + if (rc) { + ecryptfs_printk(KERN_ERR, "Error attempting to write out " + "page; rc = [%d]\n", rc); + goto out; + } +out: + return rc; +} + +/** + * 
ecryptfs_decrypt_page + * @file: The ecryptfs file + * @page: The page in ecryptfs to decrypt + * + * Decrypt an eCryptfs page. This is done on a per-extent basis. Note + * that eCryptfs pages may straddle the lower pages -- for instance, + * if the file was created on a machine with an 8K page size + * (resulting in an 8K header), and then the file is copied onto a + * host with a 32K page size, then when reading page 0 of the eCryptfs + * file, 24K of page 0 of the lower file will be read and decrypted, + * and then 8K of page 1 of the lower file will be read and decrypted. + * + * Returns zero on success; negative on error + */ +int ecryptfs_decrypt_page(struct file *file, struct page *page) +{ + char extent_iv[ECRYPTFS_MAX_IV_BYTES]; + unsigned long base_extent; + unsigned long extent_offset = 0; + unsigned long lower_page_idx = 0; + unsigned long prior_lower_page_idx = 0; + struct page *lower_page; + char *lower_page_virt = NULL; + struct inode *lower_inode; + struct ecryptfs_crypt_stat *crypt_stat; + int rc = 0; + int byte_offset; + int num_extents_per_page; + int page_state; + + crypt_stat = &(ecryptfs_inode_to_private( + page->mapping->host)->crypt_stat); + lower_inode = ecryptfs_inode_to_lower(page->mapping->host); + if (!ECRYPTFS_CHECK_FLAG(crypt_stat->flags, ECRYPTFS_ENCRYPTED)) { + rc = ecryptfs_do_readpage(file, page, page->index); + if (rc) + ecryptfs_printk(KERN_ERR, "Error attempting to copy " + "page at index [0x%.16x]\n", + page->index); + goto out; + } + num_extents_per_page = PAGE_CACHE_SIZE / crypt_stat->extent_size; + base_extent = (page->index * num_extents_per_page); + lower_page_virt = kmem_cache_alloc(ecryptfs_lower_page_cache, + SLAB_KERNEL); + if (!lower_page_virt) { + rc = -ENOMEM; + ecryptfs_printk(KERN_ERR, "Error getting page for encrypted " + "lower page(s)\n"); + goto out; + } + lower_page = virt_to_page(lower_page_virt); + page_state = ECRYPTFS_PAGE_STATE_UNREAD; + while (extent_offset < num_extents_per_page) { + 
ecryptfs_extent_to_lwr_pg_idx_and_offset( + &lower_page_idx, &byte_offset, crypt_stat, + (base_extent + extent_offset)); + if (prior_lower_page_idx != lower_page_idx + || page_state == ECRYPTFS_PAGE_STATE_UNREAD) { + rc = ecryptfs_do_readpage(file, lower_page, + lower_page_idx); + if (rc) { + ecryptfs_printk(KERN_ERR, "Error reading " + "lower encrypted page; rc = " + "[%d]\n", rc); + goto out; + } + prior_lower_page_idx = lower_page_idx; + page_state = ECRYPTFS_PAGE_STATE_READ; + } + rc = ecryptfs_derive_iv(extent_iv, crypt_stat, + (base_extent + extent_offset)); + if (rc) { + ecryptfs_printk(KERN_ERR, "Error attempting to " + "derive IV for extent [0x%.16x]; rc = " + "[%d]\n", + (base_extent + extent_offset), rc); + goto out; + } + if (unlikely(ecryptfs_verbosity > 0)) { + ecryptfs_printk(KERN_DEBUG, "Decrypting extent " + "with iv:\n"); + ecryptfs_dump_hex(extent_iv, crypt_stat->iv_bytes); + ecryptfs_printk(KERN_DEBUG, "First 8 bytes before " + "decryption:\n"); + ecryptfs_dump_hex((lower_page_virt + byte_offset), 8); + } + rc = ecryptfs_decrypt_page_offset(crypt_stat, page, + (extent_offset + * crypt_stat->extent_size), + lower_page, byte_offset, + crypt_stat->extent_size, + extent_iv); + if (rc != crypt_stat->extent_size) { + ecryptfs_printk(KERN_ERR, "Error attempting to " + "decrypt extent [0x%.16x]\n", + (base_extent + extent_offset)); + goto out; + } + rc = 0; + if (unlikely(ecryptfs_verbosity > 0)) { + ecryptfs_printk(KERN_DEBUG, "First 8 bytes after " + "decryption:\n"); + ecryptfs_dump_hex((char *)(page_address(page) + + byte_offset), 8); + } + extent_offset++; + } +out: + if (lower_page_virt) + kmem_cache_free(ecryptfs_lower_page_cache, lower_page_virt); + return rc; +} + +/** + * decrypt_scatterlist + * + * Returns the number of bytes decrypted; negative value on error + */ +static int decrypt_scatterlist(struct ecryptfs_crypt_stat *crypt_stat, + struct scatterlist *dest_sg, + struct scatterlist *src_sg, int size, + unsigned char *iv) +{ + int rc = 0; 
+ + /* Consider doing this once, when the file is opened */ + mutex_lock(&crypt_stat->cs_tfm_mutex); + rc = crypto_cipher_setkey(crypt_stat->tfm, crypt_stat->key, + crypt_stat->key_size); + if (rc) { + ecryptfs_printk(KERN_ERR, "Error setting key; rc = [%d]\n", + rc); + mutex_unlock(&crypt_stat->cs_tfm_mutex); + rc = -EINVAL; + goto out; + } + ecryptfs_printk(KERN_DEBUG, "Decrypting [%d] bytes.\n", size); + rc = crypto_cipher_decrypt_iv(crypt_stat->tfm, dest_sg, src_sg, size, + iv); + mutex_unlock(&crypt_stat->cs_tfm_mutex); + if (rc) { + ecryptfs_printk(KERN_ERR, "Error decrypting; rc = [%d]\n", + rc); + goto out; + } + rc = size; +out: + return rc; +} + +/** + * ecryptfs_encrypt_page_offset + * + * Returns the number of bytes encrypted + */ +static int +ecryptfs_encrypt_page_offset(struct ecryptfs_crypt_stat *crypt_stat, + struct page *dst_page, int dst_offset, + struct page *src_page, int src_offset, int size, + unsigned char *iv) +{ + struct scatterlist src_sg, dst_sg; + + src_sg.page = src_page; + src_sg.offset = src_offset; + src_sg.length = size; + dst_sg.page = dst_page; + dst_sg.offset = dst_offset; + dst_sg.length = size; + return encrypt_scatterlist(crypt_stat, &dst_sg, &src_sg, size, iv); +} + +/** + * ecryptfs_decrypt_page_offset + * + * Returns the number of bytes decrypted + */ +static int +ecryptfs_decrypt_page_offset(struct ecryptfs_crypt_stat *crypt_stat, + struct page *dst_page, int dst_offset, + struct page *src_page, int src_offset, int size, + unsigned char *iv) +{ + struct scatterlist src_sg, dst_sg; + + src_sg.page = src_page; + src_sg.offset = src_offset; + src_sg.length = size; + dst_sg.page = dst_page; + dst_sg.offset = dst_offset; + dst_sg.length = size; + return decrypt_scatterlist(crypt_stat, &dst_sg, &src_sg, size, iv); +} + +#define ECRYPTFS_MAX_SCATTERLIST_LEN 4 + +/** + * ecryptfs_init_crypt_ctx + * @crypt_stat: Uninitilized crypt stats structure + * + * Initialize the crypto context. 
+ * + * TODO: Performance: Keep a cache of initialized cipher contexts; + * only init if needed + */ +int ecryptfs_init_crypt_ctx(struct ecryptfs_crypt_stat *crypt_stat) +{ + int rc = -EINVAL; + + if (!crypt_stat->cipher) { + ecryptfs_printk(KERN_ERR, "No cipher specified\n"); + goto out; + } + ecryptfs_printk(KERN_DEBUG, + "Initializing cipher [%s]; strlen = [%d]; " + "key_size_bits = [%d]\n", + crypt_stat->cipher, (int)strlen(crypt_stat->cipher), + crypt_stat->key_size << 3); + if (crypt_stat->tfm) { + rc = 0; + goto out; + } + mutex_lock(&crypt_stat->cs_tfm_mutex); + crypt_stat->tfm = crypto_alloc_tfm(crypt_stat->cipher, + ECRYPTFS_DEFAULT_CHAINING_MODE + | CRYPTO_TFM_REQ_WEAK_KEY); + mutex_unlock(&crypt_stat->cs_tfm_mutex); + if (!crypt_stat->tfm) { + ecryptfs_printk(KERN_ERR, "cryptfs: init_crypt_ctx(): " + "Error initializing cipher [%s]\n", + crypt_stat->cipher); + goto out; + } + rc = 0; +out: + return rc; +} + +static void set_extent_mask_and_shift(struct ecryptfs_crypt_stat *crypt_stat) +{ + int extent_size_tmp; + + crypt_stat->extent_mask = 0xFFFFFFFF; + crypt_stat->extent_shift = 0; + if (crypt_stat->extent_size == 0) + return; + extent_size_tmp = crypt_stat->extent_size; + while ((extent_size_tmp & 0x01) == 0) { + extent_size_tmp >>= 1; + crypt_stat->extent_mask <<= 1; + crypt_stat->extent_shift++; + } +} + +void ecryptfs_set_default_sizes(struct ecryptfs_crypt_stat *crypt_stat) +{ + /* Default values; may be overwritten as we are parsing the + * packets. */ + crypt_stat->extent_size = ECRYPTFS_DEFAULT_EXTENT_SIZE; + set_extent_mask_and_shift(crypt_stat); + crypt_stat->iv_bytes = ECRYPTFS_DEFAULT_IV_BYTES; + if (PAGE_CACHE_SIZE <= ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE) { + crypt_stat->header_extent_size = + ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE; + } else + crypt_stat->header_extent_size = PAGE_CACHE_SIZE; + crypt_stat->num_header_extents_at_front = 1; +} + +/** + * ecryptfs_compute_root_iv + * @crypt_stats + * + * On error, sets the root IV to all 0's. 
+ */ +int ecryptfs_compute_root_iv(struct ecryptfs_crypt_stat *crypt_stat) +{ + int rc = 0; + char dst[MD5_DIGEST_SIZE]; + + BUG_ON(crypt_stat->iv_bytes > MD5_DIGEST_SIZE); + BUG_ON(crypt_stat->iv_bytes <= 0); + if (!ECRYPTFS_CHECK_FLAG(crypt_stat->flags, ECRYPTFS_KEY_VALID)) { + rc = -EINVAL; + ecryptfs_printk(KERN_WARNING, "Session key not valid; " + "cannot generate root IV\n"); + goto out; + } + rc = ecryptfs_calculate_md5(dst, crypt_stat, crypt_stat->key, + crypt_stat->key_size); + if (rc) { + ecryptfs_printk(KERN_WARNING, "Error attempting to compute " + "MD5 while generating root IV\n"); + goto out; + } + memcpy(crypt_stat->root_iv, dst, crypt_stat->iv_bytes); +out: + if (rc) { + memset(crypt_stat->root_iv, 0, crypt_stat->iv_bytes); + ECRYPTFS_SET_FLAG(crypt_stat->flags, + ECRYPTFS_SECURITY_WARNING); + } + return rc; +} + +static void ecryptfs_generate_new_key(struct ecryptfs_crypt_stat *crypt_stat) +{ + get_random_bytes(crypt_stat->key, crypt_stat->key_size); + ECRYPTFS_SET_FLAG(crypt_stat->flags, ECRYPTFS_KEY_VALID); + ecryptfs_compute_root_iv(crypt_stat); + if (unlikely(ecryptfs_verbosity > 0)) { + ecryptfs_printk(KERN_DEBUG, "Generated new session key:\n"); + ecryptfs_dump_hex(crypt_stat->key, + crypt_stat->key_size); + } +} + +/** + * ecryptfs_set_default_crypt_stat_vals + * @crypt_stat + * + * Default values in the event that policy does not override them. 
+ */ +static void ecryptfs_set_default_crypt_stat_vals( + struct ecryptfs_crypt_stat *crypt_stat, + struct ecryptfs_mount_crypt_stat *mount_crypt_stat) +{ + ecryptfs_set_default_sizes(crypt_stat); + strcpy(crypt_stat->cipher, ECRYPTFS_DEFAULT_CIPHER); + crypt_stat->key_size = ECRYPTFS_DEFAULT_KEY_BYTES; + ECRYPTFS_CLEAR_FLAG(crypt_stat->flags, ECRYPTFS_KEY_VALID); + crypt_stat->file_version = ECRYPTFS_FILE_VERSION; + crypt_stat->mount_crypt_stat = mount_crypt_stat; +} + +/** + * ecryptfs_new_file_context + * @ecryptfs_dentry + * + * If the crypto context for the file has not yet been established, + * this is where we do that. Establishing a new crypto context + * involves the following decisions: + * - What cipher to use? + * - What set of authentication tokens to use? + * Here we just worry about getting enough information into the + * authentication tokens so that we know that they are available. + * We associate the available authentication tokens with the new file + * via the set of signatures in the crypt_stat struct. Later, when + * the headers are actually written out, we may again defer to + * userspace to perform the encryption of the session key; for the + * foreseeable future, this will be the case with public key packets. 
+ *
+ * Returns zero on success; non-zero otherwise (-EINVAL if the
+ * mount-wide cipher name does not fit in crypt_stat->cipher)
+ */
+/* Associate an authentication token(s) with the file */
+int ecryptfs_new_file_context(struct dentry *ecryptfs_dentry)
+{
+	int rc = 0;
+	struct ecryptfs_crypt_stat *crypt_stat =
+	    &ecryptfs_inode_to_private(ecryptfs_dentry->d_inode)->crypt_stat;
+	struct ecryptfs_mount_crypt_stat *mount_crypt_stat =
+	    &ecryptfs_superblock_to_private(
+		    ecryptfs_dentry->d_sb)->mount_crypt_stat;
+	int cipher_name_len;
+
+	ecryptfs_set_default_crypt_stat_vals(crypt_stat, mount_crypt_stat);
+	/* See if there are mount crypt options */
+	if (mount_crypt_stat->global_auth_tok) {
+		ecryptfs_printk(KERN_DEBUG, "Initializing context for new "
+				"file using mount_crypt_stat\n");
+		cipher_name_len =
+		    strlen(mount_crypt_stat->global_default_cipher_name);
+		/* The destination must hold the name plus its NUL
+		 * terminator; refuse anything longer before touching
+		 * the crypt_stat any further */
+		if (cipher_name_len >= sizeof(crypt_stat->cipher)) {
+			rc = -EINVAL;
+			ecryptfs_printk(KERN_ERR, "Cipher name of length "
+					"[%d] is too long\n", cipher_name_len);
+			goto out;
+		}
+		ECRYPTFS_SET_FLAG(crypt_stat->flags, ECRYPTFS_ENCRYPTED);
+		ECRYPTFS_SET_FLAG(crypt_stat->flags, ECRYPTFS_KEY_VALID);
+		/* NOTE(review): num_keysigs is not bounds-checked against
+		 * ECRYPTFS_MAX_NUM_KEYSIGS here -- confirm it is reset
+		 * before this function can run twice on one inode */
+		memcpy(crypt_stat->keysigs[crypt_stat->num_keysigs++],
+		       mount_crypt_stat->global_auth_tok_sig,
+		       ECRYPTFS_SIG_SIZE_HEX);
+		memcpy(crypt_stat->cipher,
+		       mount_crypt_stat->global_default_cipher_name,
+		       cipher_name_len);
+		crypt_stat->cipher[cipher_name_len] = '\0';
+		crypt_stat->key_size =
+		    mount_crypt_stat->global_default_cipher_key_size;
+		ecryptfs_generate_new_key(crypt_stat);
+	} else
+		/* We should not encounter this scenario since we
+		 * should detect lack of global_auth_tok at mount time
+		 * TODO: Applies to 0.1 release only; remove in future
+		 * release */
+		BUG();
+	rc = ecryptfs_init_crypt_ctx(crypt_stat);
+	if (rc)
+		ecryptfs_printk(KERN_ERR, "Error initializing cryptographic "
+				"context for cipher [%s]: rc = [%d]\n",
+				crypt_stat->cipher, rc);
+out:
+	return rc;
+}
+
+/**
+ * contains_ecryptfs_marker - check for the ecryptfs marker
+ * @data: The data block in which to check; 8 bytes are read from it
+ *
+ * The marker is two big-endian 32-bit words; it is present when the
+ * first word XORed with MAGIC_ECRYPTFS_MARKER equals the second word.
+ *
+ * Returns one if marker found; zero if not found
+ */
+int contains_ecryptfs_marker(char *data)
+{
+	u32 m_1, m_2;
+
+	/* memcpy() rather than a cast: data need not be aligned */
+	memcpy(&m_1, data, 4);
+	m_1 = be32_to_cpu(m_1);
+	memcpy(&m_2, (data + 4), 4);
+	m_2 = be32_to_cpu(m_2);
+	if ((m_1 ^ MAGIC_ECRYPTFS_MARKER) == m_2)
+		return 1;
+	ecryptfs_printk(KERN_DEBUG, "m_1 = [0x%.8x]; m_2 = [0x%.8x]; "
+			"MAGIC_ECRYPTFS_MARKER = [0x%.8x]\n", m_1, m_2,
+			MAGIC_ECRYPTFS_MARKER);
+	ecryptfs_printk(KERN_DEBUG, "(m_1 ^ MAGIC_ECRYPTFS_MARKER) = "
+			"[0x%.8x]\n", (m_1 ^ MAGIC_ECRYPTFS_MARKER));
+	return 0;
+}
+
+/* Pairs a flag bit stored in the file header with the crypt_stat flag
+ * that represents it in memory */
+struct ecryptfs_flag_map_elem {
+	u32 file_flag;
+	u32 local_flag;
+};
+
+/* Add support for additional flags by adding elements here. */
+static struct ecryptfs_flag_map_elem ecryptfs_flag_map[] = {
+	{0x00000001, ECRYPTFS_ENABLE_HMAC},
+	{0x00000002, ECRYPTFS_ENCRYPTED}
+};
+
+/**
+ * ecryptfs_process_flags
+ * @crypt_stat: Crypt stat whose flags and file_version are updated
+ * @page_virt: Source data to be parsed
+ * @bytes_read: Updated with the number of bytes read
+ *
+ * Every flag listed in ecryptfs_flag_map is either set or cleared in
+ * crypt_stat->flags according to the on-disk flag vector.
+ *
+ * Returns zero; no flag combination is currently rejected
+ */
+static int ecryptfs_process_flags(struct ecryptfs_crypt_stat *crypt_stat,
+				  char *page_virt, int *bytes_read)
+{
+	int rc = 0;
+	int i;
+	u32 flags;
+
+	memcpy(&flags, page_virt, 4);
+	flags = be32_to_cpu(flags);
+	for (i = 0; i < ARRAY_SIZE(ecryptfs_flag_map); i++)
+		if (flags & ecryptfs_flag_map[i].file_flag) {
+			ECRYPTFS_SET_FLAG(crypt_stat->flags,
+					  ecryptfs_flag_map[i].local_flag);
+		} else
+			ECRYPTFS_CLEAR_FLAG(crypt_stat->flags,
+					    ecryptfs_flag_map[i].local_flag);
+	/* Version is in top 8 bits of the 32-bit flag vector */
+	crypt_stat->file_version = ((flags >> 24) & 0xFF);
+	(*bytes_read) = 4;
+	return rc;
+}
+
+/**
+ * write_ecryptfs_marker
+ * @page_virt: Location within the page at which to begin writing the
+ *             marker
+ * @written: Number of bytes written
+ *
+ * Writes a random 32-bit word followed by that word XORed with
+ * MAGIC_ECRYPTFS_MARKER, both big-endian.
+ *
+ * Marker = 0x3c81b7f5
+ */
+static void write_ecryptfs_marker(char *page_virt, size_t *written)
+{
+	u32 m_1, m_2;
+
+	get_random_bytes(&m_1, (MAGIC_ECRYPTFS_MARKER_SIZE_BYTES / 2));
+	/* Derive the second word before m_1 is byte-swapped */
+	m_2 = (m_1 ^ MAGIC_ECRYPTFS_MARKER);
+	m_1 = cpu_to_be32(m_1);
+	memcpy(page_virt, &m_1, (MAGIC_ECRYPTFS_MARKER_SIZE_BYTES / 2));
+	m_2 = cpu_to_be32(m_2);
+	memcpy(page_virt + (MAGIC_ECRYPTFS_MARKER_SIZE_BYTES / 2), &m_2,
+	       (MAGIC_ECRYPTFS_MARKER_SIZE_BYTES / 2));
+	(*written) = MAGIC_ECRYPTFS_MARKER_SIZE_BYTES;
+}
+
+/**
+ * write_ecryptfs_flags
+ * @page_virt: Location at which the 4-byte flag vector is written
+ * @crypt_stat: Source of the flags and of the file format version
+ * @written: Number of bytes written
+ *
+ * Inverse of ecryptfs_process_flags(): the flags listed in
+ * ecryptfs_flag_map plus the file version are packed into one
+ * big-endian 32-bit word.
+ */
+static void
+write_ecryptfs_flags(char *page_virt, struct ecryptfs_crypt_stat *crypt_stat,
+		     size_t *written)
+{
+	u32 flags = 0;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(ecryptfs_flag_map); i++)
+		if (ECRYPTFS_CHECK_FLAG(crypt_stat->flags,
+					ecryptfs_flag_map[i].local_flag))
+			flags |= ecryptfs_flag_map[i].file_flag;
+	/* Version is in top 8 bits of the 32-bit flag vector. Widen to
+	 * u32 before shifting: a u8 promotes to int, and shifting a
+	 * value >= 0x80 by 24 would overflow into the sign bit */
+	flags |= (((u32)(crypt_stat->file_version & 0xFF)) << 24);
+	flags = cpu_to_be32(flags);
+	memcpy(page_virt, &flags, 4);
+	(*written) = 4;
+}
+
+/* Pairs a kernel crypto API cipher name with its RFC 2440 cipher code */
+struct ecryptfs_cipher_code_str_map_elem {
+	char cipher_str[16];
+	u16 cipher_code;
+};
+
+/* Add support for additional ciphers by adding elements here. The
+ * cipher_code is whatever OpenPGP applications use to identify the
+ * ciphers. List in order of probability.
*/
+static struct ecryptfs_cipher_code_str_map_elem
+ecryptfs_cipher_code_str_map[] = {
+	{"aes",RFC2440_CIPHER_AES_128 },
+	{"blowfish", RFC2440_CIPHER_BLOWFISH},
+	{"des3_ede", RFC2440_CIPHER_DES3_EDE},
+	{"cast5", RFC2440_CIPHER_CAST_5},
+	{"twofish", RFC2440_CIPHER_TWOFISH},
+	{"cast6", RFC2440_CIPHER_CAST_6},
+	{"aes", RFC2440_CIPHER_AES_192},
+	{"aes", RFC2440_CIPHER_AES_256}
+};
+
+/**
+ * ecryptfs_code_for_cipher_string
+ * @crypt_stat: Crypt stat holding the cipher name and, for "aes", the
+ *              key size that selects among the three AES codes
+ *
+ * Returns zero on no match, or the cipher code on match
+ */
+u16 ecryptfs_code_for_cipher_string(struct ecryptfs_crypt_stat *crypt_stat)
+{
+	int i;
+	u16 code = 0;
+	struct ecryptfs_cipher_code_str_map_elem *map =
+		ecryptfs_cipher_code_str_map;
+
+	if (strcmp(crypt_stat->cipher, "aes") == 0) {
+		/* "aes" maps to three codes; the key size picks one.
+		 * Any other key size leaves code at zero (no match) */
+		switch (crypt_stat->key_size) {
+		case 16:
+			code = RFC2440_CIPHER_AES_128;
+			break;
+		case 24:
+			code = RFC2440_CIPHER_AES_192;
+			break;
+		case 32:
+			code = RFC2440_CIPHER_AES_256;
+		}
+	} else {
+		for (i = 0; i < ARRAY_SIZE(ecryptfs_cipher_code_str_map); i++)
+			if (strcmp(crypt_stat->cipher, map[i].cipher_str) == 0){
+				code = map[i].cipher_code;
+				break;
+			}
+	}
+	return code;
+}
+
+/**
+ * ecryptfs_cipher_code_to_string
+ * @str: Destination to write out the cipher name; the caller must
+ *       provide room for the longest name in the map plus a NUL
+ * @cipher_code: The code to convert to cipher name string
+ *
+ * Returns zero on success; -EINVAL if the code is not in the map, in
+ * which case @str is left as the empty string
+ */
+int ecryptfs_cipher_code_to_string(char *str, u16 cipher_code)
+{
+	int rc = 0;
+	int i;
+
+	str[0] = '\0';
+	for (i = 0; i < ARRAY_SIZE(ecryptfs_cipher_code_str_map); i++)
+		if (cipher_code
+		    == ecryptfs_cipher_code_str_map[i].cipher_code) {
+			strcpy(str, ecryptfs_cipher_code_str_map[i].cipher_str);
+			/* Codes are unique in the map; stop at the match */
+			break;
+		}
+	if (str[0] == '\0') {
+		ecryptfs_printk(KERN_WARNING, "Cipher code not recognized: "
+				"[%d]\n", cipher_code);
+		rc = -EINVAL;
+	}
+	return rc;
+}
+
+/**
+ * ecryptfs_read_header_region
+ * @data: Destination buffer; must hold ECRYPTFS_DEFAULT_EXTENT_SIZE
+ *        bytes
+ * @dentry: Dentry of the file to open and read from
+ * @mnt: vfsmount to open @dentry on
+ *
+ * Opens the file read-only and reads its first
+ * ECRYPTFS_DEFAULT_EXTENT_SIZE bytes into @data.
+ *
+ * Returns zero on success; non-zero (a negative errno) if the file
+ * cannot be opened or the read fails
+ */
+int ecryptfs_read_header_region(char *data, struct dentry *dentry,
+				struct vfsmount *mnt)
+{
+	struct file *file;
+	mm_segment_t oldfs;
+	int rc;
+
+	mnt = mntget(mnt);
+	file = dentry_open(dentry, mnt, O_RDONLY);
+	if (IS_ERR(file)) {
+		ecryptfs_printk(KERN_DEBUG, "Error opening file to "
+				"read header region\n");
+		/* NOTE(review): confirm that dentry_open() does not
+		 * already drop the mnt reference when it fails */
+		mntput(mnt);
+		rc = PTR_ERR(file);
+		goto out;
+	}
+	file->f_pos = 0;
+	/* The destination is a kernel buffer, so lift the user-space
+	 * address limit around the ->read() call */
+	oldfs = get_fs();
+	set_fs(get_ds());
+	/* For releases 0.1 and 0.2, all of the header information
+	 * fits in the first data extent-sized region. */
+	rc = file->f_op->read(file, (char __user *)data,
+			      ECRYPTFS_DEFAULT_EXTENT_SIZE, &file->f_pos);
+	set_fs(oldfs);
+	fput(file);
+	/* Only a failed read is an error here; a short read still
+	 * returns zero */
+	if (rc < 0) {
+		ecryptfs_printk(KERN_WARNING, "Error reading header "
+				"region; rc = [%d]\n", rc);
+		goto out;
+	}
+	rc = 0;
+out:
+	return rc;
+}
+
+/**
+ * write_header_metadata
+ * @virt: Location at which the 6 bytes of metadata are written
+ * @crypt_stat: Source of the header extent size and extent count
+ * @written: Number of bytes written
+ *
+ * Writes the header extent size (32-bit) followed by the number of
+ * header extents at the front of the file (16-bit), both big-endian.
+ */
+static void
+write_header_metadata(char *virt, struct ecryptfs_crypt_stat *crypt_stat,
+		      size_t *written)
+{
+	u32 header_extent_size;
+	u16 num_header_extents_at_front;
+
+	header_extent_size = (u32)crypt_stat->header_extent_size;
+	num_header_extents_at_front =
+		(u16)crypt_stat->num_header_extents_at_front;
+	header_extent_size = cpu_to_be32(header_extent_size);
+	memcpy(virt, &header_extent_size, 4);
+	virt += 4;
+	num_header_extents_at_front = cpu_to_be16(num_header_extents_at_front);
+	memcpy(virt, &num_header_extents_at_front, 2);
+	(*written) = 6;
+}
+
+struct kmem_cache *ecryptfs_header_cache_0;
+struct kmem_cache *ecryptfs_header_cache_1;
+struct kmem_cache *ecryptfs_header_cache_2;
+
+/**
+ * ecryptfs_write_headers_virt
+ * @page_virt: Buffer of PAGE_CACHE_SIZE bytes in which the header is
+ *             built
+ * @crypt_stat: Source of the flags, file version and header geometry
+ * @ecryptfs_dentry: Passed on to ecryptfs_generate_key_packet_set()
+ *
+ * Format version: 1
+ *
+ * Header Extent:
+ *   Octets 0-7:        Unencrypted file size (big-endian)
+ *   Octets 8-15:       eCryptfs special marker
+ *   Octets 16-19:      Flags
+ *    Octet 16:         File format version number (between 0 and 255)
+ *    Octets 17-18:     Reserved
+ *    Octet 19:         Bit 1 (lsb): Reserved
+ *                      Bit 2: Encrypted?
+ *                      Bits 3-8: Reserved
+ *   Octets 20-23:      Header extent size (big-endian)
+ *   Octets 24-25:      Number of header extents at front of file
+ *                      (big-endian)
+ *   Octet  26:         Begin RFC 2440 authentication token packet set
+ * Data Extent 0:
+ *   Lower data (CBC encrypted)
+ * Data Extent 1:
+ *   Lower data (CBC encrypted)
+ * ...
+ *
+ * Returns zero on success
+ */
+int ecryptfs_write_headers_virt(char *page_virt,
+				struct ecryptfs_crypt_stat *crypt_stat,
+				struct dentry *ecryptfs_dentry)
+{
+	int rc;
+	size_t written;
+	size_t offset;
+
+	/* The first ECRYPTFS_FILE_SIZE_BYTES octets are skipped here;
+	 * per the layout above they hold the unencrypted file size */
+	offset = ECRYPTFS_FILE_SIZE_BYTES;
+	write_ecryptfs_marker((page_virt + offset), &written);
+	offset += written;
+	write_ecryptfs_flags((page_virt + offset), crypt_stat, &written);
+	offset += written;
+	write_header_metadata((page_virt + offset), crypt_stat, &written);
+	offset += written;
+	/* The packet set may use whatever is left of the page */
+	rc = ecryptfs_generate_key_packet_set((page_virt + offset), crypt_stat,
+					      ecryptfs_dentry, &written,
+					      PAGE_CACHE_SIZE - offset);
+	if (rc)
+		ecryptfs_printk(KERN_WARNING, "Error generating key packet "
+				"set; rc = [%d]\n", rc);
+	return rc;
+}
+
+/**
+ * ecryptfs_write_headers
+ * @ecryptfs_dentry: The eCryptfs dentry; the crypt_stat of its inode
+ *                   describes the header to write
+ * @lower_file: The lower file struct, which was returned from dentry_open
+ *
+ * Write the file headers out. This will likely involve a userspace
+ * callout, in which the session key is encrypted with one or more
+ * public keys and/or the passphrase necessary to do the encryption is
+ * retrieved via a prompt. Exactly what happens at this point should
+ * be policy-dependent.
+ *
+ * Returns zero on success; non-zero on error, including a failed or
+ * short write of a header page to the lower file
+ */
+int ecryptfs_write_headers(struct dentry *ecryptfs_dentry,
+			   struct file *lower_file)
+{
+	mm_segment_t oldfs;
+	struct ecryptfs_crypt_stat *crypt_stat;
+	char *page_virt;
+	int current_header_page;
+	int header_pages;
+	ssize_t written;
+	int rc = 0;
+
+	crypt_stat = &ecryptfs_inode_to_private(
+		ecryptfs_dentry->d_inode)->crypt_stat;
+	if (likely(ECRYPTFS_CHECK_FLAG(crypt_stat->flags,
+				       ECRYPTFS_ENCRYPTED))) {
+		if (!ECRYPTFS_CHECK_FLAG(crypt_stat->flags,
+					 ECRYPTFS_KEY_VALID)) {
+			ecryptfs_printk(KERN_DEBUG, "Key is "
+					"invalid; bailing out\n");
+			rc = -EINVAL;
+			goto out;
+		}
+	} else {
+		rc = -EINVAL;
+		ecryptfs_printk(KERN_WARNING,
+				"Called with crypt_stat->encrypted == 0\n");
+		goto out;
+	}
+	/* Released in this function */
+	page_virt = kmem_cache_alloc(ecryptfs_header_cache_0, SLAB_USER);
+	if (!page_virt) {
+		ecryptfs_printk(KERN_ERR, "Out of memory\n");
+		rc = -ENOMEM;
+		goto out;
+	}
+	memset(page_virt, 0, PAGE_CACHE_SIZE);
+	rc = ecryptfs_write_headers_virt(page_virt, crypt_stat,
+					 ecryptfs_dentry);
+	if (unlikely(rc)) {
+		ecryptfs_printk(KERN_ERR, "Error whilst writing headers\n");
+		/* Scrub the partially built header before freeing it */
+		memset(page_virt, 0, PAGE_CACHE_SIZE);
+		goto out_free;
+	}
+	ecryptfs_printk(KERN_DEBUG,
+			"Writing key packet set to underlying file\n");
+	lower_file->f_pos = 0;
+	/* The source is a kernel buffer, so lift the user-space address
+	 * limit around the ->write() calls */
+	oldfs = get_fs();
+	set_fs(get_ds());
+	ecryptfs_printk(KERN_DEBUG, "Calling lower_file->f_op->"
+			"write() w/ header page; lower_file->f_pos = "
+			"[0x%.16llx]\n",
+			(unsigned long long)lower_file->f_pos);
+	written = lower_file->f_op->write(lower_file,
+					  (char __user *)page_virt,
+					  PAGE_CACHE_SIZE, &lower_file->f_pos);
+	header_pages = ((crypt_stat->header_extent_size
+			 * crypt_stat->num_header_extents_at_front)
+			/ PAGE_CACHE_SIZE);
+	/* The remaining header pages are written as zeroes */
+	memset(page_virt, 0, PAGE_CACHE_SIZE);
+	current_header_page = 1;
+	/* Stop padding as soon as one page fails to go out in full */
+	while (written == PAGE_CACHE_SIZE
+	       && current_header_page < header_pages) {
+		ecryptfs_printk(KERN_DEBUG, "Calling lower_file->f_op->"
+				"write() w/ zero'd page; lower_file->f_pos = "
+				"[0x%.16llx]\n",
+				(unsigned long long)lower_file->f_pos);
+		written = lower_file->f_op->write(lower_file,
+						  (char __user *)page_virt,
+						  PAGE_CACHE_SIZE,
+						  &lower_file->f_pos);
+		current_header_page++;
+	}
+	set_fs(oldfs);
+	if (written != PAGE_CACHE_SIZE) {
+		/* Propagate the lower error; a short write is an I/O
+		 * error because the header would be incomplete */
+		rc = (written < 0) ? (int)written : -EIO;
+		ecryptfs_printk(KERN_ERR, "Error writing header page to "
+				"lower file; rc = [%d]\n", rc);
+		goto out_free;
+	}
+	ecryptfs_printk(KERN_DEBUG,
+			"Done writing key packet set to underlying file.\n");
+out_free:
+	kmem_cache_free(ecryptfs_header_cache_0, page_virt);
+out:
+	return rc;
+}
+
+/**
+ * parse_header_metadata
+ * @crypt_stat: Receives the header extent size and extent count
+ * @virt: Source data; 6 bytes are read from it
+ * @bytes_read: Updated with the number of bytes read
+ *
+ * Inverse of write_header_metadata().
+ *
+ * Returns zero on success; -EINVAL if the total header size (extent
+ * size times extent count) is below
+ * ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE
+ */
+static int parse_header_metadata(struct ecryptfs_crypt_stat *crypt_stat,
+				 char *virt, int *bytes_read)
+{
+	int rc = 0;
+	u32 header_extent_size;
+	u16 num_header_extents_at_front;
+
+	memcpy(&header_extent_size, virt, 4);
+	header_extent_size = be32_to_cpu(header_extent_size);
+	virt += 4;
+	memcpy(&num_header_extents_at_front, virt, 2);
+	num_header_extents_at_front = be16_to_cpu(num_header_extents_at_front);
+	crypt_stat->header_extent_size = (int)header_extent_size;
+	crypt_stat->num_header_extents_at_front =
+		(int)num_header_extents_at_front;
+	(*bytes_read) = 6;
+	if ((crypt_stat->header_extent_size
+	     * crypt_stat->num_header_extents_at_front)
+	    < ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE) {
+		rc = -EINVAL;
+		/* header_extent_size is a size_t, hence %Zd */
+		ecryptfs_printk(KERN_WARNING, "Invalid header extent size: "
+				"[%Zd]\n", crypt_stat->header_extent_size);
+	}
+	return rc;
+}
+
+/**
+ * set_default_header_data
+ * @crypt_stat: Receives the fixed version 0 header geometry
+ *
+ * For version 0 file format; this function is only for backwards
+ * compatibility for files created with the prior versions of
+ * eCryptfs.
+ */
+static void set_default_header_data(struct ecryptfs_crypt_stat *crypt_stat)
+{
+	crypt_stat->header_extent_size = 4096;
+	crypt_stat->num_header_extents_at_front = 1;
+}
+
+/**
+ * ecryptfs_read_headers_virt
+ *
+ * Read/parse the header data. The header format is detailed in the
+ * comment block for the ecryptfs_write_headers_virt() function.
+ *
+ * Returns zero on success; non-zero if the marker is missing, the
+ * file version is unsupported, or the header metadata or packet set
+ * cannot be parsed
+ */
+static int ecryptfs_read_headers_virt(char *page_virt,
+				      struct ecryptfs_crypt_stat *crypt_stat,
+				      struct dentry *ecryptfs_dentry)
+{
+	int rc = 0;
+	int offset;
+	int bytes_read;
+
+	ecryptfs_set_default_sizes(crypt_stat);
+	crypt_stat->mount_crypt_stat = &ecryptfs_superblock_to_private(
+		ecryptfs_dentry->d_sb)->mount_crypt_stat;
+	/* The marker follows the file size at the start of the header */
+	offset = ECRYPTFS_FILE_SIZE_BYTES;
+	rc = contains_ecryptfs_marker(page_virt + offset);
+	if (rc == 0) {
+		rc = -EINVAL;
+		goto out;
+	}
+	offset += MAGIC_ECRYPTFS_MARKER_SIZE_BYTES;
+	rc = ecryptfs_process_flags(crypt_stat, (page_virt + offset),
+				    &bytes_read);
+	if (rc) {
+		ecryptfs_printk(KERN_WARNING, "Error processing flags\n");
+		goto out;
+	}
+	if (crypt_stat->file_version > ECRYPTFS_SUPPORTED_FILE_VERSION) {
+		ecryptfs_printk(KERN_WARNING, "File version is [%d]; only "
+				"file version [%d] is supported by this "
+				"version of eCryptfs\n",
+				crypt_stat->file_version,
+				ECRYPTFS_SUPPORTED_FILE_VERSION);
+		rc = -EINVAL;
+		goto out;
+	}
+	offset += bytes_read;
+	if (crypt_stat->file_version >= 1) {
+		rc = parse_header_metadata(crypt_stat, (page_virt + offset),
+					   &bytes_read);
+		if (rc) {
+			ecryptfs_printk(KERN_WARNING, "Error reading header "
+					"metadata; rc = [%d]\n", rc);
+			/* Do not parse the packet set on top of header
+			 * geometry that was just rejected */
+			goto out;
+		}
+		offset += bytes_read;
+	} else
+		set_default_header_data(crypt_stat);
+	rc = ecryptfs_parse_packet_set(crypt_stat, (page_virt + offset),
+				       ecryptfs_dentry);
+out:
+	return rc;
+}
+
+/**
+ * ecryptfs_read_headers
+ * @ecryptfs_dentry: The eCryptfs dentry; the crypt_stat of its inode
+ *                   receives the parsed header data
+ * @lower_file: The lower file to read the header from
+ *
+ * Returns zero if valid headers found and parsed; non-zero otherwise
+ */
+int ecryptfs_read_headers(struct dentry *ecryptfs_dentry,
+			  struct file *lower_file)
+{
+	int rc = 0;
+	char *page_virt = NULL;
+	mm_segment_t oldfs;
+	ssize_t bytes_read;
+	struct ecryptfs_crypt_stat *crypt_stat =
+	    &ecryptfs_inode_to_private(ecryptfs_dentry->d_inode)->crypt_stat;
+
+	/* Read the first page from the underlying file */
+	page_virt = kmem_cache_alloc(ecryptfs_header_cache_1, SLAB_USER);
+	if (!page_virt) {
+		rc = -ENOMEM;
+		ecryptfs_printk(KERN_ERR, "Unable to allocate page_virt\n");
+		goto out;
+	}
+	lower_file->f_pos = 0;
+	/* The destination is a kernel buffer, so lift the user-space
+	 * address limit around the ->read() call */
+	oldfs = get_fs();
+	set_fs(get_ds());
+	bytes_read = lower_file->f_op->read(lower_file,
+					    (char __user *)page_virt,
+					    ECRYPTFS_DEFAULT_EXTENT_SIZE,
+					    &lower_file->f_pos);
+	set_fs(oldfs);
+	/* A failed or short read cannot contain a complete header */
+	if (bytes_read != ECRYPTFS_DEFAULT_EXTENT_SIZE) {
+		rc = -EINVAL;
+		goto out;
+	}
+	rc = ecryptfs_read_headers_virt(page_virt, crypt_stat,
+					ecryptfs_dentry);
+	if (rc) {
+		ecryptfs_printk(KERN_DEBUG, "Valid eCryptfs headers not "
+				"found\n");
+		rc = -EINVAL;
+	}
+out:
+	if (page_virt) {
+		/* Scrub the header copy before returning it to the cache */
+		memset(page_virt, 0, PAGE_CACHE_SIZE);
+		kmem_cache_free(ecryptfs_header_cache_1, page_virt);
+	}
+	return rc;
+}
+
+/**
+ * ecryptfs_encode_filename - converts a plaintext file name to cipher text
+ * @crypt_stat: The crypt_stat struct associated with the file name to encode
+ * @name: The plaintext name
+ * @length: The length of the plaintext
+ * @encoded_name: The encrypted name; allocated here with kmalloc()
+ *
+ * Encrypts and encodes a filename into something that constitutes a
+ * valid filename for a filesystem, with printable characters.
+ *
+ * We assume that we have a properly initialized crypto context,
+ * pointed to by crypt_stat->tfm.
+ *
+ * TODO: Implement filename encoding and encryption here, in place of
+ * memcpy. We are keeping the framework around for now to (1)
+ * facilitate testing of the components needed to implement filename
+ * encryption and (2) to provide a code base from which other
+ * developers in the community can easily implement this feature.
+ *
+ * Returns the length of encoded filename; negative if error
+ */
+int
+ecryptfs_encode_filename(struct ecryptfs_crypt_stat *crypt_stat,
+			 const char *name, int length, char **encoded_name)
+{
+	int error = 0;
+
+	(*encoded_name) = kmalloc(length + 2, GFP_KERNEL);
+	if (!(*encoded_name)) {
+		error = -ENOMEM;
+		goto out;
+	}
+	/* TODO: Filename encryption is a scheduled feature for a
+	 * future version of eCryptfs.
This function is here only for
+	 * the purpose of providing a framework for other developers
+	 * to easily implement filename encryption. Hint: Replace this
+	 * memcpy() with a call to encrypt and encode the
+	 * filename, the set the length accordingly. */
+	memcpy((void *)(*encoded_name), (void *)name, length);
+	(*encoded_name)[length] = '\0';
+	/* The reported length counts the NUL terminator */
+	error = length + 1;
+out:
+	return error;
+}
+
+/**
+ * ecryptfs_decode_filename - converts the cipher text name to plaintext
+ * @crypt_stat: The crypt_stat struct associated with the file
+ * @name: The filename in cipher text
+ * @length: The length of the cipher text name
+ * @decrypted_name: The plaintext name; allocated here with kmalloc()
+ *
+ * Decodes and decrypts the filename.
+ *
+ * We assume that we have a properly initialized crypto context,
+ * pointed to by crypt_stat->tfm.
+ *
+ * TODO: Implement filename decoding and decryption here, in place of
+ * memcpy. We are keeping the framework around for now to (1)
+ * facilitate testing of the components needed to implement filename
+ * encryption and (2) to provide a code base from which other
+ * developers in the community can easily implement this feature.
+ *
+ * Returns the length of decoded filename; negative if error
+ */
+int
+ecryptfs_decode_filename(struct ecryptfs_crypt_stat *crypt_stat,
+			 const char *name, int length, char **decrypted_name)
+{
+	int error = 0;
+
+	(*decrypted_name) = kmalloc(length + 2, GFP_KERNEL);
+	if (!(*decrypted_name)) {
+		error = -ENOMEM;
+		goto out;
+	}
+	/* TODO: Filename encryption is a scheduled feature for a
+	 * future version of eCryptfs. This function is here only for
+	 * the purpose of providing a framework for other developers
+	 * to easily implement filename encryption. Hint: Replace this
+	 * memcpy() with a call to decode and decrypt the
+	 * filename, the set the length accordingly. */
+	memcpy((void *)(*decrypted_name), (void *)name, length);
+	/* Terminate directly after the copied bytes; this is only for
+	 * convenience in printing out the string in debug messages.
+	 * Index length + 1 would leave byte [length] uninitialized. */
+	(*decrypted_name)[length] = '\0';
+	error = length;
+out:
+	return error;
+}
+
+/**
+ * ecryptfs_process_cipher - Perform cipher initialization.
+ * @tfm: Crypto context set by this function
+ * @key_tfm: Crypto context for key material, set by this function
+ * @cipher_name: Name of the cipher.
+ * @key_size: Size of the key in bytes.
+ *
+ * Allocates both contexts, checks @key_size against the limits of the
+ * cipher and sets a random dummy key of that size on each context.
+ *
+ * Returns zero on success; -EINVAL otherwise. Any crypto_tfm structs
+ * allocated here should be released by other functions, such as on a
+ * superblock put event, regardless of whether this function succeeds
+ * or fails.
+ */
+int
+ecryptfs_process_cipher(struct crypto_tfm **tfm, struct crypto_tfm **key_tfm,
+			char *cipher_name, size_t key_size)
+{
+	char dummy_key[ECRYPTFS_MAX_KEY_BYTES];
+	int rc;
+
+	*tfm = *key_tfm = NULL;
+	/* dummy_key lives on the stack; never fill more than it holds */
+	if (key_size > ECRYPTFS_MAX_KEY_BYTES) {
+		rc = -EINVAL;
+		printk(KERN_ERR "Requested key size is [%Zd] bytes; maximum "
+		       "allowable is [%d]\n", key_size, ECRYPTFS_MAX_KEY_BYTES);
+		goto out;
+	}
+	*tfm = crypto_alloc_tfm(cipher_name, (ECRYPTFS_DEFAULT_CHAINING_MODE
+					      | CRYPTO_TFM_REQ_WEAK_KEY));
+	if (!(*tfm)) {
+		rc = -EINVAL;
+		printk(KERN_ERR "Unable to allocate crypto cipher with name "
+		       "[%s]\n", cipher_name);
+		goto out;
+	}
+	*key_tfm = crypto_alloc_tfm(cipher_name, CRYPTO_TFM_REQ_WEAK_KEY);
+	if (!(*key_tfm)) {
+		rc = -EINVAL;
+		printk(KERN_ERR "Unable to allocate crypto cipher with name "
+		       "[%s]\n", cipher_name);
+		goto out;
+	}
+	if (key_size < crypto_tfm_alg_min_keysize(*tfm)) {
+		rc = -EINVAL;
+		printk(KERN_ERR "Request key size is [%Zd]; minimum key size "
+		       "supported by cipher [%s] is [%d]\n", key_size,
+		       cipher_name, crypto_tfm_alg_min_keysize(*tfm));
+		goto out;
+	}
+	if (key_size < crypto_tfm_alg_min_keysize(*key_tfm)) {
+		rc = -EINVAL;
+		printk(KERN_ERR "Request key size is [%Zd]; minimum key size "
+		       "supported by cipher [%s] is [%d]\n", key_size,
+		       cipher_name, crypto_tfm_alg_min_keysize(*key_tfm));
+		goto out;
+	}
+	if (key_size > crypto_tfm_alg_max_keysize(*tfm)) {
+		rc = -EINVAL;
+		/* Report the limit that was actually exceeded */
+		printk(KERN_ERR "Request key size is [%Zd]; maximum key size "
+		       "supported by cipher [%s] is [%d]\n", key_size,
+		       cipher_name, crypto_tfm_alg_max_keysize(*tfm));
+		goto out;
+	}
+	if (key_size > crypto_tfm_alg_max_keysize(*key_tfm)) {
+		rc = -EINVAL;
+		printk(KERN_ERR "Request key size is [%Zd]; maximum key size "
+		       "supported by cipher [%s] is [%d]\n", key_size,
+		       cipher_name, crypto_tfm_alg_max_keysize(*key_tfm));
+		goto out;
+	}
+	/* Setting a throwaway key checks that the cipher accepts a key
+	 * of this size */
+	get_random_bytes(dummy_key, key_size);
+	rc = crypto_cipher_setkey(*tfm, dummy_key, key_size);
+	if (rc) {
+		printk(KERN_ERR "Error attempting to set key of size [%Zd] for "
+		       "cipher [%s]; rc = [%d]\n", key_size, cipher_name, rc);
+		rc = -EINVAL;
+		goto out;
+	}
+	rc = crypto_cipher_setkey(*key_tfm, dummy_key, key_size);
+	if (rc) {
+		printk(KERN_ERR "Error attempting to set key of size [%Zd] for "
+		       "cipher [%s]; rc = [%d]\n", key_size, cipher_name, rc);
+		rc = -EINVAL;
+		goto out;
+	}
+out:
+	return rc;
+}
diff --git a/fs/ecryptfs/debug.c b/fs/ecryptfs/debug.c
new file mode 100644
index 000000000000..61f8e894284f
--- /dev/null
+++ b/fs/ecryptfs/debug.c
@@ -0,0 +1,123 @@
+/**
+ * eCryptfs: Linux filesystem encryption layer
+ * Functions only useful for debugging.
+ *
+ * Copyright (C) 2006 International Business Machines Corp.
+ * Author(s): Michael A. Halcrow <mahalcro@us.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+ * 02111-1307, USA.
+ */
+
+#include "ecryptfs_kernel.h"
+
+/**
+ * ecryptfs_dump_auth_tok - debug function to print auth toks
+ * @auth_tok: The authentication token to print
+ *
+ * This function will print the contents of an ecryptfs authentication
+ * token.
+ */
+void ecryptfs_dump_auth_tok(struct ecryptfs_auth_tok *auth_tok)
+{
+	/* Hex text of the salt / signature plus a NUL terminator */
+	char salt[ECRYPTFS_SALT_SIZE * 2 + 1];
+	char sig[ECRYPTFS_SIG_SIZE_HEX + 1];
+
+	ecryptfs_printk(KERN_DEBUG, "Auth tok at mem loc [%p]:\n",
+			auth_tok);
+	/* NOTE(review): ECRYPTFS_PRIVATE_KEY is a value of enum
+	 * ecryptfs_token_types, yet it is tested here as a bit in
+	 * auth_tok->flags rather than compared with
+	 * auth_tok->token_type -- confirm which is intended */
+	if (ECRYPTFS_CHECK_FLAG(auth_tok->flags, ECRYPTFS_PRIVATE_KEY)) {
+		ecryptfs_printk(KERN_DEBUG, " * private key type\n");
+		ecryptfs_printk(KERN_DEBUG, " * (NO PRIVATE KEY SUPPORT "
+				"IN ECRYPTFS VERSION 0.1)\n");
+	} else {
+		ecryptfs_printk(KERN_DEBUG, " * passphrase type\n");
+		ecryptfs_to_hex(salt, auth_tok->token.password.salt,
+				ECRYPTFS_SALT_SIZE);
+		salt[ECRYPTFS_SALT_SIZE * 2] = '\0';
+		ecryptfs_printk(KERN_DEBUG, " * salt = [%s]\n", salt);
+		if (ECRYPTFS_CHECK_FLAG(auth_tok->token.password.flags,
+					ECRYPTFS_PERSISTENT_PASSWORD)) {
+			ecryptfs_printk(KERN_DEBUG, " * persistent\n");
+		}
+		/* Copy so that the signature can be NUL-terminated
+		 * locally before it is printed with %s */
+		memcpy(sig, auth_tok->token.password.signature,
+		       ECRYPTFS_SIG_SIZE_HEX);
+		sig[ECRYPTFS_SIG_SIZE_HEX] = '\0';
+		ecryptfs_printk(KERN_DEBUG, " * signature = [%s]\n", sig);
+	}
+	ecryptfs_printk(KERN_DEBUG, " * session_key.flags = [0x%x]\n",
+			auth_tok->session_key.flags);
+	if (auth_tok->session_key.flags
+	    & ECRYPTFS_USERSPACE_SHOULD_TRY_TO_DECRYPT)
+		ecryptfs_printk(KERN_DEBUG,
+				" * Userspace decrypt request set\n");
+	if (auth_tok->session_key.flags
+	    & ECRYPTFS_USERSPACE_SHOULD_TRY_TO_ENCRYPT)
+		ecryptfs_printk(KERN_DEBUG,
+				" * Userspace encrypt request set\n");
+	if (auth_tok->session_key.flags & ECRYPTFS_CONTAINS_DECRYPTED_KEY) {
+		ecryptfs_printk(KERN_DEBUG, " * Contains decrypted key\n");
+		ecryptfs_printk(KERN_DEBUG,
+				" * session_key.decrypted_key_size = [0x%x]\n",
+				auth_tok->session_key.decrypted_key_size);
+		ecryptfs_printk(KERN_DEBUG, " * Decrypted session key "
+				"dump:\n");
+		/* NOTE(review): a fixed ECRYPTFS_DEFAULT_KEY_BYTES bytes
+		 * are dumped here, not decrypted_key_size bytes as for
+		 * the encrypted key below -- confirm this is intended */
+		if (ecryptfs_verbosity > 0)
+			ecryptfs_dump_hex(auth_tok->session_key.decrypted_key,
+					  ECRYPTFS_DEFAULT_KEY_BYTES);
+	}
+	if (auth_tok->session_key.flags & ECRYPTFS_CONTAINS_ENCRYPTED_KEY) {
+		ecryptfs_printk(KERN_DEBUG, " * Contains encrypted key\n");
+		ecryptfs_printk(KERN_DEBUG,
+				" * session_key.encrypted_key_size = [0x%x]\n",
+				auth_tok->session_key.encrypted_key_size);
+		ecryptfs_printk(KERN_DEBUG, " * Encrypted session key "
+				"dump:\n");
+		if (ecryptfs_verbosity > 0)
+			ecryptfs_dump_hex(auth_tok->session_key.encrypted_key,
+					  auth_tok->session_key.
+					  encrypted_key_size);
+	}
+}
+
+/**
+ * ecryptfs_dump_hex - debug hex printer
+ * @data: string of bytes to be printed
+ * @bytes: number of bytes to print
+ *
+ * Dump hexadecimal representation of char array. Nothing is printed
+ * when ecryptfs_verbosity is below 1.
+ */
+void ecryptfs_dump_hex(char *data, int bytes)
+{
+	int i = 0;
+	int add_newline = 1;
+
+	if (ecryptfs_verbosity < 1)
+		return;
+	/* Only the first byte carries the KERN_DEBUG log level */
+	if (bytes != 0) {
+		printk(KERN_DEBUG "0x%.2x.", (unsigned char)data[i]);
+		i++;
+	}
+	while (i < bytes) {
+		printk("0x%.2x.", (unsigned char)data[i]);
+		i++;
+		/* Break the line after every 16th byte */
+		if (i % 16 == 0) {
+			printk("\n");
+			add_newline = 0;
+		} else
+			add_newline = 1;
+	}
+	/* Terminate the last row unless the loop just ended it */
+	if (add_newline)
+		printk("\n");
+}
+
diff --git a/fs/ecryptfs/dentry.c b/fs/ecryptfs/dentry.c
new file mode 100644
index 000000000000..f0d2a433242b
--- /dev/null
+++ b/fs/ecryptfs/dentry.c
@@ -0,0 +1,87 @@
+/**
+ * eCryptfs: Linux filesystem encryption layer
+ *
+ * Copyright (C) 1997-2003 Erez Zadok
+ * Copyright (C) 2001-2003 Stony Brook University
+ * Copyright (C) 2004-2006 International Business Machines Corp.
+ * Author(s): Michael A.
Halcrow <mahalcro@us.ibm.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA + * 02111-1307, USA. + */ + +#include <linux/dcache.h> +#include <linux/namei.h> +#include "ecryptfs_kernel.h" + +/** + * ecryptfs_d_revalidate - revalidate an ecryptfs dentry + * @dentry: The ecryptfs dentry + * @nd: The associated nameidata + * + * Called when the VFS needs to revalidate a dentry. This + * is called whenever a name lookup finds a dentry in the + * dcache. Most filesystems leave this as NULL, because all their + * dentries in the dcache are valid. + * + * Returns 1 if valid, 0 otherwise. 
+ *
+ */
+static int ecryptfs_d_revalidate(struct dentry *dentry, struct nameidata *nd)
+{
+	struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry);
+	struct vfsmount *lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry);
+	struct dentry *dentry_save = NULL;
+	struct vfsmount *vfsmount_save = NULL;
+	int rc = 1;
+
+	/* A lower filesystem without d_revalidate is always valid */
+	if (!lower_dentry->d_op || !lower_dentry->d_op->d_revalidate)
+		goto out;
+	/* The nameidata may be NULL; only point it at the lower
+	 * dentry/vfsmount, and restore it afterwards, when there is
+	 * one to modify */
+	if (nd) {
+		dentry_save = nd->dentry;
+		vfsmount_save = nd->mnt;
+		nd->dentry = lower_dentry;
+		nd->mnt = lower_mnt;
+	}
+	rc = lower_dentry->d_op->d_revalidate(lower_dentry, nd);
+	if (nd) {
+		nd->dentry = dentry_save;
+		nd->mnt = vfsmount_save;
+	}
+out:
+	return rc;
+}
+
+struct kmem_cache *ecryptfs_dentry_info_cache;
+
+/**
+ * ecryptfs_d_release
+ * @dentry: The ecryptfs dentry
+ *
+ * Called when a dentry is really deallocated. Frees the eCryptfs
+ * private data of the dentry and drops the reference on the lower
+ * dentry.
+ */
+static void ecryptfs_d_release(struct dentry *dentry)
+{
+	struct dentry *lower_dentry;
+
+	/* Look up the lower dentry before the private data is freed */
+	lower_dentry = ecryptfs_dentry_to_lower(dentry);
+	if (ecryptfs_dentry_to_private(dentry))
+		kmem_cache_free(ecryptfs_dentry_info_cache,
+				ecryptfs_dentry_to_private(dentry));
+	if (lower_dentry)
+		dput(lower_dentry);
+	return;
+}
+
+struct dentry_operations ecryptfs_dops = {
+	.d_revalidate = ecryptfs_d_revalidate,
+	.d_release = ecryptfs_d_release,
+};
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
new file mode 100644
index 000000000000..872c9958531a
--- /dev/null
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -0,0 +1,482 @@
+/**
+ * eCryptfs: Linux filesystem encryption layer
+ * Kernel declarations.
+ *
+ * Copyright (C) 1997-2003 Erez Zadok
+ * Copyright (C) 2001-2003 Stony Brook University
+ * Copyright (C) 2004-2006 International Business Machines Corp.
+ * Author(s): Michael A. Halcrow <mahalcro@us.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+ * 02111-1307, USA.
+ */
+
+#ifndef ECRYPTFS_KERNEL_H
+#define ECRYPTFS_KERNEL_H
+
+#include <keys/user-type.h>
+#include <linux/fs.h>
+#include <linux/scatterlist.h>
+
+/* Version verification for shared data structures w/ userspace */
+#define ECRYPTFS_VERSION_MAJOR 0x00
+#define ECRYPTFS_VERSION_MINOR 0x04
+#define ECRYPTFS_SUPPORTED_FILE_VERSION 0x01
+/* These flags indicate which features are supported by the kernel
+ * module; userspace tools such as the mount helper read
+ * ECRYPTFS_VERSIONING_MASK from a sysfs handle in order to determine
+ * how to behave. */
+#define ECRYPTFS_VERSIONING_PASSPHRASE 0x00000001
+#define ECRYPTFS_VERSIONING_PUBKEY 0x00000002
+#define ECRYPTFS_VERSIONING_PLAINTEXT_PASSTHROUGH 0x00000004
+#define ECRYPTFS_VERSIONING_POLICY 0x00000008
+#define ECRYPTFS_VERSIONING_MASK (ECRYPTFS_VERSIONING_PASSPHRASE \
+				  | ECRYPTFS_VERSIONING_PLAINTEXT_PASSTHROUGH)
+
+#define ECRYPTFS_MAX_PASSWORD_LENGTH 64
+#define ECRYPTFS_MAX_PASSPHRASE_BYTES ECRYPTFS_MAX_PASSWORD_LENGTH
+#define ECRYPTFS_SALT_SIZE 8
+#define ECRYPTFS_SALT_SIZE_HEX (ECRYPTFS_SALT_SIZE*2)
+/* The original signature size is only for what is stored on disk; all
+ * in-memory representations are expanded hex, so it is better adapted
+ * to be passed around or referenced on the command line */
+#define ECRYPTFS_SIG_SIZE 8
+#define ECRYPTFS_SIG_SIZE_HEX (ECRYPTFS_SIG_SIZE*2)
+#define ECRYPTFS_PASSWORD_SIG_SIZE ECRYPTFS_SIG_SIZE_HEX
+#define ECRYPTFS_MAX_KEY_BYTES 64
+#define ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES 512
+#define ECRYPTFS_DEFAULT_IV_BYTES 16
+#define ECRYPTFS_FILE_VERSION 0x01
+#define ECRYPTFS_DEFAULT_HEADER_EXTENT_SIZE 8192
+#define ECRYPTFS_DEFAULT_EXTENT_SIZE 4096
+#define ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE 8192
+
+/* Cipher codes; the names indicate they follow RFC 2440 */
+#define RFC2440_CIPHER_DES3_EDE 0x02
+#define RFC2440_CIPHER_CAST_5 0x03
+#define RFC2440_CIPHER_BLOWFISH 0x04
+#define RFC2440_CIPHER_AES_128 0x07
+#define RFC2440_CIPHER_AES_192 0x08
+#define RFC2440_CIPHER_AES_256 0x09
+#define RFC2440_CIPHER_TWOFISH 0x0a
+#define RFC2440_CIPHER_CAST_6 0x0b
+
+/* Flag helpers. flag_bit_vector must be an lvalue for SET and CLEAR;
+ * both arguments are parenthesized so that any expression can be
+ * passed without precedence surprises. */
+#define ECRYPTFS_SET_FLAG(flag_bit_vector, flag) \
+	((flag_bit_vector) |= (flag))
+#define ECRYPTFS_CLEAR_FLAG(flag_bit_vector, flag) \
+	((flag_bit_vector) &= ~(flag))
+#define ECRYPTFS_CHECK_FLAG(flag_bit_vector, flag) \
+	((flag_bit_vector) & (flag))
+
+/**
+ * For convenience, we may need to pass around the encrypted session
+ * key between kernel and userspace because the authentication token
+ * may not be extractable. For example, the TPM may not release the
+ * private key, instead requiring the encrypted data and returning the
+ * decrypted data.
+ */
+struct ecryptfs_session_key {
+#define ECRYPTFS_USERSPACE_SHOULD_TRY_TO_DECRYPT 0x00000001
+#define ECRYPTFS_USERSPACE_SHOULD_TRY_TO_ENCRYPT 0x00000002
+#define ECRYPTFS_CONTAINS_DECRYPTED_KEY 0x00000004
+#define ECRYPTFS_CONTAINS_ENCRYPTED_KEY 0x00000008
+	u32 flags;
+	u32 encrypted_key_size;
+	u32 decrypted_key_size;
+	u8 encrypted_key[ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES];
+	u8 decrypted_key[ECRYPTFS_MAX_KEY_BYTES];
+};
+
+struct ecryptfs_password {
+	u32 password_bytes;
+	s32 hash_algo;
+	u32 hash_iterations;
+	u32 session_key_encryption_key_bytes;
+#define ECRYPTFS_PERSISTENT_PASSWORD 0x01
+#define ECRYPTFS_SESSION_KEY_ENCRYPTION_KEY_SET 0x02
+	u32 flags;
+	/* Iterated-hash concatenation of salt and passphrase */
+	u8 session_key_encryption_key[ECRYPTFS_MAX_KEY_BYTES];
+	/* Always in expanded hex */
+	u8 signature[ECRYPTFS_PASSWORD_SIG_SIZE + 1];
+	u8 salt[ECRYPTFS_SALT_SIZE];
+};
+
+enum ecryptfs_token_types {ECRYPTFS_PASSWORD, ECRYPTFS_PRIVATE_KEY};
+
+/* May be a password or a private key */
+struct ecryptfs_auth_tok {
+	u16 version; /* 8-bit major and 8-bit minor */
+	u16 token_type;
+	u32 flags;
+	struct ecryptfs_session_key session_key;
+	u8 reserved[32];
+	union {
+		struct ecryptfs_password password;
+		/* Private key is in future eCryptfs releases */
+	} token;
+} __attribute__ ((packed));
+
+void ecryptfs_dump_auth_tok(struct ecryptfs_auth_tok *auth_tok);
+extern void ecryptfs_to_hex(char *dst, char *src, size_t src_size);
+extern void ecryptfs_from_hex(char *dst, char *src, int dst_size);
+
+struct ecryptfs_key_record {
+	unsigned char type;
+	size_t enc_key_size;
+	unsigned char sig[ECRYPTFS_SIG_SIZE];
+	unsigned char enc_key[ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES];
+};
+
+struct ecryptfs_auth_tok_list {
+	struct ecryptfs_auth_tok *auth_tok;
+	struct list_head list;
+};
+
+struct ecryptfs_crypt_stat;
+struct ecryptfs_mount_crypt_stat;
+
+struct ecryptfs_page_crypt_context {
+	struct page *page;
+#define ECRYPTFS_PREPARE_COMMIT_MODE 0
+#define ECRYPTFS_WRITEPAGE_MODE 1
+	unsigned int mode;
+	/* NOTE(review): presumably mode selects which member of param
+	 * is valid -- confirm against the users of this struct */
+	union {
+		struct file *lower_file;
+		struct writeback_control *wbc;
+	} param;
+};
+
+/* Returns the authentication token stored as the data of the user-type
+ * payload of key */
+static inline struct ecryptfs_auth_tok *
+ecryptfs_get_key_payload_data(struct key *key)
+{
+	return (struct ecryptfs_auth_tok *)
+		(((struct user_key_payload*)key->payload.data)->data);
+}
+
+#define ECRYPTFS_SUPER_MAGIC 0xf15f
+#define ECRYPTFS_MAX_KEYSET_SIZE 1024
+#define ECRYPTFS_MAX_CIPHER_NAME_SIZE 32
+#define ECRYPTFS_MAX_NUM_ENC_KEYS 64
+#define ECRYPTFS_MAX_NUM_KEYSIGS 2 /* TODO: Make this a linked list */
+#define ECRYPTFS_MAX_IV_BYTES 16 /* 128 bits */
+#define ECRYPTFS_SALT_BYTES 2
+#define MAGIC_ECRYPTFS_MARKER 0x3c81b7f5
+#define MAGIC_ECRYPTFS_MARKER_SIZE_BYTES 8 /* 4*2 */
+#define ECRYPTFS_FILE_SIZE_BYTES 8
+#define ECRYPTFS_DEFAULT_CIPHER "aes"
+#define ECRYPTFS_DEFAULT_KEY_BYTES 16
+#define ECRYPTFS_DEFAULT_CHAINING_MODE CRYPTO_TFM_MODE_CBC
+#define ECRYPTFS_TAG_3_PACKET_TYPE 0x8C
+#define ECRYPTFS_TAG_11_PACKET_TYPE 0xED
+#define MD5_DIGEST_SIZE 16
+
+/**
+ * This is the primary struct associated with each encrypted file.
+ *
+ * TODO: cache align/pack?
+ */ +struct ecryptfs_crypt_stat { +#define ECRYPTFS_STRUCT_INITIALIZED 0x00000001 +#define ECRYPTFS_POLICY_APPLIED 0x00000002 +#define ECRYPTFS_NEW_FILE 0x00000004 +#define ECRYPTFS_ENCRYPTED 0x00000008 +#define ECRYPTFS_SECURITY_WARNING 0x00000010 +#define ECRYPTFS_ENABLE_HMAC 0x00000020 +#define ECRYPTFS_ENCRYPT_IV_PAGES 0x00000040 +#define ECRYPTFS_KEY_VALID 0x00000080 + u32 flags; + unsigned int file_version; + size_t iv_bytes; + size_t num_keysigs; + size_t header_extent_size; + size_t num_header_extents_at_front; + size_t extent_size; /* Data extent size; default is 4096 */ + size_t key_size; + size_t extent_shift; + unsigned int extent_mask; + struct ecryptfs_mount_crypt_stat *mount_crypt_stat; + struct crypto_tfm *tfm; + struct crypto_tfm *md5_tfm; /* Crypto context for generating + * the initialization vectors */ + unsigned char cipher[ECRYPTFS_MAX_CIPHER_NAME_SIZE]; + unsigned char key[ECRYPTFS_MAX_KEY_BYTES]; + unsigned char root_iv[ECRYPTFS_MAX_IV_BYTES]; + unsigned char keysigs[ECRYPTFS_MAX_NUM_KEYSIGS][ECRYPTFS_SIG_SIZE_HEX]; + struct mutex cs_tfm_mutex; + struct mutex cs_md5_tfm_mutex; + struct mutex cs_mutex; +}; + +/* inode private data. */ +struct ecryptfs_inode_info { + struct inode vfs_inode; + struct inode *wii_inode; + struct ecryptfs_crypt_stat crypt_stat; +}; + +/* dentry private data. Each dentry must keep track of a lower + * vfsmount too. */ +struct ecryptfs_dentry_info { + struct dentry *wdi_dentry; + struct vfsmount *lower_mnt; + struct ecryptfs_crypt_stat *crypt_stat; +}; + +/** + * This struct is to enable a mount-wide passphrase/salt combo. This + * is more or less a stopgap to provide similar functionality to other + * crypto filesystems like EncFS or CFS until full policy support is + * implemented in eCryptfs. 
+ */ +struct ecryptfs_mount_crypt_stat { + /* Pointers to memory we do not own, do not free these */ +#define ECRYPTFS_PLAINTEXT_PASSTHROUGH_ENABLED 0x00000001 + u32 flags; + struct ecryptfs_auth_tok *global_auth_tok; + struct key *global_auth_tok_key; + size_t global_default_cipher_key_size; + struct crypto_tfm *global_key_tfm; + struct mutex global_key_tfm_mutex; + unsigned char global_default_cipher_name[ECRYPTFS_MAX_CIPHER_NAME_SIZE + + 1]; + unsigned char global_auth_tok_sig[ECRYPTFS_SIG_SIZE_HEX + 1]; +}; + +/* superblock private data. */ +struct ecryptfs_sb_info { + struct super_block *wsi_sb; + struct ecryptfs_mount_crypt_stat mount_crypt_stat; +}; + +/* file private data. */ +struct ecryptfs_file_info { + struct file *wfi_file; + struct ecryptfs_crypt_stat *crypt_stat; +}; + +/* auth_tok <=> encrypted_session_key mappings */ +struct ecryptfs_auth_tok_list_item { + unsigned char encrypted_session_key[ECRYPTFS_MAX_KEY_BYTES]; + struct list_head list; + struct ecryptfs_auth_tok auth_tok; +}; + +static inline struct ecryptfs_file_info * +ecryptfs_file_to_private(struct file *file) +{ + return (struct ecryptfs_file_info *)file->private_data; +} + +static inline void +ecryptfs_set_file_private(struct file *file, + struct ecryptfs_file_info *file_info) +{ + file->private_data = file_info; +} + +static inline struct file *ecryptfs_file_to_lower(struct file *file) +{ + return ((struct ecryptfs_file_info *)file->private_data)->wfi_file; +} + +static inline void +ecryptfs_set_file_lower(struct file *file, struct file *lower_file) +{ + ((struct ecryptfs_file_info *)file->private_data)->wfi_file = + lower_file; +} + +static inline struct ecryptfs_inode_info * +ecryptfs_inode_to_private(struct inode *inode) +{ + return container_of(inode, struct ecryptfs_inode_info, vfs_inode); +} + +static inline struct inode *ecryptfs_inode_to_lower(struct inode *inode) +{ + return ecryptfs_inode_to_private(inode)->wii_inode; +} + +static inline void +ecryptfs_set_inode_lower(struct 
inode *inode, struct inode *lower_inode) +{ + ecryptfs_inode_to_private(inode)->wii_inode = lower_inode; +} + +static inline struct ecryptfs_sb_info * +ecryptfs_superblock_to_private(struct super_block *sb) +{ + return (struct ecryptfs_sb_info *)sb->s_fs_info; +} + +static inline void +ecryptfs_set_superblock_private(struct super_block *sb, + struct ecryptfs_sb_info *sb_info) +{ + sb->s_fs_info = sb_info; +} + +static inline struct super_block * +ecryptfs_superblock_to_lower(struct super_block *sb) +{ + return ((struct ecryptfs_sb_info *)sb->s_fs_info)->wsi_sb; +} + +static inline void +ecryptfs_set_superblock_lower(struct super_block *sb, + struct super_block *lower_sb) +{ + ((struct ecryptfs_sb_info *)sb->s_fs_info)->wsi_sb = lower_sb; +} + +static inline struct ecryptfs_dentry_info * +ecryptfs_dentry_to_private(struct dentry *dentry) +{ + return (struct ecryptfs_dentry_info *)dentry->d_fsdata; +} + +static inline void +ecryptfs_set_dentry_private(struct dentry *dentry, + struct ecryptfs_dentry_info *dentry_info) +{ + dentry->d_fsdata = dentry_info; +} + +static inline struct dentry * +ecryptfs_dentry_to_lower(struct dentry *dentry) +{ + return ((struct ecryptfs_dentry_info *)dentry->d_fsdata)->wdi_dentry; +} + +static inline void +ecryptfs_set_dentry_lower(struct dentry *dentry, struct dentry *lower_dentry) +{ + ((struct ecryptfs_dentry_info *)dentry->d_fsdata)->wdi_dentry = + lower_dentry; +} + +static inline struct vfsmount * +ecryptfs_dentry_to_lower_mnt(struct dentry *dentry) +{ + return ((struct ecryptfs_dentry_info *)dentry->d_fsdata)->lower_mnt; +} + +static inline void +ecryptfs_set_dentry_lower_mnt(struct dentry *dentry, struct vfsmount *lower_mnt) +{ + ((struct ecryptfs_dentry_info *)dentry->d_fsdata)->lower_mnt = + lower_mnt; +} + +#define ecryptfs_printk(type, fmt, arg...) 
\ + __ecryptfs_printk(type "%s: " fmt, __FUNCTION__, ## arg); +void __ecryptfs_printk(const char *fmt, ...); + +extern const struct file_operations ecryptfs_main_fops; +extern const struct file_operations ecryptfs_dir_fops; +extern struct inode_operations ecryptfs_main_iops; +extern struct inode_operations ecryptfs_dir_iops; +extern struct inode_operations ecryptfs_symlink_iops; +extern struct super_operations ecryptfs_sops; +extern struct dentry_operations ecryptfs_dops; +extern struct address_space_operations ecryptfs_aops; +extern int ecryptfs_verbosity; + +extern struct kmem_cache *ecryptfs_auth_tok_list_item_cache; +extern struct kmem_cache *ecryptfs_file_info_cache; +extern struct kmem_cache *ecryptfs_dentry_info_cache; +extern struct kmem_cache *ecryptfs_inode_info_cache; +extern struct kmem_cache *ecryptfs_sb_info_cache; +extern struct kmem_cache *ecryptfs_header_cache_0; +extern struct kmem_cache *ecryptfs_header_cache_1; +extern struct kmem_cache *ecryptfs_header_cache_2; +extern struct kmem_cache *ecryptfs_lower_page_cache; + +int ecryptfs_interpose(struct dentry *hidden_dentry, + struct dentry *this_dentry, struct super_block *sb, + int flag); +int ecryptfs_fill_zeros(struct file *file, loff_t new_length); +int ecryptfs_decode_filename(struct ecryptfs_crypt_stat *crypt_stat, + const char *name, int length, + char **decrypted_name); +int ecryptfs_encode_filename(struct ecryptfs_crypt_stat *crypt_stat, + const char *name, int length, + char **encoded_name); +struct dentry *ecryptfs_lower_dentry(struct dentry *this_dentry); +void ecryptfs_copy_attr_atime(struct inode *dest, const struct inode *src); +void ecryptfs_copy_attr_all(struct inode *dest, const struct inode *src); +void ecryptfs_copy_inode_size(struct inode *dst, const struct inode *src); +void ecryptfs_dump_hex(char *data, int bytes); +int virt_to_scatterlist(const void *addr, int size, struct scatterlist *sg, + int sg_size); +int ecryptfs_compute_root_iv(struct ecryptfs_crypt_stat *crypt_stat); 
+void ecryptfs_rotate_iv(unsigned char *iv); +void ecryptfs_init_crypt_stat(struct ecryptfs_crypt_stat *crypt_stat); +void ecryptfs_destruct_crypt_stat(struct ecryptfs_crypt_stat *crypt_stat); +void ecryptfs_destruct_mount_crypt_stat( + struct ecryptfs_mount_crypt_stat *mount_crypt_stat); +int ecryptfs_init_crypt_ctx(struct ecryptfs_crypt_stat *crypt_stat); +int ecryptfs_write_inode_size_to_header(struct file *lower_file, + struct inode *lower_inode, + struct inode *inode); +int ecryptfs_get_lower_page(struct page **lower_page, struct inode *lower_inode, + struct file *lower_file, + unsigned long lower_page_index, int byte_offset, + int region_bytes); +int +ecryptfs_commit_lower_page(struct page *lower_page, struct inode *lower_inode, + struct file *lower_file, int byte_offset, + int region_size); +int ecryptfs_copy_page_to_lower(struct page *page, struct inode *lower_inode, + struct file *lower_file); +int ecryptfs_do_readpage(struct file *file, struct page *page, + pgoff_t lower_page_index); +int ecryptfs_grab_and_map_lower_page(struct page **lower_page, + char **lower_virt, + struct inode *lower_inode, + unsigned long lower_page_index); +int ecryptfs_writepage_and_release_lower_page(struct page *lower_page, + struct inode *lower_inode, + struct writeback_control *wbc); +int ecryptfs_encrypt_page(struct ecryptfs_page_crypt_context *ctx); +int ecryptfs_decrypt_page(struct file *file, struct page *page); +int ecryptfs_write_headers(struct dentry *ecryptfs_dentry, + struct file *lower_file); +int ecryptfs_write_headers_virt(char *page_virt, + struct ecryptfs_crypt_stat *crypt_stat, + struct dentry *ecryptfs_dentry); +int ecryptfs_read_headers(struct dentry *ecryptfs_dentry, + struct file *lower_file); +int ecryptfs_new_file_context(struct dentry *ecryptfs_dentry); +int contains_ecryptfs_marker(char *data); +int ecryptfs_read_header_region(char *data, struct dentry *dentry, + struct vfsmount *mnt); +u16 ecryptfs_code_for_cipher_string(struct ecryptfs_crypt_stat 
*crypt_stat); +int ecryptfs_cipher_code_to_string(char *str, u16 cipher_code); +void ecryptfs_set_default_sizes(struct ecryptfs_crypt_stat *crypt_stat); +int ecryptfs_generate_key_packet_set(char *dest_base, + struct ecryptfs_crypt_stat *crypt_stat, + struct dentry *ecryptfs_dentry, + size_t *len, size_t max); +int process_request_key_err(long err_code); +int +ecryptfs_parse_packet_set(struct ecryptfs_crypt_stat *crypt_stat, + unsigned char *src, struct dentry *ecryptfs_dentry); +int ecryptfs_truncate(struct dentry *dentry, loff_t new_length); +int +ecryptfs_process_cipher(struct crypto_tfm **tfm, struct crypto_tfm **key_tfm, + char *cipher_name, size_t key_size); +int ecryptfs_inode_test(struct inode *inode, void *candidate_lower_inode); +int ecryptfs_inode_set(struct inode *inode, void *lower_inode); +void ecryptfs_init_inode(struct inode *inode, struct inode *lower_inode); + +#endif /* #ifndef ECRYPTFS_KERNEL_H */ diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c new file mode 100644 index 000000000000..c8550c9f9cd2 --- /dev/null +++ b/fs/ecryptfs/file.c @@ -0,0 +1,440 @@ +/** + * eCryptfs: Linux filesystem encryption layer + * + * Copyright (C) 1997-2004 Erez Zadok + * Copyright (C) 2001-2004 Stony Brook University + * Copyright (C) 2004-2006 International Business Machines Corp. + * Author(s): Michael A. Halcrow <mhalcrow@us.ibm.com> + * Michael C. Thompson <mcthomps@us.ibm.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA + * 02111-1307, USA. + */ + +#include <linux/file.h> +#include <linux/poll.h> +#include <linux/mount.h> +#include <linux/pagemap.h> +#include <linux/security.h> +#include <linux/smp_lock.h> +#include <linux/compat.h> +#include "ecryptfs_kernel.h" + +/** + * ecryptfs_llseek + * @file: File we are seeking in + * @offset: The offset to seek to + * @origin: 2 - offset from i_size; 1 - offset from f_pos + * + * Returns the position we have seeked to, or negative on error + */ +static loff_t ecryptfs_llseek(struct file *file, loff_t offset, int origin) +{ + loff_t rv; + loff_t new_end_pos; + int rc; + int expanding_file = 0; + struct inode *inode = file->f_mapping->host; + + /* If our offset is past the end of our file, we're going to + * need to grow it so we have a valid length of 0's */ + new_end_pos = offset; + switch (origin) { + case 2: + new_end_pos += i_size_read(inode); + expanding_file = 1; + break; + case 1: + new_end_pos += file->f_pos; + if (new_end_pos > i_size_read(inode)) { + ecryptfs_printk(KERN_DEBUG, "new_end_pos(=[0x%.16x]) " + "> i_size_read(inode)(=[0x%.16x])\n", + new_end_pos, i_size_read(inode)); + expanding_file = 1; + } + break; + default: + if (new_end_pos > i_size_read(inode)) { + ecryptfs_printk(KERN_DEBUG, "new_end_pos(=[0x%.16x]) " + "> i_size_read(inode)(=[0x%.16x])\n", + new_end_pos, i_size_read(inode)); + expanding_file = 1; + } + } + ecryptfs_printk(KERN_DEBUG, "new_end_pos = [0x%.16x]\n", new_end_pos); + if (expanding_file) { + rc = ecryptfs_truncate(file->f_dentry, new_end_pos); + if (rc) { + rv = rc; + ecryptfs_printk(KERN_ERR, "Error on attempt to " + "truncate to (higher) offset [0x%.16x];" + " rc = [%d]\n", new_end_pos, rc); + goto out; + } + } + rv = generic_file_llseek(file, offset, origin); +out: + return rv; +} + +/** + * 
ecryptfs_read_update_atime + * + * generic_file_read updates the atime of upper layer inode. But, it + * doesn't give us a chance to update the atime of the lower layer + * inode. This function is a wrapper to generic_file_read. It + * updates the atime of the lower level inode if generic_file_read + * returns without any errors. This is to be used only for file reads. + * The function to be used for directory reads is ecryptfs_read. + */ +static ssize_t ecryptfs_read_update_atime(struct kiocb *iocb, + const struct iovec *iov, + unsigned long nr_segs, loff_t pos) +{ + int rc; + struct dentry *lower_dentry; + struct vfsmount *lower_vfsmount; + struct file *file = iocb->ki_filp; + + rc = generic_file_aio_read(iocb, iov, nr_segs, pos); + /* + * Even though this is a async interface, we need to wait + * for IO to finish to update atime + */ + if (-EIOCBQUEUED == rc) + rc = wait_on_sync_kiocb(iocb); + if (rc >= 0) { + lower_dentry = ecryptfs_dentry_to_lower(file->f_dentry); + lower_vfsmount = ecryptfs_dentry_to_lower_mnt(file->f_dentry); + touch_atime(lower_vfsmount, lower_dentry); + } + return rc; +} + +struct ecryptfs_getdents_callback { + void *dirent; + struct dentry *dentry; + filldir_t filldir; + int err; + int filldir_called; + int entries_written; +}; + +/* Inspired by generic filldir in fs/readir.c */ +static int +ecryptfs_filldir(void *dirent, const char *name, int namelen, loff_t offset, + u64 ino, unsigned int d_type) +{ + struct ecryptfs_crypt_stat *crypt_stat; + struct ecryptfs_getdents_callback *buf = + (struct ecryptfs_getdents_callback *)dirent; + int rc; + int decoded_length; + char *decoded_name; + + crypt_stat = ecryptfs_dentry_to_private(buf->dentry)->crypt_stat; + buf->filldir_called++; + decoded_length = ecryptfs_decode_filename(crypt_stat, name, namelen, + &decoded_name); + if (decoded_length < 0) { + rc = decoded_length; + goto out; + } + rc = buf->filldir(buf->dirent, decoded_name, decoded_length, offset, + ino, d_type); + kfree(decoded_name); 
+ if (rc >= 0) + buf->entries_written++; +out: + return rc; +} + +/** + * ecryptfs_readdir + * @file: The ecryptfs file struct + * @dirent: Directory entry + * @filldir: The filldir callback function + */ +static int ecryptfs_readdir(struct file *file, void *dirent, filldir_t filldir) +{ + int rc; + struct file *lower_file; + struct inode *inode; + struct ecryptfs_getdents_callback buf; + + lower_file = ecryptfs_file_to_lower(file); + lower_file->f_pos = file->f_pos; + inode = file->f_dentry->d_inode; + memset(&buf, 0, sizeof(buf)); + buf.dirent = dirent; + buf.dentry = file->f_dentry; + buf.filldir = filldir; +retry: + buf.filldir_called = 0; + buf.entries_written = 0; + buf.err = 0; + rc = vfs_readdir(lower_file, ecryptfs_filldir, (void *)&buf); + if (buf.err) + rc = buf.err; + if (buf.filldir_called && !buf.entries_written) + goto retry; + file->f_pos = lower_file->f_pos; + if (rc >= 0) + ecryptfs_copy_attr_atime(inode, lower_file->f_dentry->d_inode); + return rc; +} + +struct kmem_cache *ecryptfs_file_info_cache; + +/** + * ecryptfs_open + * @inode: inode speciying file to open + * @file: Structure to return filled in + * + * Opens the file specified by inode. 
+ * + * Returns zero on success; non-zero otherwise + */ +static int ecryptfs_open(struct inode *inode, struct file *file) +{ + int rc = 0; + struct ecryptfs_crypt_stat *crypt_stat = NULL; + struct ecryptfs_mount_crypt_stat *mount_crypt_stat; + struct dentry *ecryptfs_dentry = file->f_dentry; + /* Private value of ecryptfs_dentry allocated in + * ecryptfs_lookup() */ + struct dentry *lower_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry); + struct inode *lower_inode = NULL; + struct file *lower_file = NULL; + struct vfsmount *lower_mnt; + struct ecryptfs_file_info *file_info; + int lower_flags; + + /* Released in ecryptfs_release or end of function if failure */ + file_info = kmem_cache_alloc(ecryptfs_file_info_cache, SLAB_KERNEL); + ecryptfs_set_file_private(file, file_info); + if (!file_info) { + ecryptfs_printk(KERN_ERR, + "Error attempting to allocate memory\n"); + rc = -ENOMEM; + goto out; + } + memset(file_info, 0, sizeof(*file_info)); + lower_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry); + crypt_stat = &ecryptfs_inode_to_private(inode)->crypt_stat; + mount_crypt_stat = &ecryptfs_superblock_to_private( + ecryptfs_dentry->d_sb)->mount_crypt_stat; + mutex_lock(&crypt_stat->cs_mutex); + if (!ECRYPTFS_CHECK_FLAG(crypt_stat->flags, ECRYPTFS_POLICY_APPLIED)) { + ecryptfs_printk(KERN_DEBUG, "Setting flags for stat...\n"); + /* Policy code enabled in future release */ + ECRYPTFS_SET_FLAG(crypt_stat->flags, ECRYPTFS_POLICY_APPLIED); + ECRYPTFS_SET_FLAG(crypt_stat->flags, ECRYPTFS_ENCRYPTED); + } + mutex_unlock(&crypt_stat->cs_mutex); + /* This mntget & dget is undone via fput when the file is released */ + dget(lower_dentry); + lower_flags = file->f_flags; + if ((lower_flags & O_ACCMODE) == O_WRONLY) + lower_flags = (lower_flags & O_ACCMODE) | O_RDWR; + if (file->f_flags & O_APPEND) + lower_flags &= ~O_APPEND; + lower_mnt = ecryptfs_dentry_to_lower_mnt(ecryptfs_dentry); + mntget(lower_mnt); + /* Corresponding fput() in ecryptfs_release() */ + lower_file = 
dentry_open(lower_dentry, lower_mnt, lower_flags); + if (IS_ERR(lower_file)) { + rc = PTR_ERR(lower_file); + ecryptfs_printk(KERN_ERR, "Error opening lower file\n"); + goto out_puts; + } + ecryptfs_set_file_lower(file, lower_file); + /* Isn't this check the same as the one in lookup? */ + lower_inode = lower_dentry->d_inode; + if (S_ISDIR(ecryptfs_dentry->d_inode->i_mode)) { + ecryptfs_printk(KERN_DEBUG, "This is a directory\n"); + ECRYPTFS_CLEAR_FLAG(crypt_stat->flags, ECRYPTFS_ENCRYPTED); + rc = 0; + goto out; + } + mutex_lock(&crypt_stat->cs_mutex); + if (i_size_read(lower_inode) < ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE) { + if (!(mount_crypt_stat->flags + & ECRYPTFS_PLAINTEXT_PASSTHROUGH_ENABLED)) { + rc = -EIO; + printk(KERN_WARNING "Attempt to read file that is " + "not in a valid eCryptfs format, and plaintext " + "passthrough mode is not enabled; returning " + "-EIO\n"); + mutex_unlock(&crypt_stat->cs_mutex); + goto out_puts; + } + crypt_stat->flags &= ~(ECRYPTFS_ENCRYPTED); + rc = 0; + mutex_unlock(&crypt_stat->cs_mutex); + goto out; + } else if (!ECRYPTFS_CHECK_FLAG(crypt_stat->flags, + ECRYPTFS_POLICY_APPLIED) + || !ECRYPTFS_CHECK_FLAG(crypt_stat->flags, + ECRYPTFS_KEY_VALID)) { + rc = ecryptfs_read_headers(ecryptfs_dentry, lower_file); + if (rc) { + ecryptfs_printk(KERN_DEBUG, + "Valid headers not found\n"); + if (!(mount_crypt_stat->flags + & ECRYPTFS_PLAINTEXT_PASSTHROUGH_ENABLED)) { + rc = -EIO; + printk(KERN_WARNING "Attempt to read file that " + "is not in a valid eCryptfs format, " + "and plaintext passthrough mode is not " + "enabled; returning -EIO\n"); + mutex_unlock(&crypt_stat->cs_mutex); + goto out_puts; + } + ECRYPTFS_CLEAR_FLAG(crypt_stat->flags, + ECRYPTFS_ENCRYPTED); + rc = 0; + mutex_unlock(&crypt_stat->cs_mutex); + goto out; + } + } + mutex_unlock(&crypt_stat->cs_mutex); + ecryptfs_printk(KERN_DEBUG, "inode w/ addr = [0x%p], i_ino = [0x%.16x] " + "size: [0x%.16x]\n", inode, inode->i_ino, + i_size_read(inode)); + 
ecryptfs_set_file_lower(file, lower_file); + goto out; +out_puts: + mntput(lower_mnt); + dput(lower_dentry); + kmem_cache_free(ecryptfs_file_info_cache, + ecryptfs_file_to_private(file)); +out: + return rc; +} + +static int ecryptfs_flush(struct file *file, fl_owner_t td) +{ + int rc = 0; + struct file *lower_file = NULL; + + lower_file = ecryptfs_file_to_lower(file); + if (lower_file->f_op && lower_file->f_op->flush) + rc = lower_file->f_op->flush(lower_file, td); + return rc; +} + +static int ecryptfs_release(struct inode *inode, struct file *file) +{ + struct file *lower_file = ecryptfs_file_to_lower(file); + struct ecryptfs_file_info *file_info = ecryptfs_file_to_private(file); + struct inode *lower_inode = ecryptfs_inode_to_lower(inode); + + fput(lower_file); + inode->i_blocks = lower_inode->i_blocks; + kmem_cache_free(ecryptfs_file_info_cache, file_info); + return 0; +} + +static int +ecryptfs_fsync(struct file *file, struct dentry *dentry, int datasync) +{ + struct file *lower_file = ecryptfs_file_to_lower(file); + struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry); + struct inode *lower_inode = lower_dentry->d_inode; + int rc = -EINVAL; + + if (lower_inode->i_fop->fsync) { + mutex_lock(&lower_inode->i_mutex); + rc = lower_inode->i_fop->fsync(lower_file, lower_dentry, + datasync); + mutex_unlock(&lower_inode->i_mutex); + } + return rc; +} + +static int ecryptfs_fasync(int fd, struct file *file, int flag) +{ + int rc = 0; + struct file *lower_file = NULL; + + lower_file = ecryptfs_file_to_lower(file); + if (lower_file->f_op && lower_file->f_op->fasync) + rc = lower_file->f_op->fasync(fd, lower_file, flag); + return rc; +} + +static ssize_t ecryptfs_sendfile(struct file *file, loff_t * ppos, + size_t count, read_actor_t actor, void *target) +{ + struct file *lower_file = NULL; + int rc = -EINVAL; + + lower_file = ecryptfs_file_to_lower(file); + if (lower_file->f_op && lower_file->f_op->sendfile) + rc = lower_file->f_op->sendfile(lower_file, ppos, 
count, + actor, target); + + return rc; +} + +static int ecryptfs_ioctl(struct inode *inode, struct file *file, + unsigned int cmd, unsigned long arg); + +const struct file_operations ecryptfs_dir_fops = { + .readdir = ecryptfs_readdir, + .ioctl = ecryptfs_ioctl, + .mmap = generic_file_mmap, + .open = ecryptfs_open, + .flush = ecryptfs_flush, + .release = ecryptfs_release, + .fsync = ecryptfs_fsync, + .fasync = ecryptfs_fasync, + .sendfile = ecryptfs_sendfile, +}; + +const struct file_operations ecryptfs_main_fops = { + .llseek = ecryptfs_llseek, + .read = do_sync_read, + .aio_read = ecryptfs_read_update_atime, + .write = do_sync_write, + .aio_write = generic_file_aio_write, + .readdir = ecryptfs_readdir, + .ioctl = ecryptfs_ioctl, + .mmap = generic_file_mmap, + .open = ecryptfs_open, + .flush = ecryptfs_flush, + .release = ecryptfs_release, + .fsync = ecryptfs_fsync, + .fasync = ecryptfs_fasync, + .sendfile = ecryptfs_sendfile, +}; + +static int +ecryptfs_ioctl(struct inode *inode, struct file *file, unsigned int cmd, + unsigned long arg) +{ + int rc = 0; + struct file *lower_file = NULL; + + if (ecryptfs_file_to_private(file)) + lower_file = ecryptfs_file_to_lower(file); + if (lower_file && lower_file->f_op && lower_file->f_op->ioctl) + rc = lower_file->f_op->ioctl(ecryptfs_inode_to_lower(inode), + lower_file, cmd, arg); + else + rc = -ENOTTY; + return rc; +} diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c new file mode 100644 index 000000000000..efdd2b7b62d7 --- /dev/null +++ b/fs/ecryptfs/inode.c @@ -0,0 +1,1079 @@ +/** + * eCryptfs: Linux filesystem encryption layer + * + * Copyright (C) 1997-2004 Erez Zadok + * Copyright (C) 2001-2004 Stony Brook University + * Copyright (C) 2004-2006 International Business Machines Corp. + * Author(s): Michael A. Halcrow <mahalcro@us.ibm.com> + * Michael C. 
Thompsion <mcthomps@us.ibm.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA + * 02111-1307, USA. + */ + +#include <linux/file.h> +#include <linux/vmalloc.h> +#include <linux/pagemap.h> +#include <linux/dcache.h> +#include <linux/namei.h> +#include <linux/mount.h> +#include <linux/crypto.h> +#include "ecryptfs_kernel.h" + +static struct dentry *lock_parent(struct dentry *dentry) +{ + struct dentry *dir; + + dir = dget(dentry->d_parent); + mutex_lock(&(dir->d_inode->i_mutex)); + return dir; +} + +static void unlock_parent(struct dentry *dentry) +{ + mutex_unlock(&(dentry->d_parent->d_inode->i_mutex)); + dput(dentry->d_parent); +} + +static void unlock_dir(struct dentry *dir) +{ + mutex_unlock(&dir->d_inode->i_mutex); + dput(dir); +} + +void ecryptfs_copy_inode_size(struct inode *dst, const struct inode *src) +{ + i_size_write(dst, i_size_read((struct inode *)src)); + dst->i_blocks = src->i_blocks; +} + +void ecryptfs_copy_attr_atime(struct inode *dest, const struct inode *src) +{ + dest->i_atime = src->i_atime; +} + +static void ecryptfs_copy_attr_times(struct inode *dest, + const struct inode *src) +{ + dest->i_atime = src->i_atime; + dest->i_mtime = src->i_mtime; + dest->i_ctime = src->i_ctime; +} + +static void ecryptfs_copy_attr_timesizes(struct inode *dest, + const struct inode *src) +{ + dest->i_atime = 
src->i_atime; + dest->i_mtime = src->i_mtime; + dest->i_ctime = src->i_ctime; + ecryptfs_copy_inode_size(dest, src); +} + +void ecryptfs_copy_attr_all(struct inode *dest, const struct inode *src) +{ + dest->i_mode = src->i_mode; + dest->i_nlink = src->i_nlink; + dest->i_uid = src->i_uid; + dest->i_gid = src->i_gid; + dest->i_rdev = src->i_rdev; + dest->i_atime = src->i_atime; + dest->i_mtime = src->i_mtime; + dest->i_ctime = src->i_ctime; + dest->i_blkbits = src->i_blkbits; + dest->i_flags = src->i_flags; +} + +/** + * ecryptfs_create_underlying_file + * @lower_dir_inode: inode of the parent in the lower fs of the new file + * @lower_dentry: New file's dentry in the lower fs + * @ecryptfs_dentry: New file's dentry in ecryptfs + * @mode: The mode of the new file + * @nd: nameidata of ecryptfs' parent's dentry & vfsmount + * + * Creates the file in the lower file system. + * + * Returns zero on success; non-zero on error condition + */ +static int +ecryptfs_create_underlying_file(struct inode *lower_dir_inode, + struct dentry *dentry, int mode, + struct nameidata *nd) +{ + struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry); + struct vfsmount *lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry); + struct dentry *dentry_save; + struct vfsmount *vfsmount_save; + int rc; + + dentry_save = nd->dentry; + vfsmount_save = nd->mnt; + nd->dentry = lower_dentry; + nd->mnt = lower_mnt; + rc = vfs_create(lower_dir_inode, lower_dentry, mode, nd); + nd->dentry = dentry_save; + nd->mnt = vfsmount_save; + return rc; +} + +/** + * ecryptfs_do_create + * @directory_inode: inode of the new file's dentry's parent in ecryptfs + * @ecryptfs_dentry: New file's dentry in ecryptfs + * @mode: The mode of the new file + * @nd: nameidata of ecryptfs' parent's dentry & vfsmount + * + * Creates the underlying file and the eCryptfs inode which will link to + * it. It will also update the eCryptfs directory inode to mimic the + * stat of the lower directory inode. 
+ * + * Returns zero on success; non-zero on error condition + */ +static int +ecryptfs_do_create(struct inode *directory_inode, + struct dentry *ecryptfs_dentry, int mode, + struct nameidata *nd) +{ + int rc; + struct dentry *lower_dentry; + struct dentry *lower_dir_dentry; + + lower_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry); + lower_dir_dentry = lock_parent(lower_dentry); + if (unlikely(IS_ERR(lower_dir_dentry))) { + ecryptfs_printk(KERN_ERR, "Error locking directory of " + "dentry\n"); + rc = PTR_ERR(lower_dir_dentry); + goto out; + } + rc = ecryptfs_create_underlying_file(lower_dir_dentry->d_inode, + ecryptfs_dentry, mode, nd); + if (unlikely(rc)) { + ecryptfs_printk(KERN_ERR, + "Failure to create underlying file\n"); + goto out_lock; + } + rc = ecryptfs_interpose(lower_dentry, ecryptfs_dentry, + directory_inode->i_sb, 0); + if (rc) { + ecryptfs_printk(KERN_ERR, "Failure in ecryptfs_interpose\n"); + goto out_lock; + } + ecryptfs_copy_attr_timesizes(directory_inode, + lower_dir_dentry->d_inode); +out_lock: + unlock_dir(lower_dir_dentry); +out: + return rc; +} + +/** + * grow_file + * @ecryptfs_dentry: the ecryptfs dentry + * @lower_file: The lower file + * @inode: The ecryptfs inode + * @lower_inode: The lower inode + * + * This is the code which will grow the file to its correct size. 
+ */ +static int grow_file(struct dentry *ecryptfs_dentry, struct file *lower_file, + struct inode *inode, struct inode *lower_inode) +{ + int rc = 0; + struct file fake_file; + struct ecryptfs_file_info tmp_file_info; + + memset(&fake_file, 0, sizeof(fake_file)); + fake_file.f_dentry = ecryptfs_dentry; + memset(&tmp_file_info, 0, sizeof(tmp_file_info)); + ecryptfs_set_file_private(&fake_file, &tmp_file_info); + ecryptfs_set_file_lower(&fake_file, lower_file); + rc = ecryptfs_fill_zeros(&fake_file, 1); + if (rc) { + ECRYPTFS_SET_FLAG( + ecryptfs_inode_to_private(inode)->crypt_stat.flags, + ECRYPTFS_SECURITY_WARNING); + ecryptfs_printk(KERN_WARNING, "Error attempting to fill zeros " + "in file; rc = [%d]\n", rc); + goto out; + } + i_size_write(inode, 0); + ecryptfs_write_inode_size_to_header(lower_file, lower_inode, inode); + ECRYPTFS_SET_FLAG(ecryptfs_inode_to_private(inode)->crypt_stat.flags, + ECRYPTFS_NEW_FILE); +out: + return rc; +} + +/** + * ecryptfs_initialize_file + * + * Cause the file to be changed from a basic empty file to an ecryptfs + * file with a header and first data page. 
+ * + * Returns zero on success + */ +static int ecryptfs_initialize_file(struct dentry *ecryptfs_dentry) +{ + int rc = 0; + int lower_flags; + struct ecryptfs_crypt_stat *crypt_stat; + struct dentry *lower_dentry; + struct dentry *tlower_dentry = NULL; + struct file *lower_file; + struct inode *inode, *lower_inode; + struct vfsmount *lower_mnt; + + lower_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry); + ecryptfs_printk(KERN_DEBUG, "lower_dentry->d_name.name = [%s]\n", + lower_dentry->d_name.name); + inode = ecryptfs_dentry->d_inode; + crypt_stat = &ecryptfs_inode_to_private(inode)->crypt_stat; + tlower_dentry = dget(lower_dentry); + if (!tlower_dentry) { + rc = -ENOMEM; + ecryptfs_printk(KERN_ERR, "Error dget'ing lower_dentry\n"); + goto out; + } + lower_flags = ((O_CREAT | O_WRONLY | O_TRUNC) & O_ACCMODE) | O_RDWR; +#if BITS_PER_LONG != 32 + lower_flags |= O_LARGEFILE; +#endif + lower_mnt = ecryptfs_dentry_to_lower_mnt(ecryptfs_dentry); + mntget(lower_mnt); + /* Corresponding fput() at end of this function */ + lower_file = dentry_open(tlower_dentry, lower_mnt, lower_flags); + if (IS_ERR(lower_file)) { + rc = PTR_ERR(lower_file); + ecryptfs_printk(KERN_ERR, + "Error opening dentry; rc = [%i]\n", rc); + goto out; + } + /* fput(lower_file) should handle the puts if we do this */ + lower_file->f_dentry = tlower_dentry; + lower_file->f_vfsmnt = lower_mnt; + lower_inode = tlower_dentry->d_inode; + if (S_ISDIR(ecryptfs_dentry->d_inode->i_mode)) { + ecryptfs_printk(KERN_DEBUG, "This is a directory\n"); + ECRYPTFS_CLEAR_FLAG(crypt_stat->flags, ECRYPTFS_ENCRYPTED); + goto out_fput; + } + ECRYPTFS_SET_FLAG(crypt_stat->flags, ECRYPTFS_NEW_FILE); + ecryptfs_printk(KERN_DEBUG, "Initializing crypto context\n"); + rc = ecryptfs_new_file_context(ecryptfs_dentry); + if (rc) { + ecryptfs_printk(KERN_DEBUG, "Error creating new file " + "context\n"); + goto out_fput; + } + rc = ecryptfs_write_headers(ecryptfs_dentry, lower_file); + if (rc) { + ecryptfs_printk(KERN_DEBUG, "Error 
writing headers\n"); + goto out_fput; + } + rc = grow_file(ecryptfs_dentry, lower_file, inode, lower_inode); +out_fput: + fput(lower_file); +out: + return rc; +} + +/** + * ecryptfs_create + * @dir: The inode of the directory in which to create the file. + * @dentry: The eCryptfs dentry + * @mode: The mode of the new file. + * @nd: nameidata + * + * Creates a new file. + * + * Returns zero on success; non-zero on error condition + */ +static int +ecryptfs_create(struct inode *directory_inode, struct dentry *ecryptfs_dentry, + int mode, struct nameidata *nd) +{ + int rc; + + rc = ecryptfs_do_create(directory_inode, ecryptfs_dentry, mode, nd); + if (unlikely(rc)) { + ecryptfs_printk(KERN_WARNING, "Failed to create file in" + "lower filesystem\n"); + goto out; + } + /* At this point, a file exists on "disk"; we need to make sure + * that this on disk file is prepared to be an ecryptfs file */ + rc = ecryptfs_initialize_file(ecryptfs_dentry); +out: + return rc; +} + +/** + * ecryptfs_lookup + * @dir: inode + * @dentry: The dentry + * @nd: nameidata, may be NULL + * + * Find a file on disk. If the file does not exist, then we'll add it to the + * dentry cache and continue on to read it from the disk. 
+ */ +static struct dentry *ecryptfs_lookup(struct inode *dir, struct dentry *dentry, + struct nameidata *nd) +{ + int rc = 0; + struct dentry *lower_dir_dentry; + struct dentry *lower_dentry; + struct vfsmount *lower_mnt; + struct dentry *tlower_dentry = NULL; + char *encoded_name; + unsigned int encoded_namelen; + struct ecryptfs_crypt_stat *crypt_stat = NULL; + char *page_virt = NULL; + struct inode *lower_inode; + u64 file_size; + + lower_dir_dentry = ecryptfs_dentry_to_lower(dentry->d_parent); + dentry->d_op = &ecryptfs_dops; + if ((dentry->d_name.len == 1 && !strcmp(dentry->d_name.name, ".")) + || (dentry->d_name.len == 2 && !strcmp(dentry->d_name.name, ".."))) + goto out_drop; + encoded_namelen = ecryptfs_encode_filename(crypt_stat, + dentry->d_name.name, + dentry->d_name.len, + &encoded_name); + if (encoded_namelen < 0) { + rc = encoded_namelen; + goto out_drop; + } + ecryptfs_printk(KERN_DEBUG, "encoded_name = [%s]; encoded_namelen " + "= [%d]\n", encoded_name, encoded_namelen); + lower_dentry = lookup_one_len(encoded_name, lower_dir_dentry, + encoded_namelen - 1); + kfree(encoded_name); + lower_mnt = mntget(ecryptfs_dentry_to_lower_mnt(dentry->d_parent)); + if (IS_ERR(lower_dentry)) { + ecryptfs_printk(KERN_ERR, "ERR from lower_dentry\n"); + rc = PTR_ERR(lower_dentry); + goto out_drop; + } + ecryptfs_printk(KERN_DEBUG, "lower_dentry = [%p]; lower_dentry->" + "d_name.name = [%s]\n", lower_dentry, + lower_dentry->d_name.name); + lower_inode = lower_dentry->d_inode; + ecryptfs_copy_attr_atime(dir, lower_dir_dentry->d_inode); + BUG_ON(!atomic_read(&lower_dentry->d_count)); + ecryptfs_set_dentry_private(dentry, + kmem_cache_alloc(ecryptfs_dentry_info_cache, + SLAB_KERNEL)); + if (!ecryptfs_dentry_to_private(dentry)) { + rc = -ENOMEM; + ecryptfs_printk(KERN_ERR, "Out of memory whilst attempting " + "to allocate ecryptfs_dentry_info struct\n"); + goto out_dput; + } + ecryptfs_set_dentry_lower(dentry, lower_dentry); + ecryptfs_set_dentry_lower_mnt(dentry, 
lower_mnt); + if (!lower_dentry->d_inode) { + /* We want to add because we couldn't find in lower */ + d_add(dentry, NULL); + goto out; + } + rc = ecryptfs_interpose(lower_dentry, dentry, dir->i_sb, 1); + if (rc) { + ecryptfs_printk(KERN_ERR, "Error interposing\n"); + goto out_dput; + } + if (S_ISDIR(lower_inode->i_mode)) { + ecryptfs_printk(KERN_DEBUG, "Is a directory; returning\n"); + goto out; + } + if (S_ISLNK(lower_inode->i_mode)) { + ecryptfs_printk(KERN_DEBUG, "Is a symlink; returning\n"); + goto out; + } + if (!nd) { + ecryptfs_printk(KERN_DEBUG, "We have a NULL nd, just leave" + "as we *think* we are about to unlink\n"); + goto out; + } + tlower_dentry = dget(lower_dentry); + if (!tlower_dentry || IS_ERR(tlower_dentry)) { + rc = -ENOMEM; + ecryptfs_printk(KERN_ERR, "Cannot dget lower_dentry\n"); + goto out_dput; + } + /* Released in this function */ + page_virt = + (char *)kmem_cache_alloc(ecryptfs_header_cache_2, + SLAB_USER); + if (!page_virt) { + rc = -ENOMEM; + ecryptfs_printk(KERN_ERR, + "Cannot ecryptfs_kmalloc a page\n"); + goto out_dput; + } + memset(page_virt, 0, PAGE_CACHE_SIZE); + rc = ecryptfs_read_header_region(page_virt, tlower_dentry, nd->mnt); + crypt_stat = &ecryptfs_inode_to_private(dentry->d_inode)->crypt_stat; + if (!ECRYPTFS_CHECK_FLAG(crypt_stat->flags, ECRYPTFS_POLICY_APPLIED)) + ecryptfs_set_default_sizes(crypt_stat); + if (rc) { + rc = 0; + ecryptfs_printk(KERN_WARNING, "Error reading header region;" + " assuming unencrypted\n"); + } else { + if (!contains_ecryptfs_marker(page_virt + + ECRYPTFS_FILE_SIZE_BYTES)) { + kmem_cache_free(ecryptfs_header_cache_2, page_virt); + goto out; + } + memcpy(&file_size, page_virt, sizeof(file_size)); + file_size = be64_to_cpu(file_size); + i_size_write(dentry->d_inode, (loff_t)file_size); + } + kmem_cache_free(ecryptfs_header_cache_2, page_virt); + goto out; + +out_dput: + dput(lower_dentry); + if (tlower_dentry) + dput(tlower_dentry); +out_drop: + d_drop(dentry); +out: + return ERR_PTR(rc); +} + 
+static int ecryptfs_link(struct dentry *old_dentry, struct inode *dir, + struct dentry *new_dentry) +{ + struct dentry *lower_old_dentry; + struct dentry *lower_new_dentry; + struct dentry *lower_dir_dentry; + u64 file_size_save; + int rc; + + file_size_save = i_size_read(old_dentry->d_inode); + lower_old_dentry = ecryptfs_dentry_to_lower(old_dentry); + lower_new_dentry = ecryptfs_dentry_to_lower(new_dentry); + dget(lower_old_dentry); + dget(lower_new_dentry); + lower_dir_dentry = lock_parent(lower_new_dentry); + rc = vfs_link(lower_old_dentry, lower_dir_dentry->d_inode, + lower_new_dentry); + if (rc || !lower_new_dentry->d_inode) + goto out_lock; + rc = ecryptfs_interpose(lower_new_dentry, new_dentry, dir->i_sb, 0); + if (rc) + goto out_lock; + ecryptfs_copy_attr_timesizes(dir, lower_new_dentry->d_inode); + old_dentry->d_inode->i_nlink = + ecryptfs_inode_to_lower(old_dentry->d_inode)->i_nlink; + i_size_write(new_dentry->d_inode, file_size_save); +out_lock: + unlock_dir(lower_dir_dentry); + dput(lower_new_dentry); + dput(lower_old_dentry); + if (!new_dentry->d_inode) + d_drop(new_dentry); + return rc; +} + +static int ecryptfs_unlink(struct inode *dir, struct dentry *dentry) +{ + int rc = 0; + struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry); + struct inode *lower_dir_inode = ecryptfs_inode_to_lower(dir); + + lock_parent(lower_dentry); + rc = vfs_unlink(lower_dir_inode, lower_dentry); + if (rc) { + ecryptfs_printk(KERN_ERR, "Error in vfs_unlink\n"); + goto out_unlock; + } + ecryptfs_copy_attr_times(dir, lower_dir_inode); + dentry->d_inode->i_nlink = + ecryptfs_inode_to_lower(dentry->d_inode)->i_nlink; + dentry->d_inode->i_ctime = dir->i_ctime; +out_unlock: + unlock_parent(lower_dentry); + return rc; +} + +static int ecryptfs_symlink(struct inode *dir, struct dentry *dentry, + const char *symname) +{ + int rc; + struct dentry *lower_dentry; + struct dentry *lower_dir_dentry; + umode_t mode; + char *encoded_symname; + unsigned int encoded_symlen; + 
struct ecryptfs_crypt_stat *crypt_stat = NULL; + + lower_dentry = ecryptfs_dentry_to_lower(dentry); + dget(lower_dentry); + lower_dir_dentry = lock_parent(lower_dentry); + mode = S_IALLUGO; + encoded_symlen = ecryptfs_encode_filename(crypt_stat, symname, + strlen(symname), + &encoded_symname); + if (encoded_symlen < 0) { + rc = encoded_symlen; + goto out_lock; + } + rc = vfs_symlink(lower_dir_dentry->d_inode, lower_dentry, + encoded_symname, mode); + kfree(encoded_symname); + if (rc || !lower_dentry->d_inode) + goto out_lock; + rc = ecryptfs_interpose(lower_dentry, dentry, dir->i_sb, 0); + if (rc) + goto out_lock; + ecryptfs_copy_attr_timesizes(dir, lower_dir_dentry->d_inode); +out_lock: + unlock_dir(lower_dir_dentry); + dput(lower_dentry); + if (!dentry->d_inode) + d_drop(dentry); + return rc; +} + +static int ecryptfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) +{ + int rc; + struct dentry *lower_dentry; + struct dentry *lower_dir_dentry; + + lower_dentry = ecryptfs_dentry_to_lower(dentry); + lower_dir_dentry = lock_parent(lower_dentry); + rc = vfs_mkdir(lower_dir_dentry->d_inode, lower_dentry, mode); + if (rc || !lower_dentry->d_inode) + goto out; + rc = ecryptfs_interpose(lower_dentry, dentry, dir->i_sb, 0); + if (rc) + goto out; + ecryptfs_copy_attr_timesizes(dir, lower_dir_dentry->d_inode); + dir->i_nlink = lower_dir_dentry->d_inode->i_nlink; +out: + unlock_dir(lower_dir_dentry); + if (!dentry->d_inode) + d_drop(dentry); + return rc; +} + +static int ecryptfs_rmdir(struct inode *dir, struct dentry *dentry) +{ + int rc = 0; + struct dentry *tdentry = NULL; + struct dentry *lower_dentry; + struct dentry *tlower_dentry = NULL; + struct dentry *lower_dir_dentry; + + lower_dentry = ecryptfs_dentry_to_lower(dentry); + if (!(tdentry = dget(dentry))) { + rc = -EINVAL; + ecryptfs_printk(KERN_ERR, "Error dget'ing dentry [%p]\n", + dentry); + goto out; + } + lower_dir_dentry = lock_parent(lower_dentry); + if (!(tlower_dentry = dget(lower_dentry))) { + rc = 
-EINVAL; + ecryptfs_printk(KERN_ERR, "Error dget'ing lower_dentry " + "[%p]\n", lower_dentry); + goto out; + } + rc = vfs_rmdir(lower_dir_dentry->d_inode, lower_dentry); + if (!rc) { + d_delete(tlower_dentry); + tlower_dentry = NULL; + } + ecryptfs_copy_attr_times(dir, lower_dir_dentry->d_inode); + dir->i_nlink = lower_dir_dentry->d_inode->i_nlink; + unlock_dir(lower_dir_dentry); + if (!rc) + d_drop(dentry); +out: + if (tdentry) + dput(tdentry); + if (tlower_dentry) + dput(tlower_dentry); + return rc; +} + +static int +ecryptfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) +{ + int rc; + struct dentry *lower_dentry; + struct dentry *lower_dir_dentry; + + lower_dentry = ecryptfs_dentry_to_lower(dentry); + lower_dir_dentry = lock_parent(lower_dentry); + rc = vfs_mknod(lower_dir_dentry->d_inode, lower_dentry, mode, dev); + if (rc || !lower_dentry->d_inode) + goto out; + rc = ecryptfs_interpose(lower_dentry, dentry, dir->i_sb, 0); + if (rc) + goto out; + ecryptfs_copy_attr_timesizes(dir, lower_dir_dentry->d_inode); +out: + unlock_dir(lower_dir_dentry); + if (!dentry->d_inode) + d_drop(dentry); + return rc; +} + +static int +ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry) +{ + int rc; + struct dentry *lower_old_dentry; + struct dentry *lower_new_dentry; + struct dentry *lower_old_dir_dentry; + struct dentry *lower_new_dir_dentry; + + lower_old_dentry = ecryptfs_dentry_to_lower(old_dentry); + lower_new_dentry = ecryptfs_dentry_to_lower(new_dentry); + dget(lower_old_dentry); + dget(lower_new_dentry); + lower_old_dir_dentry = dget_parent(lower_old_dentry); + lower_new_dir_dentry = dget_parent(lower_new_dentry); + lock_rename(lower_old_dir_dentry, lower_new_dir_dentry); + rc = vfs_rename(lower_old_dir_dentry->d_inode, lower_old_dentry, + lower_new_dir_dentry->d_inode, lower_new_dentry); + if (rc) + goto out_lock; + ecryptfs_copy_attr_all(new_dir, lower_new_dir_dentry->d_inode); + if 
(new_dir != old_dir) + ecryptfs_copy_attr_all(old_dir, lower_old_dir_dentry->d_inode); +out_lock: + unlock_rename(lower_old_dir_dentry, lower_new_dir_dentry); + dput(lower_new_dentry); + dput(lower_old_dentry); + return rc; +} + +static int +ecryptfs_readlink(struct dentry *dentry, char __user * buf, int bufsiz) +{ + int rc; + struct dentry *lower_dentry; + char *decoded_name; + char *lower_buf; + mm_segment_t old_fs; + struct ecryptfs_crypt_stat *crypt_stat; + + lower_dentry = ecryptfs_dentry_to_lower(dentry); + if (!lower_dentry->d_inode->i_op || + !lower_dentry->d_inode->i_op->readlink) { + rc = -EINVAL; + goto out; + } + /* Released in this function */ + lower_buf = kmalloc(bufsiz, GFP_KERNEL); + if (lower_buf == NULL) { + ecryptfs_printk(KERN_ERR, "Out of memory\n"); + rc = -ENOMEM; + goto out; + } + old_fs = get_fs(); + set_fs(get_ds()); + ecryptfs_printk(KERN_DEBUG, "Calling readlink w/ " + "lower_dentry->d_name.name = [%s]\n", + lower_dentry->d_name.name); + rc = lower_dentry->d_inode->i_op->readlink(lower_dentry, + (char __user *)lower_buf, + bufsiz); + set_fs(old_fs); + if (rc >= 0) { + crypt_stat = NULL; + rc = ecryptfs_decode_filename(crypt_stat, lower_buf, rc, + &decoded_name); + if (rc == -ENOMEM) + goto out_free_lower_buf; + if (rc > 0) { + ecryptfs_printk(KERN_DEBUG, "Copying [%d] bytes " + "to userspace: [%*s]\n", rc, + decoded_name); + if (copy_to_user(buf, decoded_name, rc)) + rc = -EFAULT; + } + kfree(decoded_name); + ecryptfs_copy_attr_atime(dentry->d_inode, + lower_dentry->d_inode); + } +out_free_lower_buf: + kfree(lower_buf); +out: + return rc; +} + +static void *ecryptfs_follow_link(struct dentry *dentry, struct nameidata *nd) +{ + char *buf; + int len = PAGE_SIZE, rc; + mm_segment_t old_fs; + + /* Released in ecryptfs_put_link(); only release here on error */ + buf = kmalloc(len, GFP_KERNEL); + if (!buf) { + rc = -ENOMEM; + goto out; + } + old_fs = get_fs(); + set_fs(get_ds()); + ecryptfs_printk(KERN_DEBUG, "Calling readlink w/ " + 
"dentry->d_name.name = [%s]\n", dentry->d_name.name); + rc = dentry->d_inode->i_op->readlink(dentry, (char __user *)buf, len); + buf[rc] = '\0'; + set_fs(old_fs); + if (rc < 0) + goto out_free; + rc = 0; + nd_set_link(nd, buf); + goto out; +out_free: + kfree(buf); +out: + return ERR_PTR(rc); +} + +static void +ecryptfs_put_link(struct dentry *dentry, struct nameidata *nd, void *ptr) +{ + /* Free the char* */ + kfree(nd_get_link(nd)); +} + +/** + * upper_size_to_lower_size + * @crypt_stat: Crypt_stat associated with file + * @upper_size: Size of the upper file + * + * Calculate the requried size of the lower file based on the + * specified size of the upper file. This calculation is based on the + * number of headers in the underlying file and the extent size. + * + * Returns Calculated size of the lower file. + */ +static loff_t +upper_size_to_lower_size(struct ecryptfs_crypt_stat *crypt_stat, + loff_t upper_size) +{ + loff_t lower_size; + + lower_size = ( crypt_stat->header_extent_size + * crypt_stat->num_header_extents_at_front ); + if (upper_size != 0) { + loff_t num_extents; + + num_extents = upper_size >> crypt_stat->extent_shift; + if (upper_size & ~crypt_stat->extent_mask) + num_extents++; + lower_size += (num_extents * crypt_stat->extent_size); + } + return lower_size; +} + +/** + * ecryptfs_truncate + * @dentry: The ecryptfs layer dentry + * @new_length: The length to expand the file to + * + * Function to handle truncations modifying the size of the file. Note + * that the file sizes are interpolated. When expanding, we are simply + * writing strings of 0's out. When truncating, we need to modify the + * underlying file size according to the page index interpolations. 
+ * + * Returns zero on success; non-zero otherwise + */ +int ecryptfs_truncate(struct dentry *dentry, loff_t new_length) +{ + int rc = 0; + struct inode *inode = dentry->d_inode; + struct dentry *lower_dentry; + struct vfsmount *lower_mnt; + struct file fake_ecryptfs_file, *lower_file = NULL; + struct ecryptfs_crypt_stat *crypt_stat; + loff_t i_size = i_size_read(inode); + loff_t lower_size_before_truncate; + loff_t lower_size_after_truncate; + + if (unlikely((new_length == i_size))) + goto out; + crypt_stat = &ecryptfs_inode_to_private(dentry->d_inode)->crypt_stat; + /* Set up a fake ecryptfs file, this is used to interface with + * the file in the underlying filesystem so that the + * truncation has an effect there as well. */ + memset(&fake_ecryptfs_file, 0, sizeof(fake_ecryptfs_file)); + fake_ecryptfs_file.f_dentry = dentry; + /* Released at out_free: label */ + ecryptfs_set_file_private(&fake_ecryptfs_file, + kmem_cache_alloc(ecryptfs_file_info_cache, + SLAB_KERNEL)); + if (unlikely(!ecryptfs_file_to_private(&fake_ecryptfs_file))) { + rc = -ENOMEM; + goto out; + } + lower_dentry = ecryptfs_dentry_to_lower(dentry); + /* This dget & mntget is released through fput at out_fput: */ + dget(lower_dentry); + lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry); + mntget(lower_mnt); + lower_file = dentry_open(lower_dentry, lower_mnt, O_RDWR); + if (unlikely(IS_ERR(lower_file))) { + rc = PTR_ERR(lower_file); + goto out_free; + } + ecryptfs_set_file_lower(&fake_ecryptfs_file, lower_file); + /* Switch on growing or shrinking file */ + if (new_length > i_size) { + rc = ecryptfs_fill_zeros(&fake_ecryptfs_file, new_length); + if (rc) { + ecryptfs_printk(KERN_ERR, + "Problem with fill_zeros\n"); + goto out_fput; + } + i_size_write(inode, new_length); + rc = ecryptfs_write_inode_size_to_header(lower_file, + lower_dentry->d_inode, + inode); + if (rc) { + ecryptfs_printk(KERN_ERR, + "Problem with ecryptfs_write" + "_inode_size\n"); + goto out_fput; + } + } else { /* new_length < 
i_size_read(inode) */ + vmtruncate(inode, new_length); + ecryptfs_write_inode_size_to_header(lower_file, + lower_dentry->d_inode, + inode); + /* We are reducing the size of the ecryptfs file, and need to + * know if we need to reduce the size of the lower file. */ + lower_size_before_truncate = + upper_size_to_lower_size(crypt_stat, i_size); + lower_size_after_truncate = + upper_size_to_lower_size(crypt_stat, new_length); + if (lower_size_after_truncate < lower_size_before_truncate) + vmtruncate(lower_dentry->d_inode, + lower_size_after_truncate); + } + /* Update the access times */ + lower_dentry->d_inode->i_mtime = lower_dentry->d_inode->i_ctime + = CURRENT_TIME; + mark_inode_dirty_sync(inode); +out_fput: + fput(lower_file); +out_free: + if (ecryptfs_file_to_private(&fake_ecryptfs_file)) + kmem_cache_free(ecryptfs_file_info_cache, + ecryptfs_file_to_private(&fake_ecryptfs_file)); +out: + return rc; +} + +static int +ecryptfs_permission(struct inode *inode, int mask, struct nameidata *nd) +{ + int rc; + + if (nd) { + struct vfsmount *vfsmnt_save = nd->mnt; + struct dentry *dentry_save = nd->dentry; + + nd->mnt = ecryptfs_dentry_to_lower_mnt(nd->dentry); + nd->dentry = ecryptfs_dentry_to_lower(nd->dentry); + rc = permission(ecryptfs_inode_to_lower(inode), mask, nd); + nd->mnt = vfsmnt_save; + nd->dentry = dentry_save; + } else + rc = permission(ecryptfs_inode_to_lower(inode), mask, NULL); + return rc; +} + +/** + * ecryptfs_setattr + * @dentry: dentry handle to the inode to modify + * @ia: Structure with flags of what to change and values + * + * Updates the metadata of an inode. If the update is to the size + * i.e. truncation, then ecryptfs_truncate will handle the size modification + * of both the ecryptfs inode and the lower inode. + * + * All other metadata changes will be passed right to the lower filesystem, + * and we will just update our inode to look like the lower. 
+ */ +static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia) +{ + int rc = 0; + struct dentry *lower_dentry; + struct inode *inode; + struct inode *lower_inode; + struct ecryptfs_crypt_stat *crypt_stat; + + crypt_stat = &ecryptfs_inode_to_private(dentry->d_inode)->crypt_stat; + lower_dentry = ecryptfs_dentry_to_lower(dentry); + inode = dentry->d_inode; + lower_inode = ecryptfs_inode_to_lower(inode); + if (ia->ia_valid & ATTR_SIZE) { + ecryptfs_printk(KERN_DEBUG, + "ia->ia_valid = [0x%x] ATTR_SIZE" " = [0x%x]\n", + ia->ia_valid, ATTR_SIZE); + rc = ecryptfs_truncate(dentry, ia->ia_size); + /* ecryptfs_truncate handles resizing of the lower file */ + ia->ia_valid &= ~ATTR_SIZE; + ecryptfs_printk(KERN_DEBUG, "ia->ia_valid = [%x]\n", + ia->ia_valid); + if (rc < 0) + goto out; + } + rc = notify_change(lower_dentry, ia); +out: + ecryptfs_copy_attr_all(inode, lower_inode); + return rc; +} + +static int +ecryptfs_setxattr(struct dentry *dentry, const char *name, const void *value, + size_t size, int flags) +{ + int rc = 0; + struct dentry *lower_dentry; + + lower_dentry = ecryptfs_dentry_to_lower(dentry); + if (!lower_dentry->d_inode->i_op->setxattr) { + rc = -ENOSYS; + goto out; + } + mutex_lock(&lower_dentry->d_inode->i_mutex); + rc = lower_dentry->d_inode->i_op->setxattr(lower_dentry, name, value, + size, flags); + mutex_unlock(&lower_dentry->d_inode->i_mutex); +out: + return rc; +} + +static ssize_t +ecryptfs_getxattr(struct dentry *dentry, const char *name, void *value, + size_t size) +{ + int rc = 0; + struct dentry *lower_dentry; + + lower_dentry = ecryptfs_dentry_to_lower(dentry); + if (!lower_dentry->d_inode->i_op->getxattr) { + rc = -ENOSYS; + goto out; + } + mutex_lock(&lower_dentry->d_inode->i_mutex); + rc = lower_dentry->d_inode->i_op->getxattr(lower_dentry, name, value, + size); + mutex_unlock(&lower_dentry->d_inode->i_mutex); +out: + return rc; +} + +static ssize_t +ecryptfs_listxattr(struct dentry *dentry, char *list, size_t size) +{ + int rc = 
0; + struct dentry *lower_dentry; + + lower_dentry = ecryptfs_dentry_to_lower(dentry); + if (!lower_dentry->d_inode->i_op->listxattr) { + rc = -ENOSYS; + goto out; + } + mutex_lock(&lower_dentry->d_inode->i_mutex); + rc = lower_dentry->d_inode->i_op->listxattr(lower_dentry, list, size); + mutex_unlock(&lower_dentry->d_inode->i_mutex); +out: + return rc; +} + +static int ecryptfs_removexattr(struct dentry *dentry, const char *name) +{ + int rc = 0; + struct dentry *lower_dentry; + + lower_dentry = ecryptfs_dentry_to_lower(dentry); + if (!lower_dentry->d_inode->i_op->removexattr) { + rc = -ENOSYS; + goto out; + } + mutex_lock(&lower_dentry->d_inode->i_mutex); + rc = lower_dentry->d_inode->i_op->removexattr(lower_dentry, name); + mutex_unlock(&lower_dentry->d_inode->i_mutex); +out: + return rc; +} + +int ecryptfs_inode_test(struct inode *inode, void *candidate_lower_inode) +{ + if ((ecryptfs_inode_to_lower(inode) + == (struct inode *)candidate_lower_inode)) + return 1; + else + return 0; +} + +int ecryptfs_inode_set(struct inode *inode, void *lower_inode) +{ + ecryptfs_init_inode(inode, (struct inode *)lower_inode); + return 0; +} + +struct inode_operations ecryptfs_symlink_iops = { + .readlink = ecryptfs_readlink, + .follow_link = ecryptfs_follow_link, + .put_link = ecryptfs_put_link, + .permission = ecryptfs_permission, + .setattr = ecryptfs_setattr, + .setxattr = ecryptfs_setxattr, + .getxattr = ecryptfs_getxattr, + .listxattr = ecryptfs_listxattr, + .removexattr = ecryptfs_removexattr +}; + +struct inode_operations ecryptfs_dir_iops = { + .create = ecryptfs_create, + .lookup = ecryptfs_lookup, + .link = ecryptfs_link, + .unlink = ecryptfs_unlink, + .symlink = ecryptfs_symlink, + .mkdir = ecryptfs_mkdir, + .rmdir = ecryptfs_rmdir, + .mknod = ecryptfs_mknod, + .rename = ecryptfs_rename, + .permission = ecryptfs_permission, + .setattr = ecryptfs_setattr, + .setxattr = ecryptfs_setxattr, + .getxattr = ecryptfs_getxattr, + .listxattr = ecryptfs_listxattr, + 
.removexattr = ecryptfs_removexattr +}; + +struct inode_operations ecryptfs_main_iops = { + .permission = ecryptfs_permission, + .setattr = ecryptfs_setattr, + .setxattr = ecryptfs_setxattr, + .getxattr = ecryptfs_getxattr, + .listxattr = ecryptfs_listxattr, + .removexattr = ecryptfs_removexattr +}; diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c new file mode 100644 index 000000000000..ba454785a0c5 --- /dev/null +++ b/fs/ecryptfs/keystore.c @@ -0,0 +1,1061 @@ +/** + * eCryptfs: Linux filesystem encryption layer + * In-kernel key management code. Includes functions to parse and + * write authentication token-related packets with the underlying + * file. + * + * Copyright (C) 2004-2006 International Business Machines Corp. + * Author(s): Michael A. Halcrow <mhalcrow@us.ibm.com> + * Michael C. Thompson <mcthomps@us.ibm.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA + * 02111-1307, USA. 
+ */ + +#include <linux/string.h> +#include <linux/sched.h> +#include <linux/syscalls.h> +#include <linux/pagemap.h> +#include <linux/key.h> +#include <linux/random.h> +#include <linux/crypto.h> +#include <linux/scatterlist.h> +#include "ecryptfs_kernel.h" + +/** + * request_key returned an error instead of a valid key address; + * determine the type of error, make appropriate log entries, and + * return an error code. + */ +int process_request_key_err(long err_code) +{ + int rc = 0; + + switch (err_code) { + case ENOKEY: + ecryptfs_printk(KERN_WARNING, "No key\n"); + rc = -ENOENT; + break; + case EKEYEXPIRED: + ecryptfs_printk(KERN_WARNING, "Key expired\n"); + rc = -ETIME; + break; + case EKEYREVOKED: + ecryptfs_printk(KERN_WARNING, "Key revoked\n"); + rc = -EINVAL; + break; + default: + ecryptfs_printk(KERN_WARNING, "Unknown error code: " + "[0x%.16x]\n", err_code); + rc = -EINVAL; + } + return rc; +} + +static void wipe_auth_tok_list(struct list_head *auth_tok_list_head) +{ + struct list_head *walker; + struct ecryptfs_auth_tok_list_item *auth_tok_list_item; + + walker = auth_tok_list_head->next; + while (walker != auth_tok_list_head) { + auth_tok_list_item = + list_entry(walker, struct ecryptfs_auth_tok_list_item, + list); + walker = auth_tok_list_item->list.next; + memset(auth_tok_list_item, 0, + sizeof(struct ecryptfs_auth_tok_list_item)); + kmem_cache_free(ecryptfs_auth_tok_list_item_cache, + auth_tok_list_item); + } +} + +struct kmem_cache *ecryptfs_auth_tok_list_item_cache; + +/** + * parse_packet_length + * @data: Pointer to memory containing length at offset + * @size: This function writes the decoded size to this memory + * address; zero on error + * @length_size: The number of bytes occupied by the encoded length + * + * Returns Zero on success + */ +static int parse_packet_length(unsigned char *data, size_t *size, + size_t *length_size) +{ + int rc = 0; + + (*length_size) = 0; + (*size) = 0; + if (data[0] < 192) { + /* One-byte length */ + (*size) = 
data[0]; + (*length_size) = 1; + } else if (data[0] < 224) { + /* Two-byte length */ + (*size) = ((data[0] - 192) * 256); + (*size) += (data[1] + 192); + (*length_size) = 2; + } else if (data[0] == 255) { + /* Five-byte length; we're not supposed to see this */ + ecryptfs_printk(KERN_ERR, "Five-byte packet length not " + "supported\n"); + rc = -EINVAL; + goto out; + } else { + ecryptfs_printk(KERN_ERR, "Error parsing packet length\n"); + rc = -EINVAL; + goto out; + } +out: + return rc; +} + +/** + * write_packet_length + * @dest: The byte array target into which to write the + * length. Must have at least 5 bytes allocated. + * @size: The length to write. + * @packet_size_length: The number of bytes used to encode the + * packet length is written to this address. + * + * Returns zero on success; non-zero on error. + */ +static int write_packet_length(char *dest, size_t size, + size_t *packet_size_length) +{ + int rc = 0; + + if (size < 192) { + dest[0] = size; + (*packet_size_length) = 1; + } else if (size < 65536) { + dest[0] = (((size - 192) / 256) + 192); + dest[1] = ((size - 192) % 256); + (*packet_size_length) = 2; + } else { + rc = -EINVAL; + ecryptfs_printk(KERN_WARNING, + "Unsupported packet size: [%d]\n", size); + } + return rc; +} + +/** + * parse_tag_3_packet + * @crypt_stat: The cryptographic context to modify based on packet + * contents. + * @data: The raw bytes of the packet. + * @auth_tok_list: eCryptfs parses packets into authentication tokens; + * a new authentication token will be placed at the end + * of this list for this packet. + * @new_auth_tok: Pointer to a pointer to memory that this function + * allocates; sets the memory address of the pointer to + * NULL on error. This object is added to the + * auth_tok_list. + * @packet_size: This function writes the size of the parsed packet + * into this memory location; zero on error. + * @max_packet_size: maximum number of bytes to parse + * + * Returns zero on success; non-zero on error. 
+ */ +static int +parse_tag_3_packet(struct ecryptfs_crypt_stat *crypt_stat, + unsigned char *data, struct list_head *auth_tok_list, + struct ecryptfs_auth_tok **new_auth_tok, + size_t *packet_size, size_t max_packet_size) +{ + int rc = 0; + size_t body_size; + struct ecryptfs_auth_tok_list_item *auth_tok_list_item; + size_t length_size; + + (*packet_size) = 0; + (*new_auth_tok) = NULL; + + /* we check that: + * one byte for the Tag 3 ID flag + * two bytes for the body size + * do not exceed the maximum_packet_size + */ + if (unlikely((*packet_size) + 3 > max_packet_size)) { + ecryptfs_printk(KERN_ERR, "Packet size exceeds max\n"); + rc = -EINVAL; + goto out; + } + + /* check for Tag 3 identifyer - one byte */ + if (data[(*packet_size)++] != ECRYPTFS_TAG_3_PACKET_TYPE) { + ecryptfs_printk(KERN_ERR, "Enter w/ first byte != 0x%.2x\n", + ECRYPTFS_TAG_3_PACKET_TYPE); + rc = -EINVAL; + goto out; + } + /* Released: wipe_auth_tok_list called in ecryptfs_parse_packet_set or + * at end of function upon failure */ + auth_tok_list_item = + kmem_cache_alloc(ecryptfs_auth_tok_list_item_cache, SLAB_KERNEL); + if (!auth_tok_list_item) { + ecryptfs_printk(KERN_ERR, "Unable to allocate memory\n"); + rc = -ENOMEM; + goto out; + } + memset(auth_tok_list_item, 0, + sizeof(struct ecryptfs_auth_tok_list_item)); + (*new_auth_tok) = &auth_tok_list_item->auth_tok; + + /* check for body size - one to two bytes */ + rc = parse_packet_length(&data[(*packet_size)], &body_size, + &length_size); + if (rc) { + ecryptfs_printk(KERN_WARNING, "Error parsing packet length; " + "rc = [%d]\n", rc); + goto out_free; + } + if (unlikely(body_size < (0x05 + ECRYPTFS_SALT_SIZE))) { + ecryptfs_printk(KERN_WARNING, "Invalid body size ([%d])\n", + body_size); + rc = -EINVAL; + goto out_free; + } + (*packet_size) += length_size; + + /* now we know the length of the remainting Tag 3 packet size: + * 5 fix bytes for: version string, cipher, S2K ID, hash algo, + * number of hash iterations + * ECRYPTFS_SALT_SIZE 
bytes for salt + * body_size bytes minus the stuff above is the encrypted key size + */ + if (unlikely((*packet_size) + body_size > max_packet_size)) { + ecryptfs_printk(KERN_ERR, "Packet size exceeds max\n"); + rc = -EINVAL; + goto out_free; + } + + /* There are 5 characters of additional information in the + * packet */ + (*new_auth_tok)->session_key.encrypted_key_size = + body_size - (0x05 + ECRYPTFS_SALT_SIZE); + ecryptfs_printk(KERN_DEBUG, "Encrypted key size = [%d]\n", + (*new_auth_tok)->session_key.encrypted_key_size); + + /* Version 4 (from RFC2440) - one byte */ + if (unlikely(data[(*packet_size)++] != 0x04)) { + ecryptfs_printk(KERN_DEBUG, "Unknown version number " + "[%d]\n", data[(*packet_size) - 1]); + rc = -EINVAL; + goto out_free; + } + + /* cipher - one byte */ + ecryptfs_cipher_code_to_string(crypt_stat->cipher, + (u16)data[(*packet_size)]); + /* A little extra work to differentiate among the AES key + * sizes; see RFC2440 */ + switch(data[(*packet_size)++]) { + case RFC2440_CIPHER_AES_192: + crypt_stat->key_size = 24; + break; + default: + crypt_stat->key_size = + (*new_auth_tok)->session_key.encrypted_key_size; + } + ecryptfs_init_crypt_ctx(crypt_stat); + /* S2K identifier 3 (from RFC2440) */ + if (unlikely(data[(*packet_size)++] != 0x03)) { + ecryptfs_printk(KERN_ERR, "Only S2K ID 3 is currently " + "supported\n"); + rc = -ENOSYS; + goto out_free; + } + + /* TODO: finish the hash mapping */ + /* hash algorithm - one byte */ + switch (data[(*packet_size)++]) { + case 0x01: /* See RFC2440 for these numbers and their mappings */ + /* Choose MD5 */ + /* salt - ECRYPTFS_SALT_SIZE bytes */ + memcpy((*new_auth_tok)->token.password.salt, + &data[(*packet_size)], ECRYPTFS_SALT_SIZE); + (*packet_size) += ECRYPTFS_SALT_SIZE; + + /* This conversion was taken straight from RFC2440 */ + /* number of hash iterations - one byte */ + (*new_auth_tok)->token.password.hash_iterations = + ((u32) 16 + (data[(*packet_size)] & 15)) + << ((data[(*packet_size)] >> 4) + 
6); + (*packet_size)++; + + /* encrypted session key - + * (body_size-5-ECRYPTFS_SALT_SIZE) bytes */ + memcpy((*new_auth_tok)->session_key.encrypted_key, + &data[(*packet_size)], + (*new_auth_tok)->session_key.encrypted_key_size); + (*packet_size) += + (*new_auth_tok)->session_key.encrypted_key_size; + (*new_auth_tok)->session_key.flags &= + ~ECRYPTFS_CONTAINS_DECRYPTED_KEY; + (*new_auth_tok)->session_key.flags |= + ECRYPTFS_CONTAINS_ENCRYPTED_KEY; + (*new_auth_tok)->token.password.hash_algo = 0x01; + break; + default: + ecryptfs_printk(KERN_ERR, "Unsupported hash algorithm: " + "[%d]\n", data[(*packet_size) - 1]); + rc = -ENOSYS; + goto out_free; + } + (*new_auth_tok)->token_type = ECRYPTFS_PASSWORD; + /* TODO: Parametarize; we might actually want userspace to + * decrypt the session key. */ + ECRYPTFS_CLEAR_FLAG((*new_auth_tok)->session_key.flags, + ECRYPTFS_USERSPACE_SHOULD_TRY_TO_DECRYPT); + ECRYPTFS_CLEAR_FLAG((*new_auth_tok)->session_key.flags, + ECRYPTFS_USERSPACE_SHOULD_TRY_TO_ENCRYPT); + list_add(&auth_tok_list_item->list, auth_tok_list); + goto out; +out_free: + (*new_auth_tok) = NULL; + memset(auth_tok_list_item, 0, + sizeof(struct ecryptfs_auth_tok_list_item)); + kmem_cache_free(ecryptfs_auth_tok_list_item_cache, + auth_tok_list_item); +out: + if (rc) + (*packet_size) = 0; + return rc; +} + +/** + * parse_tag_11_packet + * @data: The raw bytes of the packet + * @contents: This function writes the data contents of the literal + * packet into this memory location + * @max_contents_bytes: The maximum number of bytes that this function + * is allowed to write into contents + * @tag_11_contents_size: This function writes the size of the parsed + * contents into this memory location; zero on + * error + * @packet_size: This function writes the size of the parsed packet + * into this memory location; zero on error + * @max_packet_size: maximum number of bytes to parse + * + * Returns zero on success; non-zero on error. 
+ */ +static int +parse_tag_11_packet(unsigned char *data, unsigned char *contents, + size_t max_contents_bytes, size_t *tag_11_contents_size, + size_t *packet_size, size_t max_packet_size) +{ + int rc = 0; + size_t body_size; + size_t length_size; + + (*packet_size) = 0; + (*tag_11_contents_size) = 0; + + /* check that: + * one byte for the Tag 11 ID flag + * two bytes for the Tag 11 length + * do not exceed the maximum_packet_size + */ + if (unlikely((*packet_size) + 3 > max_packet_size)) { + ecryptfs_printk(KERN_ERR, "Packet size exceeds max\n"); + rc = -EINVAL; + goto out; + } + + /* check for Tag 11 identifyer - one byte */ + if (data[(*packet_size)++] != ECRYPTFS_TAG_11_PACKET_TYPE) { + ecryptfs_printk(KERN_WARNING, + "Invalid tag 11 packet format\n"); + rc = -EINVAL; + goto out; + } + + /* get Tag 11 content length - one or two bytes */ + rc = parse_packet_length(&data[(*packet_size)], &body_size, + &length_size); + if (rc) { + ecryptfs_printk(KERN_WARNING, + "Invalid tag 11 packet format\n"); + goto out; + } + (*packet_size) += length_size; + + if (body_size < 13) { + ecryptfs_printk(KERN_WARNING, "Invalid body size ([%d])\n", + body_size); + rc = -EINVAL; + goto out; + } + /* We have 13 bytes of surrounding packet values */ + (*tag_11_contents_size) = (body_size - 13); + + /* now we know the length of the remainting Tag 11 packet size: + * 14 fix bytes for: special flag one, special flag two, + * 12 skipped bytes + * body_size bytes minus the stuff above is the Tag 11 content + */ + /* FIXME why is the body size one byte smaller than the actual + * size of the body? 
+ * this seems to be an error here as well as in + * write_tag_11_packet() */ + if (unlikely((*packet_size) + body_size + 1 > max_packet_size)) { + ecryptfs_printk(KERN_ERR, "Packet size exceeds max\n"); + rc = -EINVAL; + goto out; + } + + /* special flag one - one byte */ + if (data[(*packet_size)++] != 0x62) { + ecryptfs_printk(KERN_WARNING, "Unrecognizable packet\n"); + rc = -EINVAL; + goto out; + } + + /* special flag two - one byte */ + if (data[(*packet_size)++] != 0x08) { + ecryptfs_printk(KERN_WARNING, "Unrecognizable packet\n"); + rc = -EINVAL; + goto out; + } + + /* skip the next 12 bytes */ + (*packet_size) += 12; /* We don't care about the filename or + * the timestamp */ + + /* get the Tag 11 contents - tag_11_contents_size bytes */ + memcpy(contents, &data[(*packet_size)], (*tag_11_contents_size)); + (*packet_size) += (*tag_11_contents_size); + +out: + if (rc) { + (*packet_size) = 0; + (*tag_11_contents_size) = 0; + } + return rc; +} + +/** + * decrypt_session_key - Decrypt the session key with the given auth_tok. + * + * Returns Zero on success; non-zero error otherwise. 
+ */ +static int decrypt_session_key(struct ecryptfs_auth_tok *auth_tok, + struct ecryptfs_crypt_stat *crypt_stat) +{ + int rc = 0; + struct ecryptfs_password *password_s_ptr; + struct crypto_tfm *tfm = NULL; + struct scatterlist src_sg[2], dst_sg[2]; + struct mutex *tfm_mutex = NULL; + /* TODO: Use virt_to_scatterlist for these */ + char *encrypted_session_key; + char *session_key; + + password_s_ptr = &auth_tok->token.password; + if (ECRYPTFS_CHECK_FLAG(password_s_ptr->flags, + ECRYPTFS_SESSION_KEY_ENCRYPTION_KEY_SET)) + ecryptfs_printk(KERN_DEBUG, "Session key encryption key " + "set; skipping key generation\n"); + ecryptfs_printk(KERN_DEBUG, "Session key encryption key (size [%d])" + ":\n", + password_s_ptr->session_key_encryption_key_bytes); + if (ecryptfs_verbosity > 0) + ecryptfs_dump_hex(password_s_ptr->session_key_encryption_key, + password_s_ptr-> + session_key_encryption_key_bytes); + if (!strcmp(crypt_stat->cipher, + crypt_stat->mount_crypt_stat->global_default_cipher_name) + && crypt_stat->mount_crypt_stat->global_key_tfm) { + tfm = crypt_stat->mount_crypt_stat->global_key_tfm; + tfm_mutex = &crypt_stat->mount_crypt_stat->global_key_tfm_mutex; + } else { + tfm = crypto_alloc_tfm(crypt_stat->cipher, + CRYPTO_TFM_REQ_WEAK_KEY); + if (!tfm) { + printk(KERN_ERR "Error allocating crypto context\n"); + rc = -ENOMEM; + goto out; + } + } + if (password_s_ptr->session_key_encryption_key_bytes + < crypto_tfm_alg_min_keysize(tfm)) { + printk(KERN_WARNING "Session key encryption key is [%d] bytes; " + "minimum keysize for selected cipher is [%d] bytes.\n", + password_s_ptr->session_key_encryption_key_bytes, + crypto_tfm_alg_min_keysize(tfm)); + rc = -EINVAL; + goto out; + } + if (tfm_mutex) + mutex_lock(tfm_mutex); + crypto_cipher_setkey(tfm, password_s_ptr->session_key_encryption_key, + crypt_stat->key_size); + /* TODO: virt_to_scatterlist */ + encrypted_session_key = (char *)__get_free_page(GFP_KERNEL); + if (!encrypted_session_key) { + ecryptfs_printk(KERN_ERR, 
"Out of memory\n"); + rc = -ENOMEM; + goto out_free_tfm; + } + session_key = (char *)__get_free_page(GFP_KERNEL); + if (!session_key) { + kfree(encrypted_session_key); + ecryptfs_printk(KERN_ERR, "Out of memory\n"); + rc = -ENOMEM; + goto out_free_tfm; + } + memcpy(encrypted_session_key, auth_tok->session_key.encrypted_key, + auth_tok->session_key.encrypted_key_size); + src_sg[0].page = virt_to_page(encrypted_session_key); + src_sg[0].offset = 0; + BUG_ON(auth_tok->session_key.encrypted_key_size > PAGE_CACHE_SIZE); + src_sg[0].length = auth_tok->session_key.encrypted_key_size; + dst_sg[0].page = virt_to_page(session_key); + dst_sg[0].offset = 0; + auth_tok->session_key.decrypted_key_size = + auth_tok->session_key.encrypted_key_size; + dst_sg[0].length = auth_tok->session_key.encrypted_key_size; + /* TODO: Handle error condition */ + crypto_cipher_decrypt(tfm, dst_sg, src_sg, + auth_tok->session_key.encrypted_key_size); + auth_tok->session_key.decrypted_key_size = + auth_tok->session_key.encrypted_key_size; + memcpy(auth_tok->session_key.decrypted_key, session_key, + auth_tok->session_key.decrypted_key_size); + auth_tok->session_key.flags |= ECRYPTFS_CONTAINS_DECRYPTED_KEY; + memcpy(crypt_stat->key, auth_tok->session_key.decrypted_key, + auth_tok->session_key.decrypted_key_size); + ECRYPTFS_SET_FLAG(crypt_stat->flags, ECRYPTFS_KEY_VALID); + ecryptfs_printk(KERN_DEBUG, "Decrypted session key:\n"); + if (ecryptfs_verbosity > 0) + ecryptfs_dump_hex(crypt_stat->key, + crypt_stat->key_size); + memset(encrypted_session_key, 0, PAGE_CACHE_SIZE); + free_page((unsigned long)encrypted_session_key); + memset(session_key, 0, PAGE_CACHE_SIZE); + free_page((unsigned long)session_key); +out_free_tfm: + if (tfm_mutex) + mutex_unlock(tfm_mutex); + else + crypto_free_tfm(tfm); +out: + return rc; +} + +/** + * ecryptfs_parse_packet_set + * @dest: The header page in memory + * @version: Version of file format, to guide parsing behavior + * + * Get crypt_stat to have the file's session 
key if the requisite key + * is available to decrypt the session key. + * + * Returns Zero if a valid authentication token was retrieved and + * processed; negative value for file not encrypted or for error + * conditions. + */ +int ecryptfs_parse_packet_set(struct ecryptfs_crypt_stat *crypt_stat, + unsigned char *src, + struct dentry *ecryptfs_dentry) +{ + size_t i = 0; + int rc = 0; + size_t found_auth_tok = 0; + size_t next_packet_is_auth_tok_packet; + char sig[ECRYPTFS_SIG_SIZE_HEX]; + struct list_head auth_tok_list; + struct list_head *walker; + struct ecryptfs_auth_tok *chosen_auth_tok = NULL; + struct ecryptfs_mount_crypt_stat *mount_crypt_stat = + &ecryptfs_superblock_to_private( + ecryptfs_dentry->d_sb)->mount_crypt_stat; + struct ecryptfs_auth_tok *candidate_auth_tok = NULL; + size_t packet_size; + struct ecryptfs_auth_tok *new_auth_tok; + unsigned char sig_tmp_space[ECRYPTFS_SIG_SIZE]; + size_t tag_11_contents_size; + size_t tag_11_packet_size; + + INIT_LIST_HEAD(&auth_tok_list); + /* Parse the header to find as many packets as we can, these will be + * added the our &auth_tok_list */ + next_packet_is_auth_tok_packet = 1; + while (next_packet_is_auth_tok_packet) { + size_t max_packet_size = ((PAGE_CACHE_SIZE - 8) - i); + + switch (src[i]) { + case ECRYPTFS_TAG_3_PACKET_TYPE: + rc = parse_tag_3_packet(crypt_stat, + (unsigned char *)&src[i], + &auth_tok_list, &new_auth_tok, + &packet_size, max_packet_size); + if (rc) { + ecryptfs_printk(KERN_ERR, "Error parsing " + "tag 3 packet\n"); + rc = -EIO; + goto out_wipe_list; + } + i += packet_size; + rc = parse_tag_11_packet((unsigned char *)&src[i], + sig_tmp_space, + ECRYPTFS_SIG_SIZE, + &tag_11_contents_size, + &tag_11_packet_size, + max_packet_size); + if (rc) { + ecryptfs_printk(KERN_ERR, "No valid " + "(ecryptfs-specific) literal " + "packet containing " + "authentication token " + "signature found after " + "tag 3 packet\n"); + rc = -EIO; + goto out_wipe_list; + } + i += tag_11_packet_size; + if 
(ECRYPTFS_SIG_SIZE != tag_11_contents_size) { + ecryptfs_printk(KERN_ERR, "Expected " + "signature of size [%d]; " + "read size [%d]\n", + ECRYPTFS_SIG_SIZE, + tag_11_contents_size); + rc = -EIO; + goto out_wipe_list; + } + ecryptfs_to_hex(new_auth_tok->token.password.signature, + sig_tmp_space, tag_11_contents_size); + new_auth_tok->token.password.signature[ + ECRYPTFS_PASSWORD_SIG_SIZE] = '\0'; + ECRYPTFS_SET_FLAG(crypt_stat->flags, + ECRYPTFS_ENCRYPTED); + break; + case ECRYPTFS_TAG_11_PACKET_TYPE: + ecryptfs_printk(KERN_WARNING, "Invalid packet set " + "(Tag 11 not allowed by itself)\n"); + rc = -EIO; + goto out_wipe_list; + break; + default: + ecryptfs_printk(KERN_DEBUG, "No packet at offset " + "[%d] of the file header; hex value of " + "character is [0x%.2x]\n", i, src[i]); + next_packet_is_auth_tok_packet = 0; + } + } + if (list_empty(&auth_tok_list)) { + rc = -EINVAL; /* Do not support non-encrypted files in + * the 0.1 release */ + goto out; + } + /* If we have a global auth tok, then we should try to use + * it */ + if (mount_crypt_stat->global_auth_tok) { + memcpy(sig, mount_crypt_stat->global_auth_tok_sig, + ECRYPTFS_SIG_SIZE_HEX); + chosen_auth_tok = mount_crypt_stat->global_auth_tok; + } else + BUG(); /* We should always have a global auth tok in + * the 0.1 release */ + /* Scan list to see if our chosen_auth_tok works */ + list_for_each(walker, &auth_tok_list) { + struct ecryptfs_auth_tok_list_item *auth_tok_list_item; + auth_tok_list_item = + list_entry(walker, struct ecryptfs_auth_tok_list_item, + list); + candidate_auth_tok = &auth_tok_list_item->auth_tok; + if (unlikely(ecryptfs_verbosity > 0)) { + ecryptfs_printk(KERN_DEBUG, + "Considering cadidate auth tok:\n"); + ecryptfs_dump_auth_tok(candidate_auth_tok); + } + /* TODO: Replace ECRYPTFS_SIG_SIZE_HEX w/ dynamic value */ + if (candidate_auth_tok->token_type == ECRYPTFS_PASSWORD + && !strncmp(candidate_auth_tok->token.password.signature, + sig, ECRYPTFS_SIG_SIZE_HEX)) { + found_auth_tok = 1; + 
goto leave_list; + /* TODO: Transfer the common salt into the + * crypt_stat salt */ + } + } +leave_list: + if (!found_auth_tok) { + ecryptfs_printk(KERN_ERR, "Could not find authentication " + "token on temporary list for sig [%.*s]\n", + ECRYPTFS_SIG_SIZE_HEX, sig); + rc = -EIO; + goto out_wipe_list; + } else { + memcpy(&(candidate_auth_tok->token.password), + &(chosen_auth_tok->token.password), + sizeof(struct ecryptfs_password)); + rc = decrypt_session_key(candidate_auth_tok, crypt_stat); + if (rc) { + ecryptfs_printk(KERN_ERR, "Error decrypting the " + "session key\n"); + goto out_wipe_list; + } + rc = ecryptfs_compute_root_iv(crypt_stat); + if (rc) { + ecryptfs_printk(KERN_ERR, "Error computing " + "the root IV\n"); + goto out_wipe_list; + } + } + rc = ecryptfs_init_crypt_ctx(crypt_stat); + if (rc) { + ecryptfs_printk(KERN_ERR, "Error initializing crypto " + "context for cipher [%s]; rc = [%d]\n", + crypt_stat->cipher, rc); + } +out_wipe_list: + wipe_auth_tok_list(&auth_tok_list); +out: + return rc; +} + +/** + * write_tag_11_packet + * @dest: Target into which Tag 11 packet is to be written + * @max: Maximum packet length + * @contents: Byte array of contents to copy in + * @contents_length: Number of bytes in contents + * @packet_length: Length of the Tag 11 packet written; zero on error + * + * Returns zero on success; non-zero on error. 
+ */ +static int +write_tag_11_packet(char *dest, int max, char *contents, size_t contents_length, + size_t *packet_length) +{ + int rc = 0; + size_t packet_size_length; + + (*packet_length) = 0; + if ((13 + contents_length) > max) { + rc = -EINVAL; + ecryptfs_printk(KERN_ERR, "Packet length larger than " + "maximum allowable\n"); + goto out; + } + /* General packet header */ + /* Packet tag */ + dest[(*packet_length)++] = ECRYPTFS_TAG_11_PACKET_TYPE; + /* Packet length */ + rc = write_packet_length(&dest[(*packet_length)], + (13 + contents_length), &packet_size_length); + if (rc) { + ecryptfs_printk(KERN_ERR, "Error generating tag 11 packet " + "header; cannot generate packet length\n"); + goto out; + } + (*packet_length) += packet_size_length; + /* Tag 11 specific */ + /* One-octet field that describes how the data is formatted */ + dest[(*packet_length)++] = 0x62; /* binary data */ + /* One-octet filename length followed by filename */ + dest[(*packet_length)++] = 8; + memcpy(&dest[(*packet_length)], "_CONSOLE", 8); + (*packet_length) += 8; + /* Four-octet number indicating modification date */ + memset(&dest[(*packet_length)], 0x00, 4); + (*packet_length) += 4; + /* Remainder is literal data */ + memcpy(&dest[(*packet_length)], contents, contents_length); + (*packet_length) += contents_length; + out: + if (rc) + (*packet_length) = 0; + return rc; +} + +/** + * write_tag_3_packet + * @dest: Buffer into which to write the packet + * @max: Maximum number of bytes that can be written + * @auth_tok: Authentication token + * @crypt_stat: The cryptographic context + * @key_rec: encrypted key + * @packet_size: This function will write the number of bytes that end + * up constituting the packet; set to zero on error + * + * Returns zero on success; non-zero on error. 
+ */ +static int +write_tag_3_packet(char *dest, size_t max, struct ecryptfs_auth_tok *auth_tok, + struct ecryptfs_crypt_stat *crypt_stat, + struct ecryptfs_key_record *key_rec, size_t *packet_size) +{ + int rc = 0; + + size_t i; + size_t signature_is_valid = 0; + size_t encrypted_session_key_valid = 0; + char session_key_encryption_key[ECRYPTFS_MAX_KEY_BYTES]; + struct scatterlist dest_sg[2]; + struct scatterlist src_sg[2]; + struct crypto_tfm *tfm = NULL; + struct mutex *tfm_mutex = NULL; + size_t key_rec_size; + size_t packet_size_length; + size_t cipher_code; + + (*packet_size) = 0; + /* Check for a valid signature on the auth_tok */ + for (i = 0; i < ECRYPTFS_SIG_SIZE_HEX; i++) + signature_is_valid |= auth_tok->token.password.signature[i]; + if (!signature_is_valid) + BUG(); + ecryptfs_from_hex((*key_rec).sig, auth_tok->token.password.signature, + ECRYPTFS_SIG_SIZE); + encrypted_session_key_valid = 0; + for (i = 0; i < crypt_stat->key_size; i++) + encrypted_session_key_valid |= + auth_tok->session_key.encrypted_key[i]; + if (encrypted_session_key_valid) { + memcpy((*key_rec).enc_key, + auth_tok->session_key.encrypted_key, + auth_tok->session_key.encrypted_key_size); + goto encrypted_session_key_set; + } + if (auth_tok->session_key.encrypted_key_size == 0) + auth_tok->session_key.encrypted_key_size = + crypt_stat->key_size; + if (crypt_stat->key_size == 24 + && strcmp("aes", crypt_stat->cipher) == 0) { + memset((crypt_stat->key + 24), 0, 8); + auth_tok->session_key.encrypted_key_size = 32; + } + (*key_rec).enc_key_size = + auth_tok->session_key.encrypted_key_size; + if (ECRYPTFS_CHECK_FLAG(auth_tok->token.password.flags, + ECRYPTFS_SESSION_KEY_ENCRYPTION_KEY_SET)) { + ecryptfs_printk(KERN_DEBUG, "Using previously generated " + "session key encryption key of size [%d]\n", + auth_tok->token.password. 
+ session_key_encryption_key_bytes); + memcpy(session_key_encryption_key, + auth_tok->token.password.session_key_encryption_key, + crypt_stat->key_size); + ecryptfs_printk(KERN_DEBUG, + "Cached session key " "encryption key: \n"); + if (ecryptfs_verbosity > 0) + ecryptfs_dump_hex(session_key_encryption_key, 16); + } + if (unlikely(ecryptfs_verbosity > 0)) { + ecryptfs_printk(KERN_DEBUG, "Session key encryption key:\n"); + ecryptfs_dump_hex(session_key_encryption_key, 16); + } + rc = virt_to_scatterlist(crypt_stat->key, + (*key_rec).enc_key_size, src_sg, 2); + if (!rc) { + ecryptfs_printk(KERN_ERR, "Error generating scatterlist " + "for crypt_stat session key\n"); + rc = -ENOMEM; + goto out; + } + rc = virt_to_scatterlist((*key_rec).enc_key, + (*key_rec).enc_key_size, dest_sg, 2); + if (!rc) { + ecryptfs_printk(KERN_ERR, "Error generating scatterlist " + "for crypt_stat encrypted session key\n"); + rc = -ENOMEM; + goto out; + } + if (!strcmp(crypt_stat->cipher, + crypt_stat->mount_crypt_stat->global_default_cipher_name) + && crypt_stat->mount_crypt_stat->global_key_tfm) { + tfm = crypt_stat->mount_crypt_stat->global_key_tfm; + tfm_mutex = &crypt_stat->mount_crypt_stat->global_key_tfm_mutex; + } else + tfm = crypto_alloc_tfm(crypt_stat->cipher, 0); + if (!tfm) { + ecryptfs_printk(KERN_ERR, "Could not initialize crypto " + "context for cipher [%s]\n", + crypt_stat->cipher); + rc = -EINVAL; + goto out; + } + if (tfm_mutex) + mutex_lock(tfm_mutex); + rc = crypto_cipher_setkey(tfm, session_key_encryption_key, + crypt_stat->key_size); + if (rc < 0) { + if (tfm_mutex) + mutex_unlock(tfm_mutex); + ecryptfs_printk(KERN_ERR, "Error setting key for crypto " + "context\n"); + goto out; + } + rc = 0; + ecryptfs_printk(KERN_DEBUG, "Encrypting [%d] bytes of the key\n", + crypt_stat->key_size); + crypto_cipher_encrypt(tfm, dest_sg, src_sg, + (*key_rec).enc_key_size); + if (tfm_mutex) + mutex_unlock(tfm_mutex); + ecryptfs_printk(KERN_DEBUG, "This should be the encrypted key:\n"); + 
if (ecryptfs_verbosity > 0) + ecryptfs_dump_hex((*key_rec).enc_key, + (*key_rec).enc_key_size); +encrypted_session_key_set: + /* Now we have a valid key_rec. Append it to the + * key_rec set. */ + key_rec_size = (sizeof(struct ecryptfs_key_record) + - ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES + + ((*key_rec).enc_key_size)); + /* TODO: Include a packet size limit as a parameter to this + * function once we have multi-packet headers (for versions + * later than 0.1 */ + if (key_rec_size >= ECRYPTFS_MAX_KEYSET_SIZE) { + ecryptfs_printk(KERN_ERR, "Keyset too large\n"); + rc = -EINVAL; + goto out; + } + /* TODO: Packet size limit */ + /* We have 5 bytes of surrounding packet data */ + if ((0x05 + ECRYPTFS_SALT_SIZE + + (*key_rec).enc_key_size) >= max) { + ecryptfs_printk(KERN_ERR, "Authentication token is too " + "large\n"); + rc = -EINVAL; + goto out; + } + /* This format is inspired by OpenPGP; see RFC 2440 + * packet tag 3 */ + dest[(*packet_size)++] = ECRYPTFS_TAG_3_PACKET_TYPE; + /* ver+cipher+s2k+hash+salt+iter+enc_key */ + rc = write_packet_length(&dest[(*packet_size)], + (0x05 + ECRYPTFS_SALT_SIZE + + (*key_rec).enc_key_size), + &packet_size_length); + if (rc) { + ecryptfs_printk(KERN_ERR, "Error generating tag 3 packet " + "header; cannot generate packet length\n"); + goto out; + } + (*packet_size) += packet_size_length; + dest[(*packet_size)++] = 0x04; /* version 4 */ + cipher_code = ecryptfs_code_for_cipher_string(crypt_stat); + if (cipher_code == 0) { + ecryptfs_printk(KERN_WARNING, "Unable to generate code for " + "cipher [%s]\n", crypt_stat->cipher); + rc = -EINVAL; + goto out; + } + dest[(*packet_size)++] = cipher_code; + dest[(*packet_size)++] = 0x03; /* S2K */ + dest[(*packet_size)++] = 0x01; /* MD5 (TODO: parameterize) */ + memcpy(&dest[(*packet_size)], auth_tok->token.password.salt, + ECRYPTFS_SALT_SIZE); + (*packet_size) += ECRYPTFS_SALT_SIZE; /* salt */ + dest[(*packet_size)++] = 0x60; /* hash iterations (65536) */ + memcpy(&dest[(*packet_size)], 
(*key_rec).enc_key, + (*key_rec).enc_key_size); + (*packet_size) += (*key_rec).enc_key_size; +out: + if (tfm && !tfm_mutex) + crypto_free_tfm(tfm); + if (rc) + (*packet_size) = 0; + return rc; +} + +/** + * ecryptfs_generate_key_packet_set + * @dest: Virtual address from which to write the key record set + * @crypt_stat: The cryptographic context from which the + * authentication tokens will be retrieved + * @ecryptfs_dentry: The dentry, used to retrieve the mount crypt stat + * for the global parameters + * @len: The amount written + * @max: The maximum amount of data allowed to be written + * + * Generates a key packet set and writes it to the virtual address + * passed in. + * + * Returns zero on success; non-zero on error. + */ +int +ecryptfs_generate_key_packet_set(char *dest_base, + struct ecryptfs_crypt_stat *crypt_stat, + struct dentry *ecryptfs_dentry, size_t *len, + size_t max) +{ + int rc = 0; + struct ecryptfs_auth_tok *auth_tok; + struct ecryptfs_mount_crypt_stat *mount_crypt_stat = + &ecryptfs_superblock_to_private( + ecryptfs_dentry->d_sb)->mount_crypt_stat; + size_t written; + struct ecryptfs_key_record key_rec; + + (*len) = 0; + if (mount_crypt_stat->global_auth_tok) { + auth_tok = mount_crypt_stat->global_auth_tok; + if (auth_tok->token_type == ECRYPTFS_PASSWORD) { + rc = write_tag_3_packet((dest_base + (*len)), + max, auth_tok, + crypt_stat, &key_rec, + &written); + if (rc) { + ecryptfs_printk(KERN_WARNING, "Error " + "writing tag 3 packet\n"); + goto out; + } + (*len) += written; + /* Write auth tok signature packet */ + rc = write_tag_11_packet( + (dest_base + (*len)), + (max - (*len)), + key_rec.sig, ECRYPTFS_SIG_SIZE, &written); + if (rc) { + ecryptfs_printk(KERN_ERR, "Error writing " + "auth tok signature packet\n"); + goto out; + } + (*len) += written; + } else { + ecryptfs_printk(KERN_WARNING, "Unsupported " + "authentication token type\n"); + rc = -EINVAL; + goto out; + } + if (rc) { + ecryptfs_printk(KERN_WARNING, "Error writing " + 
"authentication token packet with sig " + "= [%s]\n", + mount_crypt_stat->global_auth_tok_sig); + rc = -EIO; + goto out; + } + } else + BUG(); + if (likely((max - (*len)) > 0)) { + dest_base[(*len)] = 0x00; + } else { + ecryptfs_printk(KERN_ERR, "Error writing boundary byte\n"); + rc = -EIO; + } +out: + if (rc) + (*len) = 0; + return rc; +} diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c new file mode 100644 index 000000000000..7a11b8ae6644 --- /dev/null +++ b/fs/ecryptfs/main.c @@ -0,0 +1,831 @@ +/** + * eCryptfs: Linux filesystem encryption layer + * + * Copyright (C) 1997-2003 Erez Zadok + * Copyright (C) 2001-2003 Stony Brook University + * Copyright (C) 2004-2006 International Business Machines Corp. + * Author(s): Michael A. Halcrow <mahalcro@us.ibm.com> + * Michael C. Thompson <mcthomps@us.ibm.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA + * 02111-1307, USA. + */ + +#include <linux/dcache.h> +#include <linux/file.h> +#include <linux/module.h> +#include <linux/namei.h> +#include <linux/skbuff.h> +#include <linux/crypto.h> +#include <linux/netlink.h> +#include <linux/mount.h> +#include <linux/dcache.h> +#include <linux/pagemap.h> +#include <linux/key.h> +#include <linux/parser.h> +#include "ecryptfs_kernel.h" + +/** + * Module parameter that defines the ecryptfs_verbosity level. 
+ */ +int ecryptfs_verbosity = 0; + +module_param(ecryptfs_verbosity, int, 0); +MODULE_PARM_DESC(ecryptfs_verbosity, + "Initial verbosity level (0 or 1; defaults to " + "0, which is Quiet)"); + +void __ecryptfs_printk(const char *fmt, ...) +{ + va_list args; + va_start(args, fmt); + if (fmt[1] == '7') { /* KERN_DEBUG */ + if (ecryptfs_verbosity >= 1) + vprintk(fmt, args); + } else + vprintk(fmt, args); + va_end(args); +} + +/** + * ecryptfs_interpose + * @lower_dentry: Existing dentry in the lower filesystem + * @dentry: ecryptfs' dentry + * @sb: ecryptfs's super_block + * @flag: If set to true, then d_add is called, else d_instantiate is called + * + * Interposes upper and lower dentries. + * + * Returns zero on success; non-zero otherwise + */ +int ecryptfs_interpose(struct dentry *lower_dentry, struct dentry *dentry, + struct super_block *sb, int flag) +{ + struct inode *lower_inode; + struct inode *inode; + int rc = 0; + + lower_inode = lower_dentry->d_inode; + if (lower_inode->i_sb != ecryptfs_superblock_to_lower(sb)) { + rc = -EXDEV; + goto out; + } + if (!igrab(lower_inode)) { + rc = -ESTALE; + goto out; + } + inode = iget5_locked(sb, (unsigned long)lower_inode, + ecryptfs_inode_test, ecryptfs_inode_set, + lower_inode); + if (!inode) { + rc = -EACCES; + iput(lower_inode); + goto out; + } + if (inode->i_state & I_NEW) + unlock_new_inode(inode); + else + iput(lower_inode); + if (S_ISLNK(lower_inode->i_mode)) + inode->i_op = &ecryptfs_symlink_iops; + else if (S_ISDIR(lower_inode->i_mode)) + inode->i_op = &ecryptfs_dir_iops; + if (S_ISDIR(lower_inode->i_mode)) + inode->i_fop = &ecryptfs_dir_fops; + /* TODO: Is there a better way to identify if the inode is + * special? 
*/ + if (S_ISBLK(lower_inode->i_mode) || S_ISCHR(lower_inode->i_mode) || + S_ISFIFO(lower_inode->i_mode) || S_ISSOCK(lower_inode->i_mode)) + init_special_inode(inode, lower_inode->i_mode, + lower_inode->i_rdev); + dentry->d_op = &ecryptfs_dops; + if (flag) + d_add(dentry, inode); + else + d_instantiate(dentry, inode); + ecryptfs_copy_attr_all(inode, lower_inode); + /* This size will be overwritten for real files w/ headers and + * other metadata */ + ecryptfs_copy_inode_size(inode, lower_inode); +out: + return rc; +} + +enum { ecryptfs_opt_sig, ecryptfs_opt_ecryptfs_sig, ecryptfs_opt_debug, + ecryptfs_opt_ecryptfs_debug, ecryptfs_opt_cipher, + ecryptfs_opt_ecryptfs_cipher, ecryptfs_opt_ecryptfs_key_bytes, + ecryptfs_opt_passthrough, ecryptfs_opt_err }; + +static match_table_t tokens = { + {ecryptfs_opt_sig, "sig=%s"}, + {ecryptfs_opt_ecryptfs_sig, "ecryptfs_sig=%s"}, + {ecryptfs_opt_debug, "debug=%u"}, + {ecryptfs_opt_ecryptfs_debug, "ecryptfs_debug=%u"}, + {ecryptfs_opt_cipher, "cipher=%s"}, + {ecryptfs_opt_ecryptfs_cipher, "ecryptfs_cipher=%s"}, + {ecryptfs_opt_ecryptfs_key_bytes, "ecryptfs_key_bytes=%u"}, + {ecryptfs_opt_passthrough, "ecryptfs_passthrough"}, + {ecryptfs_opt_err, NULL} +}; + +/** + * ecryptfs_verify_version + * @version: The version number to confirm + * + * Returns zero on good version; non-zero otherwise + */ +static int ecryptfs_verify_version(u16 version) +{ + int rc = 0; + unsigned char major; + unsigned char minor; + + major = ((version >> 8) & 0xFF); + minor = (version & 0xFF); + if (major != ECRYPTFS_VERSION_MAJOR) { + ecryptfs_printk(KERN_ERR, "Major version number mismatch. " + "Expected [%d]; got [%d]\n", + ECRYPTFS_VERSION_MAJOR, major); + rc = -EINVAL; + goto out; + } + if (minor != ECRYPTFS_VERSION_MINOR) { + ecryptfs_printk(KERN_ERR, "Minor version number mismatch. 
" + "Expected [%d]; got [%d]\n", + ECRYPTFS_VERSION_MINOR, minor); + rc = -EINVAL; + goto out; + } +out: + return rc; +} + +/** + * ecryptfs_parse_options + * @sb: The ecryptfs super block + * @options: The options pased to the kernel + * + * Parse mount options: + * debug=N - ecryptfs_verbosity level for debug output + * sig=XXX - description(signature) of the key to use + * + * Returns the dentry object of the lower-level (lower/interposed) + * directory; We want to mount our stackable file system on top of + * that lower directory. + * + * The signature of the key to use must be the description of a key + * already in the keyring. Mounting will fail if the key can not be + * found. + * + * Returns zero on success; non-zero on error + */ +static int ecryptfs_parse_options(struct super_block *sb, char *options) +{ + char *p; + int rc = 0; + int sig_set = 0; + int cipher_name_set = 0; + int cipher_key_bytes; + int cipher_key_bytes_set = 0; + struct key *auth_tok_key = NULL; + struct ecryptfs_auth_tok *auth_tok = NULL; + struct ecryptfs_mount_crypt_stat *mount_crypt_stat = + &ecryptfs_superblock_to_private(sb)->mount_crypt_stat; + substring_t args[MAX_OPT_ARGS]; + int token; + char *sig_src; + char *sig_dst; + char *debug_src; + char *cipher_name_dst; + char *cipher_name_src; + char *cipher_key_bytes_src; + struct crypto_tfm *tmp_tfm; + int cipher_name_len; + + if (!options) { + rc = -EINVAL; + goto out; + } + while ((p = strsep(&options, ",")) != NULL) { + if (!*p) + continue; + token = match_token(p, tokens, args); + switch (token) { + case ecryptfs_opt_sig: + case ecryptfs_opt_ecryptfs_sig: + sig_src = args[0].from; + sig_dst = + mount_crypt_stat->global_auth_tok_sig; + memcpy(sig_dst, sig_src, ECRYPTFS_SIG_SIZE_HEX); + sig_dst[ECRYPTFS_SIG_SIZE_HEX] = '\0'; + ecryptfs_printk(KERN_DEBUG, + "The mount_crypt_stat " + "global_auth_tok_sig set to: " + "[%s]\n", sig_dst); + sig_set = 1; + break; + case ecryptfs_opt_debug: + case ecryptfs_opt_ecryptfs_debug: + 
debug_src = args[0].from; + ecryptfs_verbosity = + (int)simple_strtol(debug_src, &debug_src, + 0); + ecryptfs_printk(KERN_DEBUG, + "Verbosity set to [%d]" "\n", + ecryptfs_verbosity); + break; + case ecryptfs_opt_cipher: + case ecryptfs_opt_ecryptfs_cipher: + cipher_name_src = args[0].from; + cipher_name_dst = + mount_crypt_stat-> + global_default_cipher_name; + strncpy(cipher_name_dst, cipher_name_src, + ECRYPTFS_MAX_CIPHER_NAME_SIZE); + ecryptfs_printk(KERN_DEBUG, + "The mount_crypt_stat " + "global_default_cipher_name set to: " + "[%s]\n", cipher_name_dst); + cipher_name_set = 1; + break; + case ecryptfs_opt_ecryptfs_key_bytes: + cipher_key_bytes_src = args[0].from; + cipher_key_bytes = + (int)simple_strtol(cipher_key_bytes_src, + &cipher_key_bytes_src, 0); + mount_crypt_stat->global_default_cipher_key_size = + cipher_key_bytes; + ecryptfs_printk(KERN_DEBUG, + "The mount_crypt_stat " + "global_default_cipher_key_size " + "set to: [%d]\n", mount_crypt_stat-> + global_default_cipher_key_size); + cipher_key_bytes_set = 1; + break; + case ecryptfs_opt_passthrough: + mount_crypt_stat->flags |= + ECRYPTFS_PLAINTEXT_PASSTHROUGH_ENABLED; + break; + case ecryptfs_opt_err: + default: + ecryptfs_printk(KERN_WARNING, + "eCryptfs: unrecognized option '%s'\n", + p); + } + } + /* Do not support lack of mount-wide signature in 0.1 + * release */ + if (!sig_set) { + rc = -EINVAL; + ecryptfs_printk(KERN_ERR, "You must supply a valid " + "passphrase auth tok signature as a mount " + "parameter; see the eCryptfs README\n"); + goto out; + } + if (!cipher_name_set) { + cipher_name_len = strlen(ECRYPTFS_DEFAULT_CIPHER); + if (unlikely(cipher_name_len + >= ECRYPTFS_MAX_CIPHER_NAME_SIZE)) { + rc = -EINVAL; + BUG(); + goto out; + } + memcpy(mount_crypt_stat->global_default_cipher_name, + ECRYPTFS_DEFAULT_CIPHER, cipher_name_len); + mount_crypt_stat->global_default_cipher_name[cipher_name_len] + = '\0'; + } + if (!cipher_key_bytes_set) { + mount_crypt_stat->global_default_cipher_key_size 
= + ECRYPTFS_DEFAULT_KEY_BYTES; + ecryptfs_printk(KERN_DEBUG, "Cipher key size was not " + "specified. Defaulting to [%d]\n", + mount_crypt_stat-> + global_default_cipher_key_size); + } + rc = ecryptfs_process_cipher( + &tmp_tfm, + &mount_crypt_stat->global_key_tfm, + mount_crypt_stat->global_default_cipher_name, + mount_crypt_stat->global_default_cipher_key_size); + if (tmp_tfm) + crypto_free_tfm(tmp_tfm); + if (rc) { + printk(KERN_ERR "Error attempting to initialize cipher [%s] " + "with key size [%Zd] bytes; rc = [%d]\n", + mount_crypt_stat->global_default_cipher_name, + mount_crypt_stat->global_default_cipher_key_size, rc); + rc = -EINVAL; + goto out; + } + mutex_init(&mount_crypt_stat->global_key_tfm_mutex); + ecryptfs_printk(KERN_DEBUG, "Requesting the key with description: " + "[%s]\n", mount_crypt_stat->global_auth_tok_sig); + /* The reference to this key is held until umount is done The + * call to key_put is done in ecryptfs_put_super() */ + auth_tok_key = request_key(&key_type_user, + mount_crypt_stat->global_auth_tok_sig, + NULL); + if (!auth_tok_key || IS_ERR(auth_tok_key)) { + ecryptfs_printk(KERN_ERR, "Could not find key with " + "description: [%s]\n", + mount_crypt_stat->global_auth_tok_sig); + process_request_key_err(PTR_ERR(auth_tok_key)); + rc = -EINVAL; + goto out; + } + auth_tok = ecryptfs_get_key_payload_data(auth_tok_key); + if (ecryptfs_verify_version(auth_tok->version)) { + ecryptfs_printk(KERN_ERR, "Data structure version mismatch. 
" + "Userspace tools must match eCryptfs kernel " + "module with major version [%d] and minor " + "version [%d]\n", ECRYPTFS_VERSION_MAJOR, + ECRYPTFS_VERSION_MINOR); + rc = -EINVAL; + goto out; + } + if (auth_tok->token_type != ECRYPTFS_PASSWORD) { + ecryptfs_printk(KERN_ERR, "Invalid auth_tok structure " + "returned from key\n"); + rc = -EINVAL; + goto out; + } + mount_crypt_stat->global_auth_tok_key = auth_tok_key; + mount_crypt_stat->global_auth_tok = auth_tok; +out: + return rc; +} + +struct kmem_cache *ecryptfs_sb_info_cache; + +/** + * ecryptfs_fill_super + * @sb: The ecryptfs super block + * @raw_data: The options passed to mount + * @silent: Not used but required by function prototype + * + * Sets up what we can of the sb, rest is done in ecryptfs_read_super + * + * Returns zero on success; non-zero otherwise + */ +static int +ecryptfs_fill_super(struct super_block *sb, void *raw_data, int silent) +{ + int rc = 0; + + /* Released in ecryptfs_put_super() */ + ecryptfs_set_superblock_private(sb, + kmem_cache_alloc(ecryptfs_sb_info_cache, + SLAB_KERNEL)); + if (!ecryptfs_superblock_to_private(sb)) { + ecryptfs_printk(KERN_WARNING, "Out of memory\n"); + rc = -ENOMEM; + goto out; + } + memset(ecryptfs_superblock_to_private(sb), 0, + sizeof(struct ecryptfs_sb_info)); + sb->s_op = &ecryptfs_sops; + /* Released through deactivate_super(sb) from get_sb_nodev */ + sb->s_root = d_alloc(NULL, &(const struct qstr) { + .hash = 0,.name = "/",.len = 1}); + if (!sb->s_root) { + ecryptfs_printk(KERN_ERR, "d_alloc failed\n"); + rc = -ENOMEM; + goto out; + } + sb->s_root->d_op = &ecryptfs_dops; + sb->s_root->d_sb = sb; + sb->s_root->d_parent = sb->s_root; + /* Released in d_release when dput(sb->s_root) is called */ + /* through deactivate_super(sb) from get_sb_nodev() */ + ecryptfs_set_dentry_private(sb->s_root, + kmem_cache_alloc(ecryptfs_dentry_info_cache, + SLAB_KERNEL)); + if (!ecryptfs_dentry_to_private(sb->s_root)) { + ecryptfs_printk(KERN_ERR, + "dentry_info_cache 
alloc failed\n"); + rc = -ENOMEM; + goto out; + } + memset(ecryptfs_dentry_to_private(sb->s_root), 0, + sizeof(struct ecryptfs_dentry_info)); + rc = 0; +out: + /* Should be able to rely on deactivate_super called from + * get_sb_nodev */ + return rc; +} + +/** + * ecryptfs_read_super + * @sb: The ecryptfs super block + * @dev_name: The path to mount over + * + * Read the super block of the lower filesystem, and use + * ecryptfs_interpose to create our initial inode and super block + * struct. + */ +static int ecryptfs_read_super(struct super_block *sb, const char *dev_name) +{ + int rc; + struct nameidata nd; + struct dentry *lower_root; + struct vfsmount *lower_mnt; + + memset(&nd, 0, sizeof(struct nameidata)); + rc = path_lookup(dev_name, LOOKUP_FOLLOW, &nd); + if (rc) { + ecryptfs_printk(KERN_WARNING, "path_lookup() failed\n"); + goto out_free; + } + lower_root = nd.dentry; + if (!lower_root->d_inode) { + ecryptfs_printk(KERN_WARNING, + "No directory to interpose on\n"); + rc = -ENOENT; + goto out_free; + } + lower_mnt = nd.mnt; + ecryptfs_set_superblock_lower(sb, lower_root->d_sb); + sb->s_maxbytes = lower_root->d_sb->s_maxbytes; + ecryptfs_set_dentry_lower(sb->s_root, lower_root); + ecryptfs_set_dentry_lower_mnt(sb->s_root, lower_mnt); + if ((rc = ecryptfs_interpose(lower_root, sb->s_root, sb, 0))) + goto out_free; + rc = 0; + goto out; +out_free: + path_release(&nd); +out: + return rc; +} + +/** + * ecryptfs_get_sb + * @fs_type + * @flags + * @dev_name: The path to mount over + * @raw_data: The options passed into the kernel + * + * The whole ecryptfs_get_sb process is broken into 4 functions: + * ecryptfs_parse_options(): handle options passed to ecryptfs, if any + * ecryptfs_fill_super(): used by get_sb_nodev, fills out the super_block + * with as much information as it can before needing + * the lower filesystem. 
+ * ecryptfs_read_super(): this accesses the lower filesystem and uses + * ecryptfs_interpolate to perform most of the linking + * ecryptfs_interpolate(): links the lower filesystem into ecryptfs + */ +static int ecryptfs_get_sb(struct file_system_type *fs_type, int flags, + const char *dev_name, void *raw_data, + struct vfsmount *mnt) +{ + int rc; + struct super_block *sb; + + rc = get_sb_nodev(fs_type, flags, raw_data, ecryptfs_fill_super, mnt); + if (rc < 0) { + printk(KERN_ERR "Getting sb failed; rc = [%d]\n", rc); + goto out; + } + sb = mnt->mnt_sb; + rc = ecryptfs_parse_options(sb, raw_data); + if (rc) { + printk(KERN_ERR "Error parsing options; rc = [%d]\n", rc); + goto out_abort; + } + rc = ecryptfs_read_super(sb, dev_name); + if (rc) { + printk(KERN_ERR "Reading sb failed; rc = [%d]\n", rc); + goto out_abort; + } + goto out; +out_abort: + dput(sb->s_root); + up_write(&sb->s_umount); + deactivate_super(sb); +out: + return rc; +} + +/** + * ecryptfs_kill_block_super + * @sb: The ecryptfs super block + * + * Used to bring the superblock down and free the private data. 
+ * Private data is free'd in ecryptfs_put_super() + */ +static void ecryptfs_kill_block_super(struct super_block *sb) +{ + generic_shutdown_super(sb); +} + +static struct file_system_type ecryptfs_fs_type = { + .owner = THIS_MODULE, + .name = "ecryptfs", + .get_sb = ecryptfs_get_sb, + .kill_sb = ecryptfs_kill_block_super, + .fs_flags = 0 +}; + +/** + * inode_info_init_once + * + * Initializes the ecryptfs_inode_info_cache when it is created + */ +static void +inode_info_init_once(void *vptr, struct kmem_cache *cachep, unsigned long flags) +{ + struct ecryptfs_inode_info *ei = (struct ecryptfs_inode_info *)vptr; + + if ((flags & (SLAB_CTOR_VERIFY | SLAB_CTOR_CONSTRUCTOR)) == + SLAB_CTOR_CONSTRUCTOR) + inode_init_once(&ei->vfs_inode); +} + +static struct ecryptfs_cache_info { + kmem_cache_t **cache; + const char *name; + size_t size; + void (*ctor)(void*, struct kmem_cache *, unsigned long); +} ecryptfs_cache_infos[] = { + { + .cache = &ecryptfs_auth_tok_list_item_cache, + .name = "ecryptfs_auth_tok_list_item", + .size = sizeof(struct ecryptfs_auth_tok_list_item), + }, + { + .cache = &ecryptfs_file_info_cache, + .name = "ecryptfs_file_cache", + .size = sizeof(struct ecryptfs_file_info), + }, + { + .cache = &ecryptfs_dentry_info_cache, + .name = "ecryptfs_dentry_info_cache", + .size = sizeof(struct ecryptfs_dentry_info), + }, + { + .cache = &ecryptfs_inode_info_cache, + .name = "ecryptfs_inode_cache", + .size = sizeof(struct ecryptfs_inode_info), + .ctor = inode_info_init_once, + }, + { + .cache = &ecryptfs_sb_info_cache, + .name = "ecryptfs_sb_cache", + .size = sizeof(struct ecryptfs_sb_info), + }, + { + .cache = &ecryptfs_header_cache_0, + .name = "ecryptfs_headers_0", + .size = PAGE_CACHE_SIZE, + }, + { + .cache = &ecryptfs_header_cache_1, + .name = "ecryptfs_headers_1", + .size = PAGE_CACHE_SIZE, + }, + { + .cache = &ecryptfs_header_cache_2, + .name = "ecryptfs_headers_2", + .size = PAGE_CACHE_SIZE, + }, + { + .cache = &ecryptfs_lower_page_cache, + .name = 
"ecryptfs_lower_page_cache", + .size = PAGE_CACHE_SIZE, + }, +}; + +static void ecryptfs_free_kmem_caches(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(ecryptfs_cache_infos); i++) { + struct ecryptfs_cache_info *info; + + info = &ecryptfs_cache_infos[i]; + if (*(info->cache)) + kmem_cache_destroy(*(info->cache)); + } +} + +/** + * ecryptfs_init_kmem_caches + * + * Returns zero on success; non-zero otherwise + */ +static int ecryptfs_init_kmem_caches(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(ecryptfs_cache_infos); i++) { + struct ecryptfs_cache_info *info; + + info = &ecryptfs_cache_infos[i]; + *(info->cache) = kmem_cache_create(info->name, info->size, + 0, SLAB_HWCACHE_ALIGN, info->ctor, NULL); + if (!*(info->cache)) { + ecryptfs_free_kmem_caches(); + ecryptfs_printk(KERN_WARNING, "%s: " + "kmem_cache_create failed\n", + info->name); + return -ENOMEM; + } + } + return 0; +} + +struct ecryptfs_obj { + char *name; + struct list_head slot_list; + struct kobject kobj; +}; + +struct ecryptfs_attribute { + struct attribute attr; + ssize_t(*show) (struct ecryptfs_obj *, char *); + ssize_t(*store) (struct ecryptfs_obj *, const char *, size_t); +}; + +static ssize_t +ecryptfs_attr_store(struct kobject *kobj, + struct attribute *attr, const char *buf, size_t len) +{ + struct ecryptfs_obj *obj = container_of(kobj, struct ecryptfs_obj, + kobj); + struct ecryptfs_attribute *attribute = + container_of(attr, struct ecryptfs_attribute, attr); + + return (attribute->store ? attribute->store(obj, buf, len) : 0); +} + +static ssize_t +ecryptfs_attr_show(struct kobject *kobj, struct attribute *attr, char *buf) +{ + struct ecryptfs_obj *obj = container_of(kobj, struct ecryptfs_obj, + kobj); + struct ecryptfs_attribute *attribute = + container_of(attr, struct ecryptfs_attribute, attr); + + return (attribute->show ? 
attribute->show(obj, buf) : 0); +} + +static struct sysfs_ops ecryptfs_sysfs_ops = { + .show = ecryptfs_attr_show, + .store = ecryptfs_attr_store +}; + +static struct kobj_type ecryptfs_ktype = { + .sysfs_ops = &ecryptfs_sysfs_ops +}; + +static decl_subsys(ecryptfs, &ecryptfs_ktype, NULL); + +static ssize_t version_show(struct ecryptfs_obj *obj, char *buff) +{ + return snprintf(buff, PAGE_SIZE, "%d\n", ECRYPTFS_VERSIONING_MASK); +} + +static struct ecryptfs_attribute sysfs_attr_version = __ATTR_RO(version); + +struct ecryptfs_version_str_map_elem { + u32 flag; + char *str; +} ecryptfs_version_str_map[] = { + {ECRYPTFS_VERSIONING_PASSPHRASE, "passphrase"}, + {ECRYPTFS_VERSIONING_PUBKEY, "pubkey"}, + {ECRYPTFS_VERSIONING_PLAINTEXT_PASSTHROUGH, "plaintext passthrough"}, + {ECRYPTFS_VERSIONING_POLICY, "policy"} +}; + +static ssize_t version_str_show(struct ecryptfs_obj *obj, char *buff) +{ + int i; + int remaining = PAGE_SIZE; + int total_written = 0; + + buff[0] = '\0'; + for (i = 0; i < ARRAY_SIZE(ecryptfs_version_str_map); i++) { + int entry_size; + + if (!(ECRYPTFS_VERSIONING_MASK + & ecryptfs_version_str_map[i].flag)) + continue; + entry_size = strlen(ecryptfs_version_str_map[i].str); + if ((entry_size + 2) > remaining) + goto out; + memcpy(buff, ecryptfs_version_str_map[i].str, entry_size); + buff[entry_size++] = '\n'; + buff[entry_size] = '\0'; + buff += entry_size; + total_written += entry_size; + remaining -= entry_size; + } +out: + return total_written; +} + +static struct ecryptfs_attribute sysfs_attr_version_str = __ATTR_RO(version_str); + +static int do_sysfs_registration(void) +{ + int rc; + + if ((rc = subsystem_register(&ecryptfs_subsys))) { + printk(KERN_ERR + "Unable to register ecryptfs sysfs subsystem\n"); + goto out; + } + rc = sysfs_create_file(&ecryptfs_subsys.kset.kobj, + &sysfs_attr_version.attr); + if (rc) { + printk(KERN_ERR + "Unable to create ecryptfs version attribute\n"); + subsystem_unregister(&ecryptfs_subsys); + goto out; + } + rc = 
sysfs_create_file(&ecryptfs_subsys.kset.kobj, + &sysfs_attr_version_str.attr); + if (rc) { + printk(KERN_ERR + "Unable to create ecryptfs version_str attribute\n"); + sysfs_remove_file(&ecryptfs_subsys.kset.kobj, + &sysfs_attr_version.attr); + subsystem_unregister(&ecryptfs_subsys); + goto out; + } +out: + return rc; +} + +static int __init ecryptfs_init(void) +{ + int rc; + + if (ECRYPTFS_DEFAULT_EXTENT_SIZE > PAGE_CACHE_SIZE) { + rc = -EINVAL; + ecryptfs_printk(KERN_ERR, "The eCryptfs extent size is " + "larger than the host's page size, and so " + "eCryptfs cannot run on this system. The " + "default eCryptfs extent size is [%d] bytes; " + "the page size is [%d] bytes.\n", + ECRYPTFS_DEFAULT_EXTENT_SIZE, PAGE_CACHE_SIZE); + goto out; + } + rc = ecryptfs_init_kmem_caches(); + if (rc) { + printk(KERN_ERR + "Failed to allocate one or more kmem_cache objects\n"); + goto out; + } + rc = register_filesystem(&ecryptfs_fs_type); + if (rc) { + printk(KERN_ERR "Failed to register filesystem\n"); + ecryptfs_free_kmem_caches(); + goto out; + } + kset_set_kset_s(&ecryptfs_subsys, fs_subsys); + sysfs_attr_version.attr.owner = THIS_MODULE; + sysfs_attr_version_str.attr.owner = THIS_MODULE; + rc = do_sysfs_registration(); + if (rc) { + printk(KERN_ERR "sysfs registration failed\n"); + unregister_filesystem(&ecryptfs_fs_type); + ecryptfs_free_kmem_caches(); + goto out; + } +out: + return rc; +} + +static void __exit ecryptfs_exit(void) +{ + sysfs_remove_file(&ecryptfs_subsys.kset.kobj, + &sysfs_attr_version.attr); + sysfs_remove_file(&ecryptfs_subsys.kset.kobj, + &sysfs_attr_version_str.attr); + subsystem_unregister(&ecryptfs_subsys); + unregister_filesystem(&ecryptfs_fs_type); + ecryptfs_free_kmem_caches(); +} + +MODULE_AUTHOR("Michael A. 
Halcrow <mhalcrow@us.ibm.com>"); +MODULE_DESCRIPTION("eCryptfs"); + +MODULE_LICENSE("GPL"); + +module_init(ecryptfs_init) +module_exit(ecryptfs_exit) diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c new file mode 100644 index 000000000000..924dd90a4cf5 --- /dev/null +++ b/fs/ecryptfs/mmap.c @@ -0,0 +1,788 @@ +/** + * eCryptfs: Linux filesystem encryption layer + * This is where eCryptfs coordinates the symmetric encryption and + * decryption of the file data as it passes between the lower + * encrypted file and the upper decrypted file. + * + * Copyright (C) 1997-2003 Erez Zadok + * Copyright (C) 2001-2003 Stony Brook University + * Copyright (C) 2004-2006 International Business Machines Corp. + * Author(s): Michael A. Halcrow <mahalcro@us.ibm.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA + * 02111-1307, USA. + */ + +#include <linux/pagemap.h> +#include <linux/writeback.h> +#include <linux/page-flags.h> +#include <linux/mount.h> +#include <linux/file.h> +#include <linux/crypto.h> +#include <linux/scatterlist.h> +#include "ecryptfs_kernel.h" + +struct kmem_cache *ecryptfs_lower_page_cache; + +/** + * ecryptfs_get1page + * + * Get one page from cache or lower f/s, return error otherwise. + * + * Returns unlocked and up-to-date page (if ok), with increased + * refcnt. 
+ */ +static struct page *ecryptfs_get1page(struct file *file, int index) +{ + struct page *page; + struct dentry *dentry; + struct inode *inode; + struct address_space *mapping; + + dentry = file->f_dentry; + inode = dentry->d_inode; + mapping = inode->i_mapping; + page = read_cache_page(mapping, index, + (filler_t *)mapping->a_ops->readpage, + (void *)file); + if (IS_ERR(page)) + goto out; + wait_on_page_locked(page); +out: + return page; +} + +static +int write_zeros(struct file *file, pgoff_t index, int start, int num_zeros); + +/** + * ecryptfs_fill_zeros + * @file: The ecryptfs file + * @new_length: The new length of the data in the underlying file; + * everything between the prior end of the file and the + * new end of the file will be filled with zero's. + * new_length must be greater than current length + * + * Function for handling lseek-ing past the end of the file. + * + * This function does not support shrinking, only growing a file. + * + * Returns zero on success; non-zero otherwise. 
+ */ +int ecryptfs_fill_zeros(struct file *file, loff_t new_length) +{ + int rc = 0; + struct dentry *dentry = file->f_dentry; + struct inode *inode = dentry->d_inode; + pgoff_t old_end_page_index = 0; + pgoff_t index = old_end_page_index; + int old_end_pos_in_page = -1; + pgoff_t new_end_page_index; + int new_end_pos_in_page; + loff_t cur_length = i_size_read(inode); + + if (cur_length != 0) { + index = old_end_page_index = + ((cur_length - 1) >> PAGE_CACHE_SHIFT); + old_end_pos_in_page = ((cur_length - 1) & ~PAGE_CACHE_MASK); + } + new_end_page_index = ((new_length - 1) >> PAGE_CACHE_SHIFT); + new_end_pos_in_page = ((new_length - 1) & ~PAGE_CACHE_MASK); + ecryptfs_printk(KERN_DEBUG, "old_end_page_index = [0x%.16x]; " + "old_end_pos_in_page = [%d]; " + "new_end_page_index = [0x%.16x]; " + "new_end_pos_in_page = [%d]\n", + old_end_page_index, old_end_pos_in_page, + new_end_page_index, new_end_pos_in_page); + if (old_end_page_index == new_end_page_index) { + /* Start and end are in the same page; we just need to + * set a portion of the existing page to zero's */ + rc = write_zeros(file, index, (old_end_pos_in_page + 1), + (new_end_pos_in_page - old_end_pos_in_page)); + if (rc) + ecryptfs_printk(KERN_ERR, "write_zeros(file=[%p], " + "index=[0x%.16x], " + "old_end_pos_in_page=[d], " + "(PAGE_CACHE_SIZE - new_end_pos_in_page" + "=[%d]" + ")=[d]) returned [%d]\n", file, index, + old_end_pos_in_page, + new_end_pos_in_page, + (PAGE_CACHE_SIZE - new_end_pos_in_page), + rc); + goto out; + } + /* Fill the remainder of the previous last page with zeros */ + rc = write_zeros(file, index, (old_end_pos_in_page + 1), + ((PAGE_CACHE_SIZE - 1) - old_end_pos_in_page)); + if (rc) { + ecryptfs_printk(KERN_ERR, "write_zeros(file=[%p], " + "index=[0x%.16x], old_end_pos_in_page=[d], " + "(PAGE_CACHE_SIZE - old_end_pos_in_page)=[d]) " + "returned [%d]\n", file, index, + old_end_pos_in_page, + (PAGE_CACHE_SIZE - old_end_pos_in_page), rc); + goto out; + } + index++; + while (index < 
new_end_page_index) { + /* Fill all intermediate pages with zeros */ + rc = write_zeros(file, index, 0, PAGE_CACHE_SIZE); + if (rc) { + ecryptfs_printk(KERN_ERR, "write_zeros(file=[%p], " + "index=[0x%.16x], " + "old_end_pos_in_page=[d], " + "(PAGE_CACHE_SIZE - new_end_pos_in_page" + "=[%d]" + ")=[d]) returned [%d]\n", file, index, + old_end_pos_in_page, + new_end_pos_in_page, + (PAGE_CACHE_SIZE - new_end_pos_in_page), + rc); + goto out; + } + index++; + } + /* Fill the portion at the beginning of the last new page with + * zero's */ + rc = write_zeros(file, index, 0, (new_end_pos_in_page + 1)); + if (rc) { + ecryptfs_printk(KERN_ERR, "write_zeros(file=" + "[%p], index=[0x%.16x], 0, " + "new_end_pos_in_page=[%d]" + "returned [%d]\n", file, index, + new_end_pos_in_page, rc); + goto out; + } +out: + return rc; +} + +/** + * ecryptfs_writepage + * @page: Page that is locked before this call is made + * + * Returns zero on success; non-zero otherwise + */ +static int ecryptfs_writepage(struct page *page, struct writeback_control *wbc) +{ + struct ecryptfs_page_crypt_context ctx; + int rc; + + ctx.page = page; + ctx.mode = ECRYPTFS_WRITEPAGE_MODE; + ctx.param.wbc = wbc; + rc = ecryptfs_encrypt_page(&ctx); + if (rc) { + ecryptfs_printk(KERN_WARNING, "Error encrypting " + "page (upper index [0x%.16x])\n", page->index); + ClearPageUptodate(page); + goto out; + } + SetPageUptodate(page); + unlock_page(page); +out: + return rc; +} + +/** + * Reads the data from the lower file file at index lower_page_index + * and copies that data into page. 
+ * + * @param page Page to fill + * @param lower_page_index Index of the page in the lower file to get + */ +int ecryptfs_do_readpage(struct file *file, struct page *page, + pgoff_t lower_page_index) +{ + int rc; + struct dentry *dentry; + struct file *lower_file; + struct dentry *lower_dentry; + struct inode *inode; + struct inode *lower_inode; + char *page_data; + struct page *lower_page = NULL; + char *lower_page_data; + const struct address_space_operations *lower_a_ops; + + dentry = file->f_dentry; + lower_file = ecryptfs_file_to_lower(file); + lower_dentry = ecryptfs_dentry_to_lower(dentry); + inode = dentry->d_inode; + lower_inode = ecryptfs_inode_to_lower(inode); + lower_a_ops = lower_inode->i_mapping->a_ops; + lower_page = read_cache_page(lower_inode->i_mapping, lower_page_index, + (filler_t *)lower_a_ops->readpage, + (void *)lower_file); + if (IS_ERR(lower_page)) { + rc = PTR_ERR(lower_page); + lower_page = NULL; + ecryptfs_printk(KERN_ERR, "Error reading from page cache\n"); + goto out; + } + wait_on_page_locked(lower_page); + page_data = (char *)kmap(page); + if (!page_data) { + rc = -ENOMEM; + ecryptfs_printk(KERN_ERR, "Error mapping page\n"); + goto out; + } + lower_page_data = (char *)kmap(lower_page); + if (!lower_page_data) { + rc = -ENOMEM; + ecryptfs_printk(KERN_ERR, "Error mapping page\n"); + kunmap(page); + goto out; + } + memcpy(page_data, lower_page_data, PAGE_CACHE_SIZE); + kunmap(lower_page); + kunmap(page); + rc = 0; +out: + if (likely(lower_page)) + page_cache_release(lower_page); + if (rc == 0) + SetPageUptodate(page); + else + ClearPageUptodate(page); + return rc; +} + +/** + * ecryptfs_readpage + * @file: This is an ecryptfs file + * @page: ecryptfs associated page to stick the read data into + * + * Read in a page, decrypting if necessary. + * + * Returns zero on success; non-zero on error. 
+ */ +static int ecryptfs_readpage(struct file *file, struct page *page) +{ + int rc = 0; + struct ecryptfs_crypt_stat *crypt_stat; + + BUG_ON(!(file && file->f_dentry && file->f_dentry->d_inode)); + crypt_stat = + &ecryptfs_inode_to_private(file->f_dentry->d_inode)->crypt_stat; + if (!crypt_stat + || !ECRYPTFS_CHECK_FLAG(crypt_stat->flags, ECRYPTFS_ENCRYPTED) + || ECRYPTFS_CHECK_FLAG(crypt_stat->flags, ECRYPTFS_NEW_FILE)) { + ecryptfs_printk(KERN_DEBUG, + "Passing through unencrypted page\n"); + rc = ecryptfs_do_readpage(file, page, page->index); + if (rc) { + ecryptfs_printk(KERN_ERR, "Error reading page; rc = " + "[%d]\n", rc); + goto out; + } + } else { + rc = ecryptfs_decrypt_page(file, page); + if (rc) { + + ecryptfs_printk(KERN_ERR, "Error decrypting page; " + "rc = [%d]\n", rc); + goto out; + } + } + SetPageUptodate(page); +out: + if (rc) + ClearPageUptodate(page); + ecryptfs_printk(KERN_DEBUG, "Unlocking page with index = [0x%.16x]\n", + page->index); + unlock_page(page); + return rc; +} + +static int fill_zeros_to_end_of_page(struct page *page, unsigned int to) +{ + struct inode *inode = page->mapping->host; + int end_byte_in_page; + int rc = 0; + char *page_virt; + + if ((i_size_read(inode) / PAGE_CACHE_SIZE) == page->index) { + end_byte_in_page = i_size_read(inode) % PAGE_CACHE_SIZE; + if (to > end_byte_in_page) + end_byte_in_page = to; + page_virt = kmap(page); + if (!page_virt) { + rc = -ENOMEM; + ecryptfs_printk(KERN_WARNING, + "Could not map page\n"); + goto out; + } + memset((page_virt + end_byte_in_page), 0, + (PAGE_CACHE_SIZE - end_byte_in_page)); + kunmap(page); + } +out: + return rc; +} + +static int ecryptfs_prepare_write(struct file *file, struct page *page, + unsigned from, unsigned to) +{ + int rc = 0; + + kmap(page); + if (from == 0 && to == PAGE_CACHE_SIZE) + goto out; /* If we are writing a full page, it will be + up to date. 
*/ + if (!PageUptodate(page)) + rc = ecryptfs_do_readpage(file, page, page->index); +out: + return rc; +} + +int ecryptfs_grab_and_map_lower_page(struct page **lower_page, + char **lower_virt, + struct inode *lower_inode, + unsigned long lower_page_index) +{ + int rc = 0; + + (*lower_page) = grab_cache_page(lower_inode->i_mapping, + lower_page_index); + if (!(*lower_page)) { + ecryptfs_printk(KERN_ERR, "grab_cache_page for " + "lower_page_index = [0x%.16x] failed\n", + lower_page_index); + rc = -EINVAL; + goto out; + } + if (lower_virt) + (*lower_virt) = kmap((*lower_page)); + else + kmap((*lower_page)); +out: + return rc; +} + +int ecryptfs_writepage_and_release_lower_page(struct page *lower_page, + struct inode *lower_inode, + struct writeback_control *wbc) +{ + int rc = 0; + + rc = lower_inode->i_mapping->a_ops->writepage(lower_page, wbc); + if (rc) { + ecryptfs_printk(KERN_ERR, "Error calling lower writepage(); " + "rc = [%d]\n", rc); + goto out; + } + lower_inode->i_mtime = lower_inode->i_ctime = CURRENT_TIME; + page_cache_release(lower_page); +out: + return rc; +} + +static void ecryptfs_unmap_and_release_lower_page(struct page *lower_page) +{ + kunmap(lower_page); + ecryptfs_printk(KERN_DEBUG, "Unlocking lower page with index = " + "[0x%.16x]\n", lower_page->index); + unlock_page(lower_page); + page_cache_release(lower_page); +} + +/** + * ecryptfs_write_inode_size_to_header + * + * Writes the lower file size to the first 8 bytes of the header. + * + * Returns zero on success; non-zero on error. 
+ */ +int +ecryptfs_write_inode_size_to_header(struct file *lower_file, + struct inode *lower_inode, + struct inode *inode) +{ + int rc = 0; + struct page *header_page; + char *header_virt; + const struct address_space_operations *lower_a_ops; + u64 file_size; + + rc = ecryptfs_grab_and_map_lower_page(&header_page, &header_virt, + lower_inode, 0); + if (rc) { + ecryptfs_printk(KERN_ERR, "grab_cache_page for header page " + "failed\n"); + goto out; + } + lower_a_ops = lower_inode->i_mapping->a_ops; + rc = lower_a_ops->prepare_write(lower_file, header_page, 0, 8); + file_size = (u64)i_size_read(inode); + ecryptfs_printk(KERN_DEBUG, "Writing size: [0x%.16x]\n", file_size); + file_size = cpu_to_be64(file_size); + memcpy(header_virt, &file_size, sizeof(u64)); + rc = lower_a_ops->commit_write(lower_file, header_page, 0, 8); + if (rc < 0) + ecryptfs_printk(KERN_ERR, "Error commiting header page " + "write\n"); + ecryptfs_unmap_and_release_lower_page(header_page); + lower_inode->i_mtime = lower_inode->i_ctime = CURRENT_TIME; + mark_inode_dirty_sync(inode); +out: + return rc; +} + +int ecryptfs_get_lower_page(struct page **lower_page, struct inode *lower_inode, + struct file *lower_file, + unsigned long lower_page_index, int byte_offset, + int region_bytes) +{ + int rc = 0; + + rc = ecryptfs_grab_and_map_lower_page(lower_page, NULL, lower_inode, + lower_page_index); + if (rc) { + ecryptfs_printk(KERN_ERR, "Error attempting to grab and map " + "lower page with index [0x%.16x]\n", + lower_page_index); + goto out; + } + rc = lower_inode->i_mapping->a_ops->prepare_write(lower_file, + (*lower_page), + byte_offset, + region_bytes); + if (rc) { + ecryptfs_printk(KERN_ERR, "prepare_write for " + "lower_page_index = [0x%.16x] failed; rc = " + "[%d]\n", lower_page_index, rc); + } +out: + if (rc && (*lower_page)) { + ecryptfs_unmap_and_release_lower_page(*lower_page); + (*lower_page) = NULL; + } + return rc; +} + +/** + * ecryptfs_commit_lower_page + * + * Returns zero on success; 
non-zero on error + */ +int +ecryptfs_commit_lower_page(struct page *lower_page, struct inode *lower_inode, + struct file *lower_file, int byte_offset, + int region_size) +{ + int rc = 0; + + rc = lower_inode->i_mapping->a_ops->commit_write( + lower_file, lower_page, byte_offset, region_size); + if (rc < 0) { + ecryptfs_printk(KERN_ERR, + "Error committing write; rc = [%d]\n", rc); + } else + rc = 0; + ecryptfs_unmap_and_release_lower_page(lower_page); + return rc; +} + +/** + * ecryptfs_copy_page_to_lower + * + * Used for plaintext pass-through; no page index interpolation + * required. + */ +int ecryptfs_copy_page_to_lower(struct page *page, struct inode *lower_inode, + struct file *lower_file) +{ + int rc = 0; + struct page *lower_page; + + rc = ecryptfs_get_lower_page(&lower_page, lower_inode, lower_file, + page->index, 0, PAGE_CACHE_SIZE); + if (rc) { + ecryptfs_printk(KERN_ERR, "Error attempting to get page " + "at index [0x%.16x]\n", page->index); + goto out; + } + /* TODO: aops */ + memcpy((char *)page_address(lower_page), page_address(page), + PAGE_CACHE_SIZE); + rc = ecryptfs_commit_lower_page(lower_page, lower_inode, lower_file, + 0, PAGE_CACHE_SIZE); + if (rc) + ecryptfs_printk(KERN_ERR, "Error attempting to commit page " + "at index [0x%.16x]\n", page->index); +out: + return rc; +} + +static int +process_new_file(struct ecryptfs_crypt_stat *crypt_stat, + struct file *file, struct inode *inode) +{ + struct page *header_page; + const struct address_space_operations *lower_a_ops; + struct inode *lower_inode; + struct file *lower_file; + char *header_virt; + int rc = 0; + int current_header_page = 0; + int header_pages; + int more_header_data_to_be_written = 1; + + lower_inode = ecryptfs_inode_to_lower(inode); + lower_file = ecryptfs_file_to_lower(file); + lower_a_ops = lower_inode->i_mapping->a_ops; + header_pages = ((crypt_stat->header_extent_size + * crypt_stat->num_header_extents_at_front) + / PAGE_CACHE_SIZE); + BUG_ON(header_pages < 1); + while 
(current_header_page < header_pages) { + rc = ecryptfs_grab_and_map_lower_page(&header_page, + &header_virt, + lower_inode, + current_header_page); + if (rc) { + ecryptfs_printk(KERN_ERR, "grab_cache_page for " + "header page [%d] failed; rc = [%d]\n", + current_header_page, rc); + goto out; + } + rc = lower_a_ops->prepare_write(lower_file, header_page, 0, + PAGE_CACHE_SIZE); + if (rc) { + ecryptfs_printk(KERN_ERR, "Error preparing to write " + "header page out; rc = [%d]\n", rc); + goto out; + } + memset(header_virt, 0, PAGE_CACHE_SIZE); + if (more_header_data_to_be_written) { + rc = ecryptfs_write_headers_virt(header_virt, + crypt_stat, + file->f_dentry); + if (rc) { + ecryptfs_printk(KERN_WARNING, "Error " + "generating header; rc = " + "[%d]\n", rc); + rc = -EIO; + memset(header_virt, 0, PAGE_CACHE_SIZE); + ecryptfs_unmap_and_release_lower_page( + header_page); + goto out; + } + if (current_header_page == 0) + memset(header_virt, 0, 8); + more_header_data_to_be_written = 0; + } + rc = lower_a_ops->commit_write(lower_file, header_page, 0, + PAGE_CACHE_SIZE); + ecryptfs_unmap_and_release_lower_page(header_page); + if (rc < 0) { + ecryptfs_printk(KERN_ERR, + "Error commiting header page write; " + "rc = [%d]\n", rc); + break; + } + current_header_page++; + } + if (rc >= 0) { + rc = 0; + ecryptfs_printk(KERN_DEBUG, "lower_inode->i_blocks = " + "[0x%.16x]\n", lower_inode->i_blocks); + i_size_write(inode, 0); + lower_inode->i_mtime = lower_inode->i_ctime = CURRENT_TIME; + mark_inode_dirty_sync(inode); + } + ecryptfs_printk(KERN_DEBUG, "Clearing ECRYPTFS_NEW_FILE flag in " + "crypt_stat at memory location [%p]\n", crypt_stat); + ECRYPTFS_CLEAR_FLAG(crypt_stat->flags, ECRYPTFS_NEW_FILE); +out: + return rc; +} + +/** + * ecryptfs_commit_write + * @file: The eCryptfs file object + * @page: The eCryptfs page + * @from: Ignored (we rotate the page IV on each write) + * @to: Ignored + * + * This is where we encrypt the data and pass the encrypted data to + * the lower 
filesystem. In OpenPGP-compatible mode, we operate on + * entire underlying packets. + */ +static int ecryptfs_commit_write(struct file *file, struct page *page, + unsigned from, unsigned to) +{ + struct ecryptfs_page_crypt_context ctx; + loff_t pos; + struct inode *inode; + struct inode *lower_inode; + struct file *lower_file; + struct ecryptfs_crypt_stat *crypt_stat; + int rc; + + inode = page->mapping->host; + lower_inode = ecryptfs_inode_to_lower(inode); + lower_file = ecryptfs_file_to_lower(file); + mutex_lock(&lower_inode->i_mutex); + crypt_stat = + &ecryptfs_inode_to_private(file->f_dentry->d_inode)->crypt_stat; + if (ECRYPTFS_CHECK_FLAG(crypt_stat->flags, ECRYPTFS_NEW_FILE)) { + ecryptfs_printk(KERN_DEBUG, "ECRYPTFS_NEW_FILE flag set in " + "crypt_stat at memory location [%p]\n", crypt_stat); + rc = process_new_file(crypt_stat, file, inode); + if (rc) { + ecryptfs_printk(KERN_ERR, "Error processing new " + "file; rc = [%d]\n", rc); + goto out; + } + } else + ecryptfs_printk(KERN_DEBUG, "Not a new file\n"); + ecryptfs_printk(KERN_DEBUG, "Calling fill_zeros_to_end_of_page" + "(page w/ index = [0x%.16x], to = [%d])\n", page->index, + to); + rc = fill_zeros_to_end_of_page(page, to); + if (rc) { + ecryptfs_printk(KERN_WARNING, "Error attempting to fill " + "zeros in page with index = [0x%.16x]\n", + page->index); + goto out; + } + ctx.page = page; + ctx.mode = ECRYPTFS_PREPARE_COMMIT_MODE; + ctx.param.lower_file = lower_file; + rc = ecryptfs_encrypt_page(&ctx); + if (rc) { + ecryptfs_printk(KERN_WARNING, "Error encrypting page (upper " + "index [0x%.16x])\n", page->index); + goto out; + } + rc = 0; + inode->i_blocks = lower_inode->i_blocks; + pos = (page->index << PAGE_CACHE_SHIFT) + to; + if (pos > i_size_read(inode)) { + i_size_write(inode, pos); + ecryptfs_printk(KERN_DEBUG, "Expanded file size to " + "[0x%.16x]\n", i_size_read(inode)); + } + ecryptfs_write_inode_size_to_header(lower_file, lower_inode, inode); + lower_inode->i_mtime = lower_inode->i_ctime = 
CURRENT_TIME; + mark_inode_dirty_sync(inode); +out: + kunmap(page); /* mapped in prior call (prepare_write) */ + if (rc < 0) + ClearPageUptodate(page); + else + SetPageUptodate(page); + mutex_unlock(&lower_inode->i_mutex); + return rc; +} + +/** + * write_zeros + * @file: The ecryptfs file + * @index: The index in which we are writing + * @start: The position after the last block of data + * @num_zeros: The number of zeros to write + * + * Write a specified number of zero's to a page. + * + * (start + num_zeros) must be less than or equal to PAGE_CACHE_SIZE + */ +static +int write_zeros(struct file *file, pgoff_t index, int start, int num_zeros) +{ + int rc = 0; + struct page *tmp_page; + + tmp_page = ecryptfs_get1page(file, index); + if (IS_ERR(tmp_page)) { + ecryptfs_printk(KERN_ERR, "Error getting page at index " + "[0x%.16x]\n", index); + rc = PTR_ERR(tmp_page); + goto out; + } + kmap(tmp_page); + rc = ecryptfs_prepare_write(file, tmp_page, start, start + num_zeros); + if (rc) { + ecryptfs_printk(KERN_ERR, "Error preparing to write zero's " + "to remainder of page at index [0x%.16x]\n", + index); + kunmap(tmp_page); + page_cache_release(tmp_page); + goto out; + } + memset(((char *)page_address(tmp_page) + start), 0, num_zeros); + rc = ecryptfs_commit_write(file, tmp_page, start, start + num_zeros); + if (rc < 0) { + ecryptfs_printk(KERN_ERR, "Error attempting to write zero's " + "to remainder of page at index [0x%.16x]\n", + index); + kunmap(tmp_page); + page_cache_release(tmp_page); + goto out; + } + rc = 0; + kunmap(tmp_page); + page_cache_release(tmp_page); +out: + return rc; +} + +static sector_t ecryptfs_bmap(struct address_space *mapping, sector_t block) +{ + int rc = 0; + struct inode *inode; + struct inode *lower_inode; + + inode = (struct inode *)mapping->host; + lower_inode = ecryptfs_inode_to_lower(inode); + if (lower_inode->i_mapping->a_ops->bmap) + rc = lower_inode->i_mapping->a_ops->bmap(lower_inode->i_mapping, + block); + return rc; +} + +static 
void ecryptfs_sync_page(struct page *page) +{ + struct inode *inode; + struct inode *lower_inode; + struct page *lower_page; + + inode = page->mapping->host; + lower_inode = ecryptfs_inode_to_lower(inode); + /* NOTE: Recently swapped with grab_cache_page(), since + * sync_page() just makes sure that pending I/O gets done. */ + lower_page = find_lock_page(lower_inode->i_mapping, page->index); + if (!lower_page) { + ecryptfs_printk(KERN_DEBUG, "find_lock_page failed\n"); + return; + } + lower_page->mapping->a_ops->sync_page(lower_page); + ecryptfs_printk(KERN_DEBUG, "Unlocking page with index = [0x%.16x]\n", + lower_page->index); + unlock_page(lower_page); + page_cache_release(lower_page); +} + +struct address_space_operations ecryptfs_aops = { + .writepage = ecryptfs_writepage, + .readpage = ecryptfs_readpage, + .prepare_write = ecryptfs_prepare_write, + .commit_write = ecryptfs_commit_write, + .bmap = ecryptfs_bmap, + .sync_page = ecryptfs_sync_page, +}; diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c new file mode 100644 index 000000000000..c337c0410fb1 --- /dev/null +++ b/fs/ecryptfs/super.c @@ -0,0 +1,198 @@ +/** + * eCryptfs: Linux filesystem encryption layer + * + * Copyright (C) 1997-2003 Erez Zadok + * Copyright (C) 2001-2003 Stony Brook University + * Copyright (C) 2004-2006 International Business Machines Corp. + * Author(s): Michael A. Halcrow <mahalcro@us.ibm.com> + * Michael C. Thompson <mcthomps@us.ibm.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA + * 02111-1307, USA. + */ + +#include <linux/fs.h> +#include <linux/mount.h> +#include <linux/key.h> +#include <linux/seq_file.h> +#include <linux/crypto.h> +#include "ecryptfs_kernel.h" + +struct kmem_cache *ecryptfs_inode_info_cache; + +/** + * ecryptfs_alloc_inode - allocate an ecryptfs inode + * @sb: Pointer to the ecryptfs super block + * + * Called to bring an inode into existence. + * + * Only handle allocation, setting up structures should be done in + * ecryptfs_read_inode. This is because the kernel, between now and + * then, will 0 out the private data pointer. + * + * Returns a pointer to a newly allocated inode, NULL otherwise + */ +static struct inode *ecryptfs_alloc_inode(struct super_block *sb) +{ + struct ecryptfs_inode_info *ecryptfs_inode; + struct inode *inode = NULL; + + ecryptfs_inode = kmem_cache_alloc(ecryptfs_inode_info_cache, + SLAB_KERNEL); + if (unlikely(!ecryptfs_inode)) + goto out; + ecryptfs_init_crypt_stat(&ecryptfs_inode->crypt_stat); + inode = &ecryptfs_inode->vfs_inode; +out: + return inode; +} + +/** + * ecryptfs_destroy_inode + * @inode: The ecryptfs inode + * + * This is used during the final destruction of the inode. + * All allocation of memory related to the inode, including allocated + * memory in the crypt_stat struct, will be released here. + * There should be no chance that this deallocation will be missed. + */ +static void ecryptfs_destroy_inode(struct inode *inode) +{ + struct ecryptfs_inode_info *inode_info; + + inode_info = ecryptfs_inode_to_private(inode); + ecryptfs_destruct_crypt_stat(&inode_info->crypt_stat); + kmem_cache_free(ecryptfs_inode_info_cache, inode_info); +} + +/** + * ecryptfs_init_inode + * @inode: The ecryptfs inode + * + * Set up the ecryptfs inode. 
+ */ +void ecryptfs_init_inode(struct inode *inode, struct inode *lower_inode) +{ + ecryptfs_set_inode_lower(inode, lower_inode); + inode->i_ino = lower_inode->i_ino; + inode->i_version++; + inode->i_op = &ecryptfs_main_iops; + inode->i_fop = &ecryptfs_main_fops; + inode->i_mapping->a_ops = &ecryptfs_aops; +} + +/** + * ecryptfs_put_super + * @sb: Pointer to the ecryptfs super block + * + * Final actions when unmounting a file system. + * This will handle deallocation and release of our private data. + */ +static void ecryptfs_put_super(struct super_block *sb) +{ + struct ecryptfs_sb_info *sb_info = ecryptfs_superblock_to_private(sb); + + ecryptfs_destruct_mount_crypt_stat(&sb_info->mount_crypt_stat); + kmem_cache_free(ecryptfs_sb_info_cache, sb_info); + ecryptfs_set_superblock_private(sb, NULL); +} + +/** + * ecryptfs_statfs + * @sb: The ecryptfs super block + * @buf: The struct kstatfs to fill in with stats + * + * Get the filesystem statistics. Currently, we let this pass right through + * to the lower filesystem and take no action ourselves. + */ +static int ecryptfs_statfs(struct dentry *dentry, struct kstatfs *buf) +{ + return vfs_statfs(ecryptfs_dentry_to_lower(dentry), buf); +} + +/** + * ecryptfs_clear_inode + * @inode - The ecryptfs inode + * + * Called by iput() when the inode reference count reached zero + * and the inode is not hashed anywhere. Used to clear anything + * that needs to be, before the inode is completely destroyed and put + * on the inode free list. We use this to drop out reference to the + * lower inode. + */ +static void ecryptfs_clear_inode(struct inode *inode) +{ + iput(ecryptfs_inode_to_lower(inode)); +} + +/** + * ecryptfs_umount_begin + * + * Called in do_umount(). 
+ */ +static void ecryptfs_umount_begin(struct vfsmount *vfsmnt, int flags) +{ + struct vfsmount *lower_mnt = + ecryptfs_dentry_to_lower_mnt(vfsmnt->mnt_sb->s_root); + struct super_block *lower_sb; + + mntput(lower_mnt); + lower_sb = lower_mnt->mnt_sb; + if (lower_sb->s_op->umount_begin) + lower_sb->s_op->umount_begin(lower_mnt, flags); +} + +/** + * ecryptfs_show_options + * + * Prints the directory we are currently mounted over. + * Returns zero on success; non-zero otherwise + */ +static int ecryptfs_show_options(struct seq_file *m, struct vfsmount *mnt) +{ + struct super_block *sb = mnt->mnt_sb; + struct dentry *lower_root_dentry = ecryptfs_dentry_to_lower(sb->s_root); + struct vfsmount *lower_mnt = ecryptfs_dentry_to_lower_mnt(sb->s_root); + char *tmp_page; + char *path; + int rc = 0; + + tmp_page = (char *)__get_free_page(GFP_KERNEL); + if (!tmp_page) { + rc = -ENOMEM; + goto out; + } + path = d_path(lower_root_dentry, lower_mnt, tmp_page, PAGE_SIZE); + if (IS_ERR(path)) { + rc = PTR_ERR(path); + goto out; + } + seq_printf(m, ",dir=%s", path); + free_page((unsigned long)tmp_page); +out: + return rc; +} + +struct super_operations ecryptfs_sops = { + .alloc_inode = ecryptfs_alloc_inode, + .destroy_inode = ecryptfs_destroy_inode, + .drop_inode = generic_delete_inode, + .put_super = ecryptfs_put_super, + .statfs = ecryptfs_statfs, + .remount_fs = NULL, + .clear_inode = ecryptfs_clear_inode, + .umount_begin = ecryptfs_umount_begin, + .show_options = ecryptfs_show_options +}; diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c index 87e1d03e8267..e8c7765419e8 100644 --- a/fs/lockd/clntlock.c +++ b/fs/lockd/clntlock.c @@ -144,42 +144,12 @@ u32 nlmclnt_grant(const struct sockaddr_in *addr, const struct nlm_lock *lock) */ /* - * Someone has sent us an SM_NOTIFY. Ensure we bind to the new port number, - * that we mark locks for reclaiming, and that we bump the pseudo NSM state. 
- */ -static void nlmclnt_prepare_reclaim(struct nlm_host *host) -{ - down_write(&host->h_rwsem); - host->h_monitored = 0; - host->h_state++; - host->h_nextrebind = 0; - nlm_rebind_host(host); - - /* - * Mark the locks for reclaiming. - */ - list_splice_init(&host->h_granted, &host->h_reclaim); - - dprintk("NLM: reclaiming locks for host %s\n", host->h_name); -} - -static void nlmclnt_finish_reclaim(struct nlm_host *host) -{ - host->h_reclaiming = 0; - up_write(&host->h_rwsem); - dprintk("NLM: done reclaiming locks for host %s", host->h_name); -} - -/* * Reclaim all locks on server host. We do this by spawning a separate * reclaimer thread. */ void -nlmclnt_recovery(struct nlm_host *host, u32 newstate) +nlmclnt_recovery(struct nlm_host *host) { - if (host->h_nsmstate == newstate) - return; - host->h_nsmstate = newstate; if (!host->h_reclaiming++) { nlm_get_host(host); __module_get(THIS_MODULE); @@ -199,18 +169,30 @@ reclaimer(void *ptr) daemonize("%s-reclaim", host->h_name); allow_signal(SIGKILL); + down_write(&host->h_rwsem); + /* This one ensures that our parent doesn't terminate while the * reclaim is in progress */ lock_kernel(); lockd_up(0); /* note: this cannot fail as lockd is already running */ - nlmclnt_prepare_reclaim(host); - /* First, reclaim all locks that have been marked. */ + dprintk("lockd: reclaiming locks for host %s", host->h_name); + restart: nsmstate = host->h_nsmstate; + + /* Force a portmap getport - the peer's lockd will + * most likely end up on a different port. + */ + host->h_nextrebind = jiffies; + nlm_rebind_host(host); + + /* First, reclaim all locks that have been granted. */ + list_splice_init(&host->h_granted, &host->h_reclaim); list_for_each_entry_safe(fl, next, &host->h_reclaim, fl_u.nfs_fl.list) { list_del_init(&fl->fl_u.nfs_fl.list); + /* Why are we leaking memory here? 
--okir */ if (signalled()) continue; if (nlmclnt_reclaim(host, fl) != 0) @@ -218,11 +200,13 @@ restart: list_add_tail(&fl->fl_u.nfs_fl.list, &host->h_granted); if (host->h_nsmstate != nsmstate) { /* Argh! The server rebooted again! */ - list_splice_init(&host->h_granted, &host->h_reclaim); goto restart; } } - nlmclnt_finish_reclaim(host); + + host->h_reclaiming = 0; + up_write(&host->h_rwsem); + dprintk("NLM: done reclaiming locks for host %s", host->h_name); /* Now, wake up all processes that sleep on a blocked lock */ list_for_each_entry(block, &nlm_blocked, b_list) { diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c index 0116729cec5f..3d84f600b633 100644 --- a/fs/lockd/clntproc.c +++ b/fs/lockd/clntproc.c @@ -36,14 +36,14 @@ static const struct rpc_call_ops nlmclnt_cancel_ops; /* * Cookie counter for NLM requests */ -static u32 nlm_cookie = 0x1234; +static atomic_t nlm_cookie = ATOMIC_INIT(0x1234); -static inline void nlmclnt_next_cookie(struct nlm_cookie *c) +void nlmclnt_next_cookie(struct nlm_cookie *c) { - memcpy(c->data, &nlm_cookie, 4); - memset(c->data+4, 0, 4); + u32 cookie = atomic_inc_return(&nlm_cookie); + + memcpy(c->data, &cookie, 4); c->len=4; - nlm_cookie++; } static struct nlm_lockowner *nlm_get_lockowner(struct nlm_lockowner *lockowner) @@ -153,6 +153,7 @@ nlmclnt_proc(struct inode *inode, int cmd, struct file_lock *fl) { struct rpc_clnt *client = NFS_CLIENT(inode); struct sockaddr_in addr; + struct nfs_server *nfssrv = NFS_SERVER(inode); struct nlm_host *host; struct nlm_rqst *call; sigset_t oldset; @@ -166,7 +167,9 @@ nlmclnt_proc(struct inode *inode, int cmd, struct file_lock *fl) } rpc_peeraddr(client, (struct sockaddr *) &addr, sizeof(addr)); - host = nlmclnt_lookup_host(&addr, client->cl_xprt->prot, vers); + host = nlmclnt_lookup_host(&addr, client->cl_xprt->prot, vers, + nfssrv->nfs_client->cl_hostname, + strlen(nfssrv->nfs_client->cl_hostname)); if (host == NULL) return -ENOLCK; @@ -499,7 +502,7 @@ nlmclnt_lock(struct nlm_rqst 
*req, struct file_lock *fl) unsigned char fl_flags = fl->fl_flags; int status = -ENOLCK; - if (!host->h_monitored && nsm_monitor(host) < 0) { + if (nsm_monitor(host) < 0) { printk(KERN_NOTICE "lockd: failed to monitor %s\n", host->h_name); goto out; diff --git a/fs/lockd/host.c b/fs/lockd/host.c index a0d0b58ce7a4..fb24a9730345 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c @@ -27,46 +27,60 @@ #define NLM_HOST_EXPIRE ((nrhosts > NLM_HOST_MAX)? 300 * HZ : 120 * HZ) #define NLM_HOST_COLLECT ((nrhosts > NLM_HOST_MAX)? 120 * HZ : 60 * HZ) -static struct nlm_host * nlm_hosts[NLM_HOST_NRHASH]; +static struct hlist_head nlm_hosts[NLM_HOST_NRHASH]; static unsigned long next_gc; static int nrhosts; static DEFINE_MUTEX(nlm_host_mutex); static void nlm_gc_hosts(void); +static struct nsm_handle * __nsm_find(const struct sockaddr_in *, + const char *, int, int); /* * Find an NLM server handle in the cache. If there is none, create it. */ struct nlm_host * -nlmclnt_lookup_host(struct sockaddr_in *sin, int proto, int version) +nlmclnt_lookup_host(const struct sockaddr_in *sin, int proto, int version, + const char *hostname, int hostname_len) { - return nlm_lookup_host(0, sin, proto, version); + return nlm_lookup_host(0, sin, proto, version, + hostname, hostname_len); } /* * Find an NLM client handle in the cache. If there is none, create it. 
*/ struct nlm_host * -nlmsvc_lookup_host(struct svc_rqst *rqstp) +nlmsvc_lookup_host(struct svc_rqst *rqstp, + const char *hostname, int hostname_len) { return nlm_lookup_host(1, &rqstp->rq_addr, - rqstp->rq_prot, rqstp->rq_vers); + rqstp->rq_prot, rqstp->rq_vers, + hostname, hostname_len); } /* * Common host lookup routine for server & client */ struct nlm_host * -nlm_lookup_host(int server, struct sockaddr_in *sin, - int proto, int version) +nlm_lookup_host(int server, const struct sockaddr_in *sin, + int proto, int version, + const char *hostname, + int hostname_len) { - struct nlm_host *host, **hp; - u32 addr; + struct hlist_head *chain; + struct hlist_node *pos; + struct nlm_host *host; + struct nsm_handle *nsm = NULL; int hash; - dprintk("lockd: nlm_lookup_host(%08x, p=%d, v=%d)\n", - (unsigned)(sin? ntohl(sin->sin_addr.s_addr) : 0), proto, version); + dprintk("lockd: nlm_lookup_host(%u.%u.%u.%u, p=%d, v=%d, my role=%s, name=%.*s)\n", + NIPQUAD(sin->sin_addr.s_addr), proto, version, + server? "server" : "client", + hostname_len, + hostname? hostname : "<none>"); + hash = NLM_ADDRHASH(sin->sin_addr.s_addr); @@ -76,7 +90,22 @@ nlm_lookup_host(int server, struct sockaddr_in *sin, if (time_after_eq(jiffies, next_gc)) nlm_gc_hosts(); - for (hp = &nlm_hosts[hash]; (host = *hp) != 0; hp = &host->h_next) { + /* We may keep several nlm_host objects for a peer, because each + * nlm_host is identified by + * (address, protocol, version, server/client) + * We could probably simplify this a little by putting all those + * different NLM rpc_clients into one single nlm_host object. + * This would allow us to have one nlm_host per address. 
+ */ + chain = &nlm_hosts[hash]; + hlist_for_each_entry(host, pos, chain, h_hash) { + if (!nlm_cmp_addr(&host->h_addr, sin)) + continue; + + /* See if we have an NSM handle for this client */ + if (!nsm) + nsm = host->h_nsmhandle; + if (host->h_proto != proto) continue; if (host->h_version != version) @@ -84,28 +113,30 @@ nlm_lookup_host(int server, struct sockaddr_in *sin, if (host->h_server != server) continue; - if (nlm_cmp_addr(&host->h_addr, sin)) { - if (hp != nlm_hosts + hash) { - *hp = host->h_next; - host->h_next = nlm_hosts[hash]; - nlm_hosts[hash] = host; - } - nlm_get_host(host); - mutex_unlock(&nlm_host_mutex); - return host; - } - } + /* Move to head of hash chain. */ + hlist_del(&host->h_hash); + hlist_add_head(&host->h_hash, chain); - /* Ooops, no host found, create it */ - dprintk("lockd: creating host entry\n"); + nlm_get_host(host); + goto out; + } + if (nsm) + atomic_inc(&nsm->sm_count); - host = kzalloc(sizeof(*host), GFP_KERNEL); - if (!host) - goto nohost; + host = NULL; - addr = sin->sin_addr.s_addr; - sprintf(host->h_name, "%u.%u.%u.%u", NIPQUAD(addr)); + /* Sadly, the host isn't in our hash table yet. See if + * we have an NSM handle for it. If not, create one. + */ + if (!nsm && !(nsm = nsm_find(sin, hostname, hostname_len))) + goto out; + host = kzalloc(sizeof(*host), GFP_KERNEL); + if (!host) { + nsm_release(nsm); + goto out; + } + host->h_name = nsm->sm_name; host->h_addr = *sin; host->h_addr.sin_port = 0; /* ouch! 
*/ host->h_version = version; @@ -119,9 +150,9 @@ nlm_lookup_host(int server, struct sockaddr_in *sin, init_rwsem(&host->h_rwsem); host->h_state = 0; /* pseudo NSM state */ host->h_nsmstate = 0; /* real NSM state */ + host->h_nsmhandle = nsm; host->h_server = server; - host->h_next = nlm_hosts[hash]; - nlm_hosts[hash] = host; + hlist_add_head(&host->h_hash, chain); INIT_LIST_HEAD(&host->h_lockowners); spin_lock_init(&host->h_lock); INIT_LIST_HEAD(&host->h_granted); @@ -130,35 +161,39 @@ nlm_lookup_host(int server, struct sockaddr_in *sin, if (++nrhosts > NLM_HOST_MAX) next_gc = 0; -nohost: +out: mutex_unlock(&nlm_host_mutex); return host; } -struct nlm_host * -nlm_find_client(void) +/* + * Destroy a host + */ +static void +nlm_destroy_host(struct nlm_host *host) { - /* find a nlm_host for a client for which h_killed == 0. - * and return it + struct rpc_clnt *clnt; + + BUG_ON(!list_empty(&host->h_lockowners)); + BUG_ON(atomic_read(&host->h_count)); + + /* + * Release NSM handle and unmonitor host. */ - int hash; - mutex_lock(&nlm_host_mutex); - for (hash = 0 ; hash < NLM_HOST_NRHASH; hash++) { - struct nlm_host *host, **hp; - for (hp = &nlm_hosts[hash]; (host = *hp) != 0; hp = &host->h_next) { - if (host->h_server && - host->h_killed == 0) { - nlm_get_host(host); - mutex_unlock(&nlm_host_mutex); - return host; - } + nsm_unmonitor(host); + + if ((clnt = host->h_rpcclnt) != NULL) { + if (atomic_read(&clnt->cl_users)) { + printk(KERN_WARNING + "lockd: active RPC handle\n"); + clnt->cl_dead = 1; + } else { + rpc_destroy_client(host->h_rpcclnt); } } - mutex_unlock(&nlm_host_mutex); - return NULL; + kfree(host); } - /* * Create the NLM RPC client for an NLM peer */ @@ -260,22 +295,82 @@ void nlm_release_host(struct nlm_host *host) } /* + * We were notified that the host indicated by address &sin + * has rebooted. + * Release all resources held by that peer. 
+ */ +void nlm_host_rebooted(const struct sockaddr_in *sin, + const char *hostname, int hostname_len, + u32 new_state) +{ + struct hlist_head *chain; + struct hlist_node *pos; + struct nsm_handle *nsm; + struct nlm_host *host; + + dprintk("lockd: nlm_host_rebooted(%s, %u.%u.%u.%u)\n", + hostname, NIPQUAD(sin->sin_addr)); + + /* Find the NSM handle for this peer */ + if (!(nsm = __nsm_find(sin, hostname, hostname_len, 0))) + return; + + /* When reclaiming locks on this peer, make sure that + * we set up a new notification */ + nsm->sm_monitored = 0; + + /* Mark all hosts tied to this NSM state as having rebooted. + * We run the loop repeatedly, because we drop the host table + * lock for this. + * To avoid processing a host several times, we match the nsmstate. + */ +again: mutex_lock(&nlm_host_mutex); + for (chain = nlm_hosts; chain < nlm_hosts + NLM_HOST_NRHASH; ++chain) { + hlist_for_each_entry(host, pos, chain, h_hash) { + if (host->h_nsmhandle == nsm + && host->h_nsmstate != new_state) { + host->h_nsmstate = new_state; + host->h_state++; + + nlm_get_host(host); + mutex_unlock(&nlm_host_mutex); + + if (host->h_server) { + /* We're server for this guy, just ditch + * all the locks he held. */ + nlmsvc_free_host_resources(host); + } else { + /* He's the server, initiate lock recovery. */ + nlmclnt_recovery(host); + } + + nlm_release_host(host); + goto again; + } + } + } + + mutex_unlock(&nlm_host_mutex); +} + +/* * Shut down the hosts module. * Note that this routine is called only at server shutdown time. 
*/ void nlm_shutdown_hosts(void) { + struct hlist_head *chain; + struct hlist_node *pos; struct nlm_host *host; - int i; dprintk("lockd: shutting down host module\n"); mutex_lock(&nlm_host_mutex); /* First, make all hosts eligible for gc */ dprintk("lockd: nuking all hosts...\n"); - for (i = 0; i < NLM_HOST_NRHASH; i++) { - for (host = nlm_hosts[i]; host; host = host->h_next) + for (chain = nlm_hosts; chain < nlm_hosts + NLM_HOST_NRHASH; ++chain) { + hlist_for_each_entry(host, pos, chain, h_hash) host->h_expires = jiffies - 1; } @@ -287,8 +382,8 @@ nlm_shutdown_hosts(void) if (nrhosts) { printk(KERN_WARNING "lockd: couldn't shutdown host module!\n"); dprintk("lockd: %d hosts left:\n", nrhosts); - for (i = 0; i < NLM_HOST_NRHASH; i++) { - for (host = nlm_hosts[i]; host; host = host->h_next) { + for (chain = nlm_hosts; chain < nlm_hosts + NLM_HOST_NRHASH; ++chain) { + hlist_for_each_entry(host, pos, chain, h_hash) { dprintk(" %s (cnt %d use %d exp %ld)\n", host->h_name, atomic_read(&host->h_count), host->h_inuse, host->h_expires); @@ -305,45 +400,32 @@ nlm_shutdown_hosts(void) static void nlm_gc_hosts(void) { - struct nlm_host **q, *host; - struct rpc_clnt *clnt; - int i; + struct hlist_head *chain; + struct hlist_node *pos, *next; + struct nlm_host *host; dprintk("lockd: host garbage collection\n"); - for (i = 0; i < NLM_HOST_NRHASH; i++) { - for (host = nlm_hosts[i]; host; host = host->h_next) + for (chain = nlm_hosts; chain < nlm_hosts + NLM_HOST_NRHASH; ++chain) { + hlist_for_each_entry(host, pos, chain, h_hash) host->h_inuse = 0; } /* Mark all hosts that hold locks, blocks or shares */ nlmsvc_mark_resources(); - for (i = 0; i < NLM_HOST_NRHASH; i++) { - q = &nlm_hosts[i]; - while ((host = *q) != NULL) { + for (chain = nlm_hosts; chain < nlm_hosts + NLM_HOST_NRHASH; ++chain) { + hlist_for_each_entry_safe(host, pos, next, chain, h_hash) { if (atomic_read(&host->h_count) || host->h_inuse || time_before(jiffies, host->h_expires)) { dprintk("nlm_gc_hosts skipping %s 
(cnt %d use %d exp %ld)\n", host->h_name, atomic_read(&host->h_count), host->h_inuse, host->h_expires); - q = &host->h_next; continue; } dprintk("lockd: delete host %s\n", host->h_name); - *q = host->h_next; - /* Don't unmonitor hosts that have been invalidated */ - if (host->h_monitored && !host->h_killed) - nsm_unmonitor(host); - if ((clnt = host->h_rpcclnt) != NULL) { - if (atomic_read(&clnt->cl_users)) { - printk(KERN_WARNING - "lockd: active RPC handle\n"); - clnt->cl_dead = 1; - } else { - rpc_destroy_client(host->h_rpcclnt); - } - } - kfree(host); + hlist_del_init(&host->h_hash); + + nlm_destroy_host(host); nrhosts--; } } @@ -351,3 +433,88 @@ nlm_gc_hosts(void) next_gc = jiffies + NLM_HOST_COLLECT; } + +/* + * Manage NSM handles + */ +static LIST_HEAD(nsm_handles); +static DEFINE_MUTEX(nsm_mutex); + +static struct nsm_handle * +__nsm_find(const struct sockaddr_in *sin, + const char *hostname, int hostname_len, + int create) +{ + struct nsm_handle *nsm = NULL; + struct list_head *pos; + + if (!sin) + return NULL; + + if (hostname && memchr(hostname, '/', hostname_len) != NULL) { + if (printk_ratelimit()) { + printk(KERN_WARNING "Invalid hostname \"%.*s\" " + "in NFS lock request\n", + hostname_len, hostname); + } + return NULL; + } + + mutex_lock(&nsm_mutex); + list_for_each(pos, &nsm_handles) { + nsm = list_entry(pos, struct nsm_handle, sm_link); + + if (hostname && nsm_use_hostnames) { + if (strlen(nsm->sm_name) != hostname_len + || memcmp(nsm->sm_name, hostname, hostname_len)) + continue; + } else if (!nlm_cmp_addr(&nsm->sm_addr, sin)) + continue; + atomic_inc(&nsm->sm_count); + goto out; + } + + if (!create) { + nsm = NULL; + goto out; + } + + nsm = kzalloc(sizeof(*nsm) + hostname_len + 1, GFP_KERNEL); + if (nsm != NULL) { + nsm->sm_addr = *sin; + nsm->sm_name = (char *) (nsm + 1); + memcpy(nsm->sm_name, hostname, hostname_len); + nsm->sm_name[hostname_len] = '\0'; + atomic_set(&nsm->sm_count, 1); + + list_add(&nsm->sm_link, &nsm_handles); + } + +out: + 
mutex_unlock(&nsm_mutex); + return nsm; +} + +struct nsm_handle * +nsm_find(const struct sockaddr_in *sin, const char *hostname, int hostname_len) +{ + return __nsm_find(sin, hostname, hostname_len, 1); +} + +/* + * Release an NSM handle + */ +void +nsm_release(struct nsm_handle *nsm) +{ + if (!nsm) + return; + if (atomic_dec_and_test(&nsm->sm_count)) { + mutex_lock(&nsm_mutex); + if (atomic_read(&nsm->sm_count) == 0) { + list_del(&nsm->sm_link); + kfree(nsm); + } + mutex_unlock(&nsm_mutex); + } +} diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c index a816b920d431..e0179f8c327f 100644 --- a/fs/lockd/mon.c +++ b/fs/lockd/mon.c @@ -24,13 +24,13 @@ static struct rpc_program nsm_program; /* * Local NSM state */ -u32 nsm_local_state; +int nsm_local_state; /* * Common procedure for SM_MON/SM_UNMON calls */ static int -nsm_mon_unmon(struct nlm_host *host, u32 proc, struct nsm_res *res) +nsm_mon_unmon(struct nsm_handle *nsm, u32 proc, struct nsm_res *res) { struct rpc_clnt *clnt; int status; @@ -46,10 +46,11 @@ nsm_mon_unmon(struct nlm_host *host, u32 proc, struct nsm_res *res) goto out; } - args.addr = host->h_addr.sin_addr.s_addr; - args.proto= (host->h_proto<<1) | host->h_server; + memset(&args, 0, sizeof(args)); + args.mon_name = nsm->sm_name; + args.addr = nsm->sm_addr.sin_addr.s_addr; args.prog = NLM_PROGRAM; - args.vers = host->h_version; + args.vers = 3; args.proc = NLMPROC_NSM_NOTIFY; memset(res, 0, sizeof(*res)); @@ -70,17 +71,22 @@ nsm_mon_unmon(struct nlm_host *host, u32 proc, struct nsm_res *res) int nsm_monitor(struct nlm_host *host) { + struct nsm_handle *nsm = host->h_nsmhandle; struct nsm_res res; int status; dprintk("lockd: nsm_monitor(%s)\n", host->h_name); + BUG_ON(nsm == NULL); - status = nsm_mon_unmon(host, SM_MON, &res); + if (nsm->sm_monitored) + return 0; + + status = nsm_mon_unmon(nsm, SM_MON, &res); if (status < 0 || res.status != 0) printk(KERN_NOTICE "lockd: cannot monitor %s\n", host->h_name); else - host->h_monitored = 1; + nsm->sm_monitored = 
1; return status; } @@ -90,16 +96,26 @@ nsm_monitor(struct nlm_host *host) int nsm_unmonitor(struct nlm_host *host) { + struct nsm_handle *nsm = host->h_nsmhandle; struct nsm_res res; - int status; - - dprintk("lockd: nsm_unmonitor(%s)\n", host->h_name); - - status = nsm_mon_unmon(host, SM_UNMON, &res); - if (status < 0) - printk(KERN_NOTICE "lockd: cannot unmonitor %s\n", host->h_name); - else - host->h_monitored = 0; + int status = 0; + + if (nsm == NULL) + return 0; + host->h_nsmhandle = NULL; + + if (atomic_read(&nsm->sm_count) == 1 + && nsm->sm_monitored && !nsm->sm_sticky) { + dprintk("lockd: nsm_unmonitor(%s)\n", host->h_name); + + status = nsm_mon_unmon(nsm, SM_UNMON, &res); + if (status < 0) + printk(KERN_NOTICE "lockd: cannot unmonitor %s\n", + host->h_name); + else + nsm->sm_monitored = 0; + } + nsm_release(nsm); return status; } @@ -135,7 +151,7 @@ nsm_create(void) static u32 * xdr_encode_common(struct rpc_rqst *rqstp, u32 *p, struct nsm_args *argp) { - char buffer[20]; + char buffer[20], *name; /* * Use the dotted-quad IP address of the remote host as @@ -143,8 +159,13 @@ xdr_encode_common(struct rpc_rqst *rqstp, u32 *p, struct nsm_args *argp) * hostname first for whatever remote hostname it receives, * so this works alright. 
*/ - sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(argp->addr)); - if (!(p = xdr_encode_string(p, buffer)) + if (nsm_use_hostnames) { + name = argp->mon_name; + } else { + sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(argp->addr)); + name = buffer; + } + if (!(p = xdr_encode_string(p, name)) || !(p = xdr_encode_string(p, utsname()->nodename))) return ERR_PTR(-EIO); *p++ = htonl(argp->prog); @@ -160,9 +181,11 @@ xdr_encode_mon(struct rpc_rqst *rqstp, u32 *p, struct nsm_args *argp) p = xdr_encode_common(rqstp, p, argp); if (IS_ERR(p)) return PTR_ERR(p); + + /* Surprise - there may even be room for an IPv6 address now */ *p++ = argp->addr; - *p++ = argp->vers; - *p++ = argp->proto; + *p++ = 0; + *p++ = 0; *p++ = 0; rqstp->rq_slen = xdr_adjust_iovec(rqstp->rq_svec, p); return 0; diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index 3cc369e5693f..634139232aaf 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -33,6 +33,7 @@ #include <linux/sunrpc/svcsock.h> #include <net/ip.h> #include <linux/lockd/lockd.h> +#include <linux/lockd/sm_inter.h> #include <linux/nfs.h> #define NLMDBG_FACILITY NLMDBG_SVC @@ -61,6 +62,7 @@ static DECLARE_WAIT_QUEUE_HEAD(lockd_exit); static unsigned long nlm_grace_period; static unsigned long nlm_timeout = LOCKD_DFLT_TIMEO; static int nlm_udpport, nlm_tcpport; +int nsm_use_hostnames = 0; /* * Constants needed for the sysctl interface. 
@@ -395,6 +397,22 @@ static ctl_table nlm_sysctls[] = { .extra1 = (int *) &nlm_port_min, .extra2 = (int *) &nlm_port_max, }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "nsm_use_hostnames", + .data = &nsm_use_hostnames, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "nsm_local_state", + .data = &nsm_local_state, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, { .ctl_name = 0 } }; @@ -483,6 +501,7 @@ module_param_call(nlm_udpport, param_set_port, param_get_int, &nlm_udpport, 0644); module_param_call(nlm_tcpport, param_set_port, param_get_int, &nlm_tcpport, 0644); +module_param(nsm_use_hostnames, bool, 0644); /* * Initialising and terminating the module. diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c index a2dd9ccb9b32..fa370f6eb07b 100644 --- a/fs/lockd/svc4proc.c +++ b/fs/lockd/svc4proc.c @@ -38,8 +38,8 @@ nlm4svc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp, return nlm_lck_denied_nolocks; /* Obtain host handle */ - if (!(host = nlmsvc_lookup_host(rqstp)) - || (argp->monitor && !host->h_monitored && nsm_monitor(host) < 0)) + if (!(host = nlmsvc_lookup_host(rqstp, lock->caller, lock->len)) + || (argp->monitor && nsm_monitor(host) < 0)) goto no_locks; *hostp = host; @@ -260,7 +260,9 @@ static int nlm4svc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_args *a struct nlm_rqst *call; int stat; - host = nlmsvc_lookup_host(rqstp); + host = nlmsvc_lookup_host(rqstp, + argp->lock.caller, + argp->lock.len); if (host == NULL) return rpc_system_err; @@ -420,10 +422,6 @@ nlm4svc_proc_sm_notify(struct svc_rqst *rqstp, struct nlm_reboot *argp, void *resp) { struct sockaddr_in saddr = rqstp->rq_addr; - int vers = argp->vers; - int prot = argp->proto >> 1; - - struct nlm_host *host; dprintk("lockd: SM_NOTIFY called\n"); if (saddr.sin_addr.s_addr != htonl(INADDR_LOOPBACK) @@ -438,21 +436,10 @@ nlm4svc_proc_sm_notify(struct svc_rqst 
*rqstp, struct nlm_reboot *argp, /* Obtain the host pointer for this NFS server and try to * reclaim all locks we hold on this server. */ + memset(&saddr, 0, sizeof(saddr)); saddr.sin_addr.s_addr = argp->addr; + nlm_host_rebooted(&saddr, argp->mon, argp->len, argp->state); - if ((argp->proto & 1)==0) { - if ((host = nlmclnt_lookup_host(&saddr, prot, vers)) != NULL) { - nlmclnt_recovery(host, argp->state); - nlm_release_host(host); - } - } else { - /* If we run on an NFS server, delete all locks held by the client */ - - if ((host = nlm_lookup_host(1, &saddr, prot, vers)) != NULL) { - nlmsvc_free_host_resources(host); - nlm_release_host(host); - } - } return rpc_success; } @@ -468,7 +455,7 @@ nlm4svc_proc_granted_res(struct svc_rqst *rqstp, struct nlm_res *argp, dprintk("lockd: GRANTED_RES called\n"); - nlmsvc_grant_reply(rqstp, &argp->cookie, argp->status); + nlmsvc_grant_reply(&argp->cookie, argp->status); return rpc_success; } diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index 93c00ee7189d..814c6064c9e0 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -40,7 +40,7 @@ static void nlmsvc_release_block(struct nlm_block *block); static void nlmsvc_insert_block(struct nlm_block *block, unsigned long); -static int nlmsvc_remove_block(struct nlm_block *block); +static void nlmsvc_remove_block(struct nlm_block *block); static int nlmsvc_setgrantargs(struct nlm_rqst *call, struct nlm_lock *lock); static void nlmsvc_freegrantargs(struct nlm_rqst *call); @@ -49,7 +49,7 @@ static const struct rpc_call_ops nlmsvc_grant_ops; /* * The list of blocked locks to retry */ -static struct nlm_block * nlm_blocked; +static LIST_HEAD(nlm_blocked); /* * Insert a blocked lock into the global list @@ -57,48 +57,44 @@ static struct nlm_block * nlm_blocked; static void nlmsvc_insert_block(struct nlm_block *block, unsigned long when) { - struct nlm_block **bp, *b; + struct nlm_block *b; + struct list_head *pos; dprintk("lockd: nlmsvc_insert_block(%p, %ld)\n", block, when); - 
kref_get(&block->b_count); - if (block->b_queued) - nlmsvc_remove_block(block); - bp = &nlm_blocked; + if (list_empty(&block->b_list)) { + kref_get(&block->b_count); + } else { + list_del_init(&block->b_list); + } + + pos = &nlm_blocked; if (when != NLM_NEVER) { if ((when += jiffies) == NLM_NEVER) when ++; - while ((b = *bp) && time_before_eq(b->b_when,when) && b->b_when != NLM_NEVER) - bp = &b->b_next; - } else - while ((b = *bp) != 0) - bp = &b->b_next; + list_for_each(pos, &nlm_blocked) { + b = list_entry(pos, struct nlm_block, b_list); + if (time_after(b->b_when,when) || b->b_when == NLM_NEVER) + break; + } + /* On normal exit from the loop, pos == &nlm_blocked, + * so we will be adding to the end of the list - good + */ + } - block->b_queued = 1; + list_add_tail(&block->b_list, pos); block->b_when = when; - block->b_next = b; - *bp = block; } /* * Remove a block from the global list */ -static int +static inline void nlmsvc_remove_block(struct nlm_block *block) { - struct nlm_block **bp, *b; - - if (!block->b_queued) - return 1; - for (bp = &nlm_blocked; (b = *bp) != 0; bp = &b->b_next) { - if (b == block) { - *bp = block->b_next; - block->b_queued = 0; - nlmsvc_release_block(block); - return 1; - } + if (!list_empty(&block->b_list)) { + list_del_init(&block->b_list); + nlmsvc_release_block(block); } - - return 0; } /* @@ -107,14 +103,14 @@ nlmsvc_remove_block(struct nlm_block *block) static struct nlm_block * nlmsvc_lookup_block(struct nlm_file *file, struct nlm_lock *lock) { - struct nlm_block **head, *block; + struct nlm_block *block; struct file_lock *fl; dprintk("lockd: nlmsvc_lookup_block f=%p pd=%d %Ld-%Ld ty=%d\n", file, lock->fl.fl_pid, (long long)lock->fl.fl_start, (long long)lock->fl.fl_end, lock->fl.fl_type); - for (head = &nlm_blocked; (block = *head) != 0; head = &block->b_next) { + list_for_each_entry(block, &nlm_blocked, b_list) { fl = &block->b_call->a_args.lock.fl; dprintk("lockd: check f=%p pd=%d %Ld-%Ld ty=%d cookie=%s\n", block->b_file, 
fl->fl_pid, @@ -143,20 +139,20 @@ static inline int nlm_cookie_match(struct nlm_cookie *a, struct nlm_cookie *b) * Find a block with a given NLM cookie. */ static inline struct nlm_block * -nlmsvc_find_block(struct nlm_cookie *cookie, struct sockaddr_in *sin) +nlmsvc_find_block(struct nlm_cookie *cookie) { struct nlm_block *block; - for (block = nlm_blocked; block; block = block->b_next) { - dprintk("cookie: head of blocked queue %p, block %p\n", - nlm_blocked, block); - if (nlm_cookie_match(&block->b_call->a_args.cookie,cookie) - && nlm_cmp_addr(sin, &block->b_host->h_addr)) - break; + list_for_each_entry(block, &nlm_blocked, b_list) { + if (nlm_cookie_match(&block->b_call->a_args.cookie,cookie)) + goto found; } - if (block != NULL) - kref_get(&block->b_count); + return NULL; + +found: + dprintk("nlmsvc_find_block(%s): block=%p\n", nlmdbg_cookie2a(cookie), block); + kref_get(&block->b_count); return block; } @@ -169,6 +165,11 @@ nlmsvc_find_block(struct nlm_cookie *cookie, struct sockaddr_in *sin) * request, but (as I found out later) that's because some implementations * do just this. Never mind the standards comittees, they support our * logging industries. + * + * 10 years later: I hope we can safely ignore these old and broken + * clients by now. Let's fix this so we can uniquely identify an incoming + * GRANTED_RES message by cookie, without having to rely on the client's IP + * address. 
--okir */ static inline struct nlm_block * nlmsvc_create_block(struct svc_rqst *rqstp, struct nlm_file *file, @@ -179,7 +180,7 @@ nlmsvc_create_block(struct svc_rqst *rqstp, struct nlm_file *file, struct nlm_rqst *call = NULL; /* Create host handle for callback */ - host = nlmsvc_lookup_host(rqstp); + host = nlmsvc_lookup_host(rqstp, lock->caller, lock->len); if (host == NULL) return NULL; @@ -192,6 +193,8 @@ nlmsvc_create_block(struct svc_rqst *rqstp, struct nlm_file *file, if (block == NULL) goto failed; kref_init(&block->b_count); + INIT_LIST_HEAD(&block->b_list); + INIT_LIST_HEAD(&block->b_flist); if (!nlmsvc_setgrantargs(call, lock)) goto failed_free; @@ -199,7 +202,7 @@ nlmsvc_create_block(struct svc_rqst *rqstp, struct nlm_file *file, /* Set notifier function for VFS, and init args */ call->a_args.lock.fl.fl_flags |= FL_SLEEP; call->a_args.lock.fl.fl_lmops = &nlmsvc_lock_operations; - call->a_args.cookie = *cookie; /* see above */ + nlmclnt_next_cookie(&call->a_args.cookie); dprintk("lockd: created block %p...\n", block); @@ -210,8 +213,7 @@ nlmsvc_create_block(struct svc_rqst *rqstp, struct nlm_file *file, file->f_count++; /* Add to file's list of blocks */ - block->b_fnext = file->f_blocks; - file->f_blocks = block; + list_add(&block->b_flist, &file->f_blocks); /* Set up RPC arguments for callback */ block->b_call = call; @@ -248,19 +250,13 @@ static void nlmsvc_free_block(struct kref *kref) { struct nlm_block *block = container_of(kref, struct nlm_block, b_count); struct nlm_file *file = block->b_file; - struct nlm_block **bp; dprintk("lockd: freeing block %p...\n", block); - down(&file->f_sema); /* Remove block from file's list of blocks */ - for (bp = &file->f_blocks; *bp; bp = &(*bp)->b_fnext) { - if (*bp == block) { - *bp = block->b_fnext; - break; - } - } - up(&file->f_sema); + mutex_lock(&file->f_mutex); + list_del_init(&block->b_flist); + mutex_unlock(&file->f_mutex); nlmsvc_freegrantargs(block->b_call); nlm_release_call(block->b_call); @@ -274,47 
+270,32 @@ static void nlmsvc_release_block(struct nlm_block *block) kref_put(&block->b_count, nlmsvc_free_block); } -static void nlmsvc_act_mark(struct nlm_host *host, struct nlm_file *file) -{ - struct nlm_block *block; - - down(&file->f_sema); - for (block = file->f_blocks; block != NULL; block = block->b_fnext) - block->b_host->h_inuse = 1; - up(&file->f_sema); -} - -static void nlmsvc_act_unlock(struct nlm_host *host, struct nlm_file *file) +/* + * Loop over all blocks and delete blocks held by + * a matching host. + */ +void nlmsvc_traverse_blocks(struct nlm_host *host, + struct nlm_file *file, + nlm_host_match_fn_t match) { - struct nlm_block *block; + struct nlm_block *block, *next; restart: - down(&file->f_sema); - for (block = file->f_blocks; block != NULL; block = block->b_fnext) { - if (host != NULL && host != block->b_host) + mutex_lock(&file->f_mutex); + list_for_each_entry_safe(block, next, &file->f_blocks, b_flist) { + if (!match(block->b_host, host)) continue; - if (!block->b_queued) + /* Do not destroy blocks that are not on + * the global retry list - why? */ + if (list_empty(&block->b_list)) continue; kref_get(&block->b_count); - up(&file->f_sema); + mutex_unlock(&file->f_mutex); nlmsvc_unlink_block(block); nlmsvc_release_block(block); goto restart; } - up(&file->f_sema); -} - -/* - * Loop over all blocks and perform the action specified. - * (NLM_ACT_CHECK handled by nlmsvc_inspect_file). 
- */ -void -nlmsvc_traverse_blocks(struct nlm_host *host, struct nlm_file *file, int action) -{ - if (action == NLM_ACT_MARK) - nlmsvc_act_mark(host, file); - else - nlmsvc_act_unlock(host, file); + mutex_unlock(&file->f_mutex); } /* @@ -373,7 +354,7 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file, lock->fl.fl_flags &= ~FL_SLEEP; again: /* Lock file against concurrent access */ - down(&file->f_sema); + mutex_lock(&file->f_mutex); /* Get existing block (in case client is busy-waiting) */ block = nlmsvc_lookup_block(file, lock); if (block == NULL) { @@ -411,10 +392,10 @@ again: /* If we don't have a block, create and initialize it. Then * retry because we may have slept in kmalloc. */ - /* We have to release f_sema as nlmsvc_create_block may try to + /* We have to release f_mutex as nlmsvc_create_block may try to * to claim it while doing host garbage collection */ if (newblock == NULL) { - up(&file->f_sema); + mutex_unlock(&file->f_mutex); dprintk("lockd: blocking on this lock (allocating).\n"); if (!(newblock = nlmsvc_create_block(rqstp, file, lock, cookie))) return nlm_lck_denied_nolocks; @@ -424,7 +405,7 @@ again: /* Append to list of blocked */ nlmsvc_insert_block(newblock, NLM_NEVER); out: - up(&file->f_sema); + mutex_unlock(&file->f_mutex); nlmsvc_release_block(newblock); nlmsvc_release_block(block); dprintk("lockd: nlmsvc_lock returned %u\n", ret); @@ -451,6 +432,7 @@ nlmsvc_testlock(struct nlm_file *file, struct nlm_lock *lock, (long long)conflock->fl.fl_start, (long long)conflock->fl.fl_end); conflock->caller = "somehost"; /* FIXME */ + conflock->len = strlen(conflock->caller); conflock->oh.len = 0; /* don't return OH info */ conflock->svid = conflock->fl.fl_pid; return nlm_lck_denied; @@ -507,9 +489,9 @@ nlmsvc_cancel_blocked(struct nlm_file *file, struct nlm_lock *lock) (long long)lock->fl.fl_start, (long long)lock->fl.fl_end); - down(&file->f_sema); + mutex_lock(&file->f_mutex); block = nlmsvc_lookup_block(file, lock); - up(&file->f_sema); + 
mutex_unlock(&file->f_mutex); if (block != NULL) { status = nlmsvc_unlink_block(block); nlmsvc_release_block(block); @@ -527,10 +509,10 @@ nlmsvc_cancel_blocked(struct nlm_file *file, struct nlm_lock *lock) static void nlmsvc_notify_blocked(struct file_lock *fl) { - struct nlm_block **bp, *block; + struct nlm_block *block; dprintk("lockd: VFS unblock notification for block %p\n", fl); - for (bp = &nlm_blocked; (block = *bp) != 0; bp = &block->b_next) { + list_for_each_entry(block, &nlm_blocked, b_list) { if (nlm_compare_locks(&block->b_call->a_args.lock.fl, fl)) { nlmsvc_insert_block(block, 0); svc_wake_up(block->b_daemon); @@ -663,17 +645,14 @@ static const struct rpc_call_ops nlmsvc_grant_ops = { * block. */ void -nlmsvc_grant_reply(struct svc_rqst *rqstp, struct nlm_cookie *cookie, u32 status) +nlmsvc_grant_reply(struct nlm_cookie *cookie, u32 status) { struct nlm_block *block; - struct nlm_file *file; - dprintk("grant_reply: looking for cookie %x, host (%08x), s=%d \n", - *(unsigned int *)(cookie->data), - ntohl(rqstp->rq_addr.sin_addr.s_addr), status); - if (!(block = nlmsvc_find_block(cookie, &rqstp->rq_addr))) + dprintk("grant_reply: looking for cookie %x, s=%d \n", + *(unsigned int *)(cookie->data), status); + if (!(block = nlmsvc_find_block(cookie))) return; - file = block->b_file; if (block) { if (status == NLM_LCK_DENIED_GRACE_PERIOD) { @@ -696,16 +675,19 @@ nlmsvc_grant_reply(struct svc_rqst *rqstp, struct nlm_cookie *cookie, u32 status unsigned long nlmsvc_retry_blocked(void) { - struct nlm_block *block; + unsigned long timeout = MAX_SCHEDULE_TIMEOUT; + struct nlm_block *block; + + while (!list_empty(&nlm_blocked)) { + block = list_entry(nlm_blocked.next, struct nlm_block, b_list); - dprintk("nlmsvc_retry_blocked(%p, when=%ld)\n", - nlm_blocked, - nlm_blocked? 
nlm_blocked->b_when : 0); - while ((block = nlm_blocked) != 0) { if (block->b_when == NLM_NEVER) break; - if (time_after(block->b_when,jiffies)) + if (time_after(block->b_when,jiffies)) { + timeout = block->b_when - jiffies; break; + } + dprintk("nlmsvc_retry_blocked(%p, when=%ld)\n", block, block->b_when); kref_get(&block->b_count); @@ -713,8 +695,5 @@ nlmsvc_retry_blocked(void) nlmsvc_release_block(block); } - if ((block = nlm_blocked) && block->b_when != NLM_NEVER) - return (block->b_when - jiffies); - - return MAX_SCHEDULE_TIMEOUT; + return timeout; } diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c index dbb66a3b5cd9..75b2c81bcb93 100644 --- a/fs/lockd/svcproc.c +++ b/fs/lockd/svcproc.c @@ -66,8 +66,8 @@ nlmsvc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp, return nlm_lck_denied_nolocks; /* Obtain host handle */ - if (!(host = nlmsvc_lookup_host(rqstp)) - || (argp->monitor && !host->h_monitored && nsm_monitor(host) < 0)) + if (!(host = nlmsvc_lookup_host(rqstp, lock->caller, lock->len)) + || (argp->monitor && nsm_monitor(host) < 0)) goto no_locks; *hostp = host; @@ -287,7 +287,9 @@ static int nlmsvc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_args *ar struct nlm_rqst *call; int stat; - host = nlmsvc_lookup_host(rqstp); + host = nlmsvc_lookup_host(rqstp, + argp->lock.caller, + argp->lock.len); if (host == NULL) return rpc_system_err; @@ -449,9 +451,6 @@ nlmsvc_proc_sm_notify(struct svc_rqst *rqstp, struct nlm_reboot *argp, void *resp) { struct sockaddr_in saddr = rqstp->rq_addr; - int vers = argp->vers; - int prot = argp->proto >> 1; - struct nlm_host *host; dprintk("lockd: SM_NOTIFY called\n"); if (saddr.sin_addr.s_addr != htonl(INADDR_LOOPBACK) @@ -466,19 +465,9 @@ nlmsvc_proc_sm_notify(struct svc_rqst *rqstp, struct nlm_reboot *argp, /* Obtain the host pointer for this NFS server and try to * reclaim all locks we hold on this server. 
*/ + memset(&saddr, 0, sizeof(saddr)); saddr.sin_addr.s_addr = argp->addr; - if ((argp->proto & 1)==0) { - if ((host = nlmclnt_lookup_host(&saddr, prot, vers)) != NULL) { - nlmclnt_recovery(host, argp->state); - nlm_release_host(host); - } - } else { - /* If we run on an NFS server, delete all locks held by the client */ - if ((host = nlm_lookup_host(1, &saddr, prot, vers)) != NULL) { - nlmsvc_free_host_resources(host); - nlm_release_host(host); - } - } + nlm_host_rebooted(&saddr, argp->mon, argp->len, argp->state); return rpc_success; } @@ -495,7 +484,7 @@ nlmsvc_proc_granted_res(struct svc_rqst *rqstp, struct nlm_res *argp, dprintk("lockd: GRANTED_RES called\n"); - nlmsvc_grant_reply(rqstp, &argp->cookie, argp->status); + nlmsvc_grant_reply(&argp->cookie, argp->status); return rpc_success; } diff --git a/fs/lockd/svcshare.c b/fs/lockd/svcshare.c index 27288c83da96..b9926ce8782e 100644 --- a/fs/lockd/svcshare.c +++ b/fs/lockd/svcshare.c @@ -85,24 +85,20 @@ nlmsvc_unshare_file(struct nlm_host *host, struct nlm_file *file, } /* - * Traverse all shares for a given file (and host). - * NLM_ACT_CHECK is handled by nlmsvc_inspect_file. 
+ * Traverse all shares for a given file, and delete + * those owned by the given (type of) host */ -void -nlmsvc_traverse_shares(struct nlm_host *host, struct nlm_file *file, int action) +void nlmsvc_traverse_shares(struct nlm_host *host, struct nlm_file *file, + nlm_host_match_fn_t match) { struct nlm_share *share, **shpp; shpp = &file->f_shares; while ((share = *shpp) != NULL) { - if (action == NLM_ACT_MARK) - share->s_host->h_inuse = 1; - else if (action == NLM_ACT_UNLOCK) { - if (host == NULL || host == share->s_host) { - *shpp = share->s_next; - kfree(share); - continue; - } + if (match(share->s_host, host)) { + *shpp = share->s_next; + kfree(share); + continue; } shpp = &share->s_next; } diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c index a92dd98f8401..514f5f20701e 100644 --- a/fs/lockd/svcsubs.c +++ b/fs/lockd/svcsubs.c @@ -25,9 +25,9 @@ /* * Global file hash table */ -#define FILE_HASH_BITS 5 +#define FILE_HASH_BITS 7 #define FILE_NRHASH (1<<FILE_HASH_BITS) -static struct nlm_file * nlm_files[FILE_NRHASH]; +static struct hlist_head nlm_files[FILE_NRHASH]; static DEFINE_MUTEX(nlm_file_mutex); #ifdef NFSD_DEBUG @@ -82,6 +82,7 @@ u32 nlm_lookup_file(struct svc_rqst *rqstp, struct nlm_file **result, struct nfs_fh *f) { + struct hlist_node *pos; struct nlm_file *file; unsigned int hash; u32 nfserr; @@ -93,7 +94,7 @@ nlm_lookup_file(struct svc_rqst *rqstp, struct nlm_file **result, /* Lock file table */ mutex_lock(&nlm_file_mutex); - for (file = nlm_files[hash]; file; file = file->f_next) + hlist_for_each_entry(file, pos, &nlm_files[hash], f_list) if (!nfs_compare_fh(&file->f_handle, f)) goto found; @@ -105,8 +106,9 @@ nlm_lookup_file(struct svc_rqst *rqstp, struct nlm_file **result, goto out_unlock; memcpy(&file->f_handle, f, sizeof(struct nfs_fh)); - file->f_hash = hash; - init_MUTEX(&file->f_sema); + mutex_init(&file->f_mutex); + INIT_HLIST_NODE(&file->f_list); + INIT_LIST_HEAD(&file->f_blocks); /* Open the file. 
Note that this must not sleep for too long, else * we would lock up lockd:-) So no NFS re-exports, folks. @@ -115,12 +117,11 @@ nlm_lookup_file(struct svc_rqst *rqstp, struct nlm_file **result, * the file. */ if ((nfserr = nlmsvc_ops->fopen(rqstp, f, &file->f_file)) != 0) { - dprintk("lockd: open failed (nfserr %d)\n", ntohl(nfserr)); + dprintk("lockd: open failed (error %d)\n", nfserr); goto out_free; } - file->f_next = nlm_files[hash]; - nlm_files[hash] = file; + hlist_add_head(&file->f_list, &nlm_files[hash]); found: dprintk("lockd: found file %p (count %d)\n", file, file->f_count); @@ -149,22 +150,14 @@ out_free: static inline void nlm_delete_file(struct nlm_file *file) { - struct nlm_file **fp, *f; - nlm_debug_print_file("closing file", file); - - fp = nlm_files + file->f_hash; - while ((f = *fp) != NULL) { - if (f == file) { - *fp = file->f_next; - nlmsvc_ops->fclose(file->f_file); - kfree(file); - return; - } - fp = &f->f_next; + if (!hlist_unhashed(&file->f_list)) { + hlist_del(&file->f_list); + nlmsvc_ops->fclose(file->f_file); + kfree(file); + } else { + printk(KERN_WARNING "lockd: attempt to release unknown file!\n"); } - - printk(KERN_WARNING "lockd: attempt to release unknown file!\n"); } /* @@ -172,7 +165,8 @@ nlm_delete_file(struct nlm_file *file) * action. 
*/ static int -nlm_traverse_locks(struct nlm_host *host, struct nlm_file *file, int action) +nlm_traverse_locks(struct nlm_host *host, struct nlm_file *file, + nlm_host_match_fn_t match) { struct inode *inode = nlmsvc_file_inode(file); struct file_lock *fl; @@ -186,17 +180,11 @@ again: /* update current lock count */ file->f_locks++; + lockhost = (struct nlm_host *) fl->fl_owner; - if (action == NLM_ACT_MARK) - lockhost->h_inuse = 1; - else if (action == NLM_ACT_CHECK) - return 1; - else if (action == NLM_ACT_UNLOCK) { + if (match(lockhost, host)) { struct file_lock lock = *fl; - if (host && lockhost != host) - continue; - lock.fl_type = F_UNLCK; lock.fl_start = 0; lock.fl_end = OFFSET_MAX; @@ -213,53 +201,66 @@ again: } /* - * Operate on a single file + * Inspect a single file */ static inline int -nlm_inspect_file(struct nlm_host *host, struct nlm_file *file, int action) +nlm_inspect_file(struct nlm_host *host, struct nlm_file *file, nlm_host_match_fn_t match) { - if (action == NLM_ACT_CHECK) { - /* Fast path for mark and sweep garbage collection */ - if (file->f_count || file->f_blocks || file->f_shares) + nlmsvc_traverse_blocks(host, file, match); + nlmsvc_traverse_shares(host, file, match); + return nlm_traverse_locks(host, file, match); +} + +/* + * Quick check whether there are still any locks, blocks or + * shares on a given file. + */ +static inline int +nlm_file_inuse(struct nlm_file *file) +{ + struct inode *inode = nlmsvc_file_inode(file); + struct file_lock *fl; + + if (file->f_count || !list_empty(&file->f_blocks) || file->f_shares) + return 1; + + for (fl = inode->i_flock; fl; fl = fl->fl_next) { + if (fl->fl_lmops == &nlmsvc_lock_operations) return 1; - } else { - nlmsvc_traverse_blocks(host, file, action); - nlmsvc_traverse_shares(host, file, action); } - return nlm_traverse_locks(host, file, action); + file->f_locks = 0; + return 0; } /* * Loop over all files in the file table. 
*/ static int -nlm_traverse_files(struct nlm_host *host, int action) +nlm_traverse_files(struct nlm_host *host, nlm_host_match_fn_t match) { - struct nlm_file *file, **fp; + struct hlist_node *pos, *next; + struct nlm_file *file; int i, ret = 0; mutex_lock(&nlm_file_mutex); for (i = 0; i < FILE_NRHASH; i++) { - fp = nlm_files + i; - while ((file = *fp) != NULL) { + hlist_for_each_entry_safe(file, pos, next, &nlm_files[i], f_list) { file->f_count++; mutex_unlock(&nlm_file_mutex); /* Traverse locks, blocks and shares of this file * and update file->f_locks count */ - if (nlm_inspect_file(host, file, action)) + if (nlm_inspect_file(host, file, match)) ret = 1; mutex_lock(&nlm_file_mutex); file->f_count--; /* No more references to this file. Let go of it. */ - if (!file->f_blocks && !file->f_locks + if (list_empty(&file->f_blocks) && !file->f_locks && !file->f_shares && !file->f_count) { - *fp = file->f_next; + hlist_del(&file->f_list); nlmsvc_ops->fclose(file->f_file); kfree(file); - } else { - fp = &file->f_next; } } } @@ -286,23 +287,54 @@ nlm_release_file(struct nlm_file *file) mutex_lock(&nlm_file_mutex); /* If there are no more locks etc, delete the file */ - if(--file->f_count == 0) { - if(!nlm_inspect_file(NULL, file, NLM_ACT_CHECK)) - nlm_delete_file(file); - } + if (--file->f_count == 0 && !nlm_file_inuse(file)) + nlm_delete_file(file); mutex_unlock(&nlm_file_mutex); } /* + * Helpers function for resource traversal + * + * nlmsvc_mark_host: + * used by the garbage collector; simply sets h_inuse. + * Always returns 0. + * + * nlmsvc_same_host: + * returns 1 iff the two hosts match. Used to release + * all resources bound to a specific host. + * + * nlmsvc_is_client: + * returns 1 iff the host is a client. 
+ * Used by nlmsvc_invalidate_all + */ +static int +nlmsvc_mark_host(struct nlm_host *host, struct nlm_host *dummy) +{ + host->h_inuse = 1; + return 0; +} + +static int +nlmsvc_same_host(struct nlm_host *host, struct nlm_host *other) +{ + return host == other; +} + +static int +nlmsvc_is_client(struct nlm_host *host, struct nlm_host *dummy) +{ + return host->h_server; +} + +/* * Mark all hosts that still hold resources */ void nlmsvc_mark_resources(void) { dprintk("lockd: nlmsvc_mark_resources\n"); - - nlm_traverse_files(NULL, NLM_ACT_MARK); + nlm_traverse_files(NULL, nlmsvc_mark_host); } /* @@ -313,23 +345,25 @@ nlmsvc_free_host_resources(struct nlm_host *host) { dprintk("lockd: nlmsvc_free_host_resources\n"); - if (nlm_traverse_files(host, NLM_ACT_UNLOCK)) + if (nlm_traverse_files(host, nlmsvc_same_host)) { printk(KERN_WARNING - "lockd: couldn't remove all locks held by %s", + "lockd: couldn't remove all locks held by %s\n", host->h_name); + BUG(); + } } /* - * delete all hosts structs for clients + * Remove all locks held for clients */ void nlmsvc_invalidate_all(void) { - struct nlm_host *host; - while ((host = nlm_find_client()) != NULL) { - nlmsvc_free_host_resources(host); - host->h_expires = 0; - host->h_killed = 1; - nlm_release_host(host); - } + /* Release all locks held by NFS clients. + * Previously, the code would call + * nlmsvc_free_host_resources for each client in + * turn, which is about as inefficient as it gets. + * Now we just do it once in nlm_traverse_files. 
+ */ + nlm_traverse_files(NULL, nlmsvc_is_client); } diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index cfe141e5d759..e13fa23bd108 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -319,12 +319,25 @@ svc_expkey_update(struct svc_expkey *new, struct svc_expkey *old) static struct cache_head *export_table[EXPORT_HASHMAX]; +static void nfsd4_fslocs_free(struct nfsd4_fs_locations *fsloc) +{ + int i; + + for (i = 0; i < fsloc->locations_count; i++) { + kfree(fsloc->locations[i].path); + kfree(fsloc->locations[i].hosts); + } + kfree(fsloc->locations); +} + static void svc_export_put(struct kref *ref) { struct svc_export *exp = container_of(ref, struct svc_export, h.ref); dput(exp->ex_dentry); mntput(exp->ex_mnt); auth_domain_put(exp->ex_client); + kfree(exp->ex_path); + nfsd4_fslocs_free(&exp->ex_fslocs); kfree(exp); } @@ -386,6 +399,69 @@ static int check_export(struct inode *inode, int flags) } +#ifdef CONFIG_NFSD_V4 + +static int +fsloc_parse(char **mesg, char *buf, struct nfsd4_fs_locations *fsloc) +{ + int len; + int migrated, i, err; + + len = qword_get(mesg, buf, PAGE_SIZE); + if (len != 5 || memcmp(buf, "fsloc", 5)) + return 0; + + /* listsize */ + err = get_int(mesg, &fsloc->locations_count); + if (err) + return err; + if (fsloc->locations_count > MAX_FS_LOCATIONS) + return -EINVAL; + if (fsloc->locations_count == 0) + return 0; + + fsloc->locations = kzalloc(fsloc->locations_count + * sizeof(struct nfsd4_fs_location), GFP_KERNEL); + if (!fsloc->locations) + return -ENOMEM; + for (i=0; i < fsloc->locations_count; i++) { + /* colon separated host list */ + err = -EINVAL; + len = qword_get(mesg, buf, PAGE_SIZE); + if (len <= 0) + goto out_free_all; + err = -ENOMEM; + fsloc->locations[i].hosts = kstrdup(buf, GFP_KERNEL); + if (!fsloc->locations[i].hosts) + goto out_free_all; + err = -EINVAL; + /* slash separated path component list */ + len = qword_get(mesg, buf, PAGE_SIZE); + if (len <= 0) + goto out_free_all; + err = -ENOMEM; + fsloc->locations[i].path 
= kstrdup(buf, GFP_KERNEL); + if (!fsloc->locations[i].path) + goto out_free_all; + } + /* migrated */ + err = get_int(mesg, &migrated); + if (err) + goto out_free_all; + err = -EINVAL; + if (migrated < 0 || migrated > 1) + goto out_free_all; + fsloc->migrated = migrated; + return 0; +out_free_all: + nfsd4_fslocs_free(fsloc); + return err; +} + +#else /* CONFIG_NFSD_V4 */ +static inline int fsloc_parse(char **mesg, char *buf, struct nfsd4_fs_locations *fsloc) { return 0; } +#endif + static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen) { /* client path expiry [flags anonuid anongid fsid] */ @@ -398,6 +474,7 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen) int an_int; nd.dentry = NULL; + exp.ex_path = NULL; if (mesg[mlen-1] != '\n') return -EINVAL; @@ -428,6 +505,10 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen) exp.ex_client = dom; exp.ex_mnt = nd.mnt; exp.ex_dentry = nd.dentry; + exp.ex_path = kstrdup(buf, GFP_KERNEL); + err = -ENOMEM; + if (!exp.ex_path) + goto out; /* expiry */ err = -EINVAL; @@ -435,6 +516,11 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen) if (exp.h.expiry_time == 0) goto out; + /* fs locations */ + exp.ex_fslocs.locations = NULL; + exp.ex_fslocs.locations_count = 0; + exp.ex_fslocs.migrated = 0; + /* flags */ err = get_int(&mesg, &an_int); if (err == -ENOENT) @@ -460,6 +546,10 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen) err = check_export(nd.dentry->d_inode, exp.ex_flags); if (err) goto out; + + err = fsloc_parse(&mesg, buf, &exp.ex_fslocs); + if (err) + goto out; } expp = svc_export_lookup(&exp); @@ -473,6 +563,7 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen) else exp_put(expp); out: + kfree(exp.ex_path); if (nd.dentry) path_release(&nd); out_no_path: @@ -482,7 +573,8 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen) return err; } -static 
void exp_flags(struct seq_file *m, int flag, int fsid, uid_t anonu, uid_t anong); +static void exp_flags(struct seq_file *m, int flag, int fsid, + uid_t anonu, uid_t anong, struct nfsd4_fs_locations *fslocs); static int svc_export_show(struct seq_file *m, struct cache_detail *cd, @@ -501,8 +593,8 @@ static int svc_export_show(struct seq_file *m, seq_putc(m, '('); if (test_bit(CACHE_VALID, &h->flags) && !test_bit(CACHE_NEGATIVE, &h->flags)) - exp_flags(m, exp->ex_flags, exp->ex_fsid, - exp->ex_anon_uid, exp->ex_anon_gid); + exp_flags(m, exp->ex_flags, exp->ex_fsid, + exp->ex_anon_uid, exp->ex_anon_gid, &exp->ex_fslocs); seq_puts(m, ")\n"); return 0; } @@ -524,6 +616,10 @@ static void svc_export_init(struct cache_head *cnew, struct cache_head *citem) new->ex_client = item->ex_client; new->ex_dentry = dget(item->ex_dentry); new->ex_mnt = mntget(item->ex_mnt); + new->ex_path = NULL; + new->ex_fslocs.locations = NULL; + new->ex_fslocs.locations_count = 0; + new->ex_fslocs.migrated = 0; } static void export_update(struct cache_head *cnew, struct cache_head *citem) @@ -535,6 +631,14 @@ static void export_update(struct cache_head *cnew, struct cache_head *citem) new->ex_anon_uid = item->ex_anon_uid; new->ex_anon_gid = item->ex_anon_gid; new->ex_fsid = item->ex_fsid; + new->ex_path = item->ex_path; + item->ex_path = NULL; + new->ex_fslocs.locations = item->ex_fslocs.locations; + item->ex_fslocs.locations = NULL; + new->ex_fslocs.locations_count = item->ex_fslocs.locations_count; + item->ex_fslocs.locations_count = 0; + new->ex_fslocs.migrated = item->ex_fslocs.migrated; + item->ex_fslocs.migrated = 0; } static struct cache_head *svc_export_alloc(void) @@ -1048,30 +1152,21 @@ int exp_pseudoroot(struct auth_domain *clp, struct svc_fh *fhp, struct cache_req *creq) { - struct svc_expkey *fsid_key; struct svc_export *exp; int rv; u32 fsidv[2]; mk_fsid_v1(fsidv, 0); - fsid_key = exp_find_key(clp, 1, fsidv, creq); - if (IS_ERR(fsid_key) && PTR_ERR(fsid_key) == -EAGAIN) + exp = 
exp_find(clp, 1, fsidv, creq); + if (IS_ERR(exp) && PTR_ERR(exp) == -EAGAIN) return nfserr_dropit; - if (!fsid_key || IS_ERR(fsid_key)) - return nfserr_perm; - - exp = exp_get_by_name(clp, fsid_key->ek_mnt, fsid_key->ek_dentry, creq); if (exp == NULL) - rv = nfserr_perm; + return nfserr_perm; else if (IS_ERR(exp)) - rv = nfserrno(PTR_ERR(exp)); - else { - rv = fh_compose(fhp, exp, - fsid_key->ek_dentry, NULL); - exp_put(exp); - } - cache_put(&fsid_key->h, &svc_expkey_cache); + return nfserrno(PTR_ERR(exp)); + rv = fh_compose(fhp, exp, exp->ex_dentry, NULL); + exp_put(exp); return rv; } @@ -1158,7 +1253,8 @@ static struct flags { { 0, {"", ""}} }; -static void exp_flags(struct seq_file *m, int flag, int fsid, uid_t anonu, uid_t anong) +static void exp_flags(struct seq_file *m, int flag, int fsid, + uid_t anonu, uid_t anong, struct nfsd4_fs_locations *fsloc) { int first = 0; struct flags *flg; @@ -1174,6 +1270,21 @@ static void exp_flags(struct seq_file *m, int flag, int fsid, uid_t anonu, uid_t seq_printf(m, "%sanonuid=%d", first++?",":"", anonu); if (anong != (gid_t)-2 && anong != (0x10000-2)) seq_printf(m, "%sanongid=%d", first++?",":"", anong); + if (fsloc && fsloc->locations_count > 0) { + char *loctype = (fsloc->migrated) ? 
"refer" : "replicas"; + int i; + + seq_printf(m, "%s%s=", first++?",":"", loctype); + seq_escape(m, fsloc->locations[0].path, ",;@ \t\n\\"); + seq_putc(m, '@'); + seq_escape(m, fsloc->locations[0].hosts, ",;@ \t\n\\"); + for (i = 1; i < fsloc->locations_count; i++) { + seq_putc(m, ';'); + seq_escape(m, fsloc->locations[i].path, ",;@ \t\n\\"); + seq_putc(m, '@'); + seq_escape(m, fsloc->locations[i].hosts, ",;@ \t\n\\"); + } + } } static int e_show(struct seq_file *m, void *p) diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c index fe56b38364cc..9187755661df 100644 --- a/fs/nfsd/nfs2acl.c +++ b/fs/nfsd/nfs2acl.c @@ -241,7 +241,7 @@ static int nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, u32 *p, rqstp->rq_res.page_len = w; while (w > 0) { - if (!svc_take_res_page(rqstp)) + if (!rqstp->rq_respages[rqstp->rq_resused++]) return 0; w -= PAGE_SIZE; } @@ -333,4 +333,5 @@ struct svc_version nfsd_acl_version2 = { .vs_proc = nfsd_acl_procedures2, .vs_dispatch = nfsd_dispatch, .vs_xdrsize = NFS3_SVC_XDRSIZE, + .vs_hidden = 1, }; diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c index 16e10c170aed..d4bdc00c1169 100644 --- a/fs/nfsd/nfs3acl.c +++ b/fs/nfsd/nfs3acl.c @@ -185,7 +185,7 @@ static int nfs3svc_encode_getaclres(struct svc_rqst *rqstp, u32 *p, rqstp->rq_res.page_len = w; while (w > 0) { - if (!svc_take_res_page(rqstp)) + if (!rqstp->rq_respages[rqstp->rq_resused++]) return 0; w -= PAGE_SIZE; } @@ -263,5 +263,6 @@ struct svc_version nfsd_acl_version3 = { .vs_proc = nfsd_acl_procedures3, .vs_dispatch = nfsd_dispatch, .vs_xdrsize = NFS3_SVC_XDRSIZE, + .vs_hidden = 1, }; diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index f61142afea44..a5ebc7dbb384 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -160,6 +160,7 @@ nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp, struct nfsd3_readres *resp) { int nfserr; + u32 max_blocksize = svc_max_payload(rqstp); dprintk("nfsd: READ(3) %s %lu bytes at %lu\n", SVCFH_fmt(&argp->fh), @@ -172,15 
+173,15 @@ nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp, */ resp->count = argp->count; - if (NFSSVC_MAXBLKSIZE < resp->count) - resp->count = NFSSVC_MAXBLKSIZE; + if (max_blocksize < resp->count) + resp->count = max_blocksize; svc_reserve(rqstp, ((1 + NFS3_POST_OP_ATTR_WORDS + 3)<<2) + resp->count +4); fh_copy(&resp->fh, &argp->fh); nfserr = nfsd_read(rqstp, &resp->fh, NULL, argp->offset, - argp->vec, argp->vlen, + rqstp->rq_vec, argp->vlen, &resp->count); if (nfserr == 0) { struct inode *inode = resp->fh.fh_dentry->d_inode; @@ -210,7 +211,7 @@ nfsd3_proc_write(struct svc_rqst *rqstp, struct nfsd3_writeargs *argp, resp->committed = argp->stable; nfserr = nfsd_write(rqstp, &resp->fh, NULL, argp->offset, - argp->vec, argp->vlen, + rqstp->rq_vec, argp->vlen, argp->len, &resp->committed); resp->count = argp->count; @@ -538,15 +539,16 @@ nfsd3_proc_fsinfo(struct svc_rqst * rqstp, struct nfsd_fhandle *argp, struct nfsd3_fsinfores *resp) { int nfserr; + u32 max_blocksize = svc_max_payload(rqstp); dprintk("nfsd: FSINFO(3) %s\n", SVCFH_fmt(&argp->fh)); - resp->f_rtmax = NFSSVC_MAXBLKSIZE; - resp->f_rtpref = NFSSVC_MAXBLKSIZE; + resp->f_rtmax = max_blocksize; + resp->f_rtpref = max_blocksize; resp->f_rtmult = PAGE_SIZE; - resp->f_wtmax = NFSSVC_MAXBLKSIZE; - resp->f_wtpref = NFSSVC_MAXBLKSIZE; + resp->f_wtmax = max_blocksize; + resp->f_wtpref = max_blocksize; resp->f_wtmult = PAGE_SIZE; resp->f_dtpref = PAGE_SIZE; resp->f_maxfilesize = ~(u32) 0; diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index 243d94b9653a..247d518248bf 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -330,6 +330,7 @@ nfs3svc_decode_readargs(struct svc_rqst *rqstp, u32 *p, { unsigned int len; int v,pn; + u32 max_blocksize = svc_max_payload(rqstp); if (!(p = decode_fh(p, &args->fh)) || !(p = xdr_decode_hyper(p, &args->offset))) @@ -337,17 +338,16 @@ nfs3svc_decode_readargs(struct svc_rqst *rqstp, u32 *p, len = args->count = ntohl(*p++); - if (len > NFSSVC_MAXBLKSIZE) - 
len = NFSSVC_MAXBLKSIZE; + if (len > max_blocksize) + len = max_blocksize; /* set up the kvec */ v=0; while (len > 0) { - pn = rqstp->rq_resused; - svc_take_page(rqstp); - args->vec[v].iov_base = page_address(rqstp->rq_respages[pn]); - args->vec[v].iov_len = len < PAGE_SIZE? len : PAGE_SIZE; - len -= args->vec[v].iov_len; + pn = rqstp->rq_resused++; + rqstp->rq_vec[v].iov_base = page_address(rqstp->rq_respages[pn]); + rqstp->rq_vec[v].iov_len = len < PAGE_SIZE? len : PAGE_SIZE; + len -= rqstp->rq_vec[v].iov_len; v++; } args->vlen = v; @@ -359,6 +359,7 @@ nfs3svc_decode_writeargs(struct svc_rqst *rqstp, u32 *p, struct nfsd3_writeargs *args) { unsigned int len, v, hdr; + u32 max_blocksize = svc_max_payload(rqstp); if (!(p = decode_fh(p, &args->fh)) || !(p = xdr_decode_hyper(p, &args->offset))) @@ -373,22 +374,22 @@ nfs3svc_decode_writeargs(struct svc_rqst *rqstp, u32 *p, rqstp->rq_arg.len - hdr < len) return 0; - args->vec[0].iov_base = (void*)p; - args->vec[0].iov_len = rqstp->rq_arg.head[0].iov_len - hdr; + rqstp->rq_vec[0].iov_base = (void*)p; + rqstp->rq_vec[0].iov_len = rqstp->rq_arg.head[0].iov_len - hdr; - if (len > NFSSVC_MAXBLKSIZE) - len = NFSSVC_MAXBLKSIZE; + if (len > max_blocksize) + len = max_blocksize; v= 0; - while (len > args->vec[v].iov_len) { - len -= args->vec[v].iov_len; + while (len > rqstp->rq_vec[v].iov_len) { + len -= rqstp->rq_vec[v].iov_len; v++; - args->vec[v].iov_base = page_address(rqstp->rq_argpages[v]); - args->vec[v].iov_len = PAGE_SIZE; + rqstp->rq_vec[v].iov_base = page_address(rqstp->rq_pages[v]); + rqstp->rq_vec[v].iov_len = PAGE_SIZE; } - args->vec[v].iov_len = len; + rqstp->rq_vec[v].iov_len = len; args->vlen = v+1; - return args->count == args->len && args->vec[0].iov_len > 0; + return args->count == args->len && rqstp->rq_vec[0].iov_len > 0; } int @@ -446,11 +447,11 @@ nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, u32 *p, * This page appears in the rq_res.pages list, but as pages_len is always * 0, it won't get in the way 
*/ - svc_take_page(rqstp); len = ntohl(*p++); if (len == 0 || len > NFS3_MAXPATHLEN || len >= PAGE_SIZE) return 0; - args->tname = new = page_address(rqstp->rq_respages[rqstp->rq_resused-1]); + args->tname = new = + page_address(rqstp->rq_respages[rqstp->rq_resused++]); args->tlen = len; /* first copy and check from the first page */ old = (char*)p; @@ -522,8 +523,8 @@ nfs3svc_decode_readlinkargs(struct svc_rqst *rqstp, u32 *p, { if (!(p = decode_fh(p, &args->fh))) return 0; - svc_take_page(rqstp); - args->buffer = page_address(rqstp->rq_respages[rqstp->rq_resused-1]); + args->buffer = + page_address(rqstp->rq_respages[rqstp->rq_resused++]); return xdr_argsize_check(rqstp, p); } @@ -554,8 +555,8 @@ nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, u32 *p, if (args->count > PAGE_SIZE) args->count = PAGE_SIZE; - svc_take_page(rqstp); - args->buffer = page_address(rqstp->rq_respages[rqstp->rq_resused-1]); + args->buffer = + page_address(rqstp->rq_respages[rqstp->rq_resused++]); return xdr_argsize_check(rqstp, p); } @@ -565,6 +566,7 @@ nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, u32 *p, struct nfsd3_readdirargs *args) { int len, pn; + u32 max_blocksize = svc_max_payload(rqstp); if (!(p = decode_fh(p, &args->fh))) return 0; @@ -573,13 +575,12 @@ nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, u32 *p, args->dircount = ntohl(*p++); args->count = ntohl(*p++); - len = (args->count > NFSSVC_MAXBLKSIZE) ? NFSSVC_MAXBLKSIZE : + len = (args->count > max_blocksize) ? 
max_blocksize : args->count; args->count = len; while (len > 0) { - pn = rqstp->rq_resused; - svc_take_page(rqstp); + pn = rqstp->rq_resused++; if (!args->buffer) args->buffer = page_address(rqstp->rq_respages[pn]); len -= PAGE_SIZE; @@ -668,7 +669,6 @@ nfs3svc_encode_readlinkres(struct svc_rqst *rqstp, u32 *p, rqstp->rq_res.page_len = resp->len; if (resp->len & 3) { /* need to pad the tail */ - rqstp->rq_restailpage = 0; rqstp->rq_res.tail[0].iov_base = p; *p = 0; rqstp->rq_res.tail[0].iov_len = 4 - (resp->len&3); @@ -693,7 +693,6 @@ nfs3svc_encode_readres(struct svc_rqst *rqstp, u32 *p, rqstp->rq_res.page_len = resp->count; if (resp->count & 3) { /* need to pad the tail */ - rqstp->rq_restailpage = 0; rqstp->rq_res.tail[0].iov_base = p; *p = 0; rqstp->rq_res.tail[0].iov_len = 4 - (resp->count & 3); @@ -768,7 +767,6 @@ nfs3svc_encode_readdirres(struct svc_rqst *rqstp, u32 *p, rqstp->rq_res.page_len = (resp->count) << 2; /* add the 'tail' to the end of the 'head' page - page 0. */ - rqstp->rq_restailpage = 0; rqstp->rq_res.tail[0].iov_base = p; *p++ = 0; /* no more entries */ *p++ = htonl(resp->common.err == nfserr_eof); diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c index edb107e61b91..5d94555cdc83 100644 --- a/fs/nfsd/nfs4acl.c +++ b/fs/nfsd/nfs4acl.c @@ -63,6 +63,8 @@ #define NFS4_INHERITANCE_FLAGS (NFS4_ACE_FILE_INHERIT_ACE \ | NFS4_ACE_DIRECTORY_INHERIT_ACE | NFS4_ACE_INHERIT_ONLY_ACE) +#define NFS4_SUPPORTED_FLAGS (NFS4_INHERITANCE_FLAGS | NFS4_ACE_IDENTIFIER_GROUP) + #define MASK_EQUAL(mask1, mask2) \ ( ((mask1) & NFS4_ACE_MASK_ALL) == ((mask2) & NFS4_ACE_MASK_ALL) ) @@ -96,24 +98,26 @@ deny_mask(u32 allow_mask, unsigned int flags) /* XXX: modify functions to return NFS errors; they're only ever * used by nfs code, after all.... 
*/ -static int -mode_from_nfs4(u32 perm, unsigned short *mode, unsigned int flags) +/* We only map from NFSv4 to POSIX ACLs when setting ACLs, when we err on the + * side of being more restrictive, so the mode bit mapping below is + * pessimistic. An optimistic version would be needed to handle DENY's, + * but we expect to coalesce all ALLOWs and DENYs before mapping to mode + * bits. */ + +static void +low_mode_from_nfs4(u32 perm, unsigned short *mode, unsigned int flags) { - u32 ignore = 0; + u32 write_mode = NFS4_WRITE_MODE; - if (!(flags & NFS4_ACL_DIR)) - ignore |= NFS4_ACE_DELETE_CHILD; /* ignore it */ - perm |= ignore; + if (flags & NFS4_ACL_DIR) + write_mode |= NFS4_ACE_DELETE_CHILD; *mode = 0; if ((perm & NFS4_READ_MODE) == NFS4_READ_MODE) *mode |= ACL_READ; - if ((perm & NFS4_WRITE_MODE) == NFS4_WRITE_MODE) + if ((perm & write_mode) == write_mode) *mode |= ACL_WRITE; if ((perm & NFS4_EXECUTE_MODE) == NFS4_EXECUTE_MODE) *mode |= ACL_EXECUTE; - if (!MASK_EQUAL(perm, ignore|mask_from_posix(*mode, flags))) - return -EINVAL; - return 0; } struct ace_container { @@ -338,38 +342,6 @@ sort_pacl(struct posix_acl *pacl) return; } -static int -write_pace(struct nfs4_ace *ace, struct posix_acl *pacl, - struct posix_acl_entry **pace, short tag, unsigned int flags) -{ - struct posix_acl_entry *this = *pace; - - if (*pace == pacl->a_entries + pacl->a_count) - return -EINVAL; /* fell off the end */ - (*pace)++; - this->e_tag = tag; - if (tag == ACL_USER_OBJ) - flags |= NFS4_ACL_OWNER; - if (mode_from_nfs4(ace->access_mask, &this->e_perm, flags)) - return -EINVAL; - this->e_id = (tag == ACL_USER || tag == ACL_GROUP ? 
- ace->who : ACL_UNDEFINED_ID); - return 0; -} - -static struct nfs4_ace * -get_next_v4_ace(struct list_head **p, struct list_head *head) -{ - struct nfs4_ace *ace; - - *p = (*p)->next; - if (*p == head) - return NULL; - ace = list_entry(*p, struct nfs4_ace, l_ace); - - return ace; -} - int nfs4_acl_nfsv4_to_posix(struct nfs4_acl *acl, struct posix_acl **pacl, struct posix_acl **dpacl, unsigned int flags) @@ -385,42 +357,23 @@ nfs4_acl_nfsv4_to_posix(struct nfs4_acl *acl, struct posix_acl **pacl, goto out; error = nfs4_acl_split(acl, dacl); - if (error < 0) + if (error) goto out_acl; - if (pacl != NULL) { - if (acl->naces == 0) { - error = -ENODATA; - goto try_dpacl; - } - - *pacl = _nfsv4_to_posix_one(acl, flags); - if (IS_ERR(*pacl)) { - error = PTR_ERR(*pacl); - *pacl = NULL; - goto out_acl; - } + *pacl = _nfsv4_to_posix_one(acl, flags); + if (IS_ERR(*pacl)) { + error = PTR_ERR(*pacl); + *pacl = NULL; + goto out_acl; } -try_dpacl: - if (dpacl != NULL) { - if (dacl->naces == 0) { - if (pacl == NULL || *pacl == NULL) - error = -ENODATA; - goto out_acl; - } - - error = 0; - *dpacl = _nfsv4_to_posix_one(dacl, flags); - if (IS_ERR(*dpacl)) { - error = PTR_ERR(*dpacl); - *dpacl = NULL; - goto out_acl; - } + *dpacl = _nfsv4_to_posix_one(dacl, flags); + if (IS_ERR(*dpacl)) { + error = PTR_ERR(*dpacl); + *dpacl = NULL; } - out_acl: - if (error && pacl) { + if (error) { posix_acl_release(*pacl); *pacl = NULL; } @@ -429,349 +382,311 @@ out: return error; } +/* + * While processing the NFSv4 ACE, this maintains bitmasks representing + * which permission bits have been allowed and which denied to a given + * entity: */ +struct posix_ace_state { + u32 allow; + u32 deny; +}; + +struct posix_user_ace_state { + uid_t uid; + struct posix_ace_state perms; +}; + +struct posix_ace_state_array { + int n; + struct posix_user_ace_state aces[]; +}; + +/* + * While processing the NFSv4 ACE, this maintains the partial permissions + * calculated so far: */ + +struct posix_acl_state { + 
struct posix_ace_state owner; + struct posix_ace_state group; + struct posix_ace_state other; + struct posix_ace_state everyone; + struct posix_ace_state mask; /* Deny unused in this case */ + struct posix_ace_state_array *users; + struct posix_ace_state_array *groups; +}; + static int -same_who(struct nfs4_ace *a, struct nfs4_ace *b) +init_state(struct posix_acl_state *state, int cnt) { - return a->whotype == b->whotype && - (a->whotype != NFS4_ACL_WHO_NAMED || a->who == b->who); + int alloc; + + memset(state, 0, sizeof(struct posix_acl_state)); + /* + * In the worst case, each individual acl could be for a distinct + * named user or group, but we don't know which, so we allocate + * enough space for either: + */ + alloc = sizeof(struct posix_ace_state_array) + + cnt*sizeof(struct posix_ace_state); + state->users = kzalloc(alloc, GFP_KERNEL); + if (!state->users) + return -ENOMEM; + state->groups = kzalloc(alloc, GFP_KERNEL); + if (!state->groups) { + kfree(state->users); + return -ENOMEM; + } + return 0; } -static int -complementary_ace_pair(struct nfs4_ace *allow, struct nfs4_ace *deny, - unsigned int flags) -{ - int ignore = 0; - if (!(flags & NFS4_ACL_DIR)) - ignore |= NFS4_ACE_DELETE_CHILD; - return MASK_EQUAL(ignore|deny_mask(allow->access_mask, flags), - ignore|deny->access_mask) && - allow->type == NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE && - deny->type == NFS4_ACE_ACCESS_DENIED_ACE_TYPE && - allow->flag == deny->flag && - same_who(allow, deny); +static void +free_state(struct posix_acl_state *state) { + kfree(state->users); + kfree(state->groups); } -static inline int -user_obj_from_v4(struct nfs4_acl *n4acl, struct list_head **p, - struct posix_acl *pacl, struct posix_acl_entry **pace, - unsigned int flags) +static inline void add_to_mask(struct posix_acl_state *state, struct posix_ace_state *astate) { - int error = -EINVAL; - struct nfs4_ace *ace, *ace2; - - ace = get_next_v4_ace(p, &n4acl->ace_head); - if (ace == NULL) - goto out; - if (ace2type(ace) != 
ACL_USER_OBJ) - goto out; - error = write_pace(ace, pacl, pace, ACL_USER_OBJ, flags); - if (error < 0) - goto out; - error = -EINVAL; - ace2 = get_next_v4_ace(p, &n4acl->ace_head); - if (ace2 == NULL) - goto out; - if (!complementary_ace_pair(ace, ace2, flags)) - goto out; - error = 0; -out: - return error; + state->mask.allow |= astate->allow; } -static inline int -users_from_v4(struct nfs4_acl *n4acl, struct list_head **p, - struct nfs4_ace **mask_ace, - struct posix_acl *pacl, struct posix_acl_entry **pace, - unsigned int flags) -{ - int error = -EINVAL; - struct nfs4_ace *ace, *ace2; +/* + * Certain bits (SYNCHRONIZE, DELETE, WRITE_OWNER, READ/WRITE_NAMED_ATTRS, + * READ_ATTRIBUTES, READ_ACL) are currently unenforceable and don't translate + * to traditional read/write/execute permissions. + * + * It's problematic to reject acls that use certain mode bits, because it + * places the burden on users to learn the rules about which bits one + * particular server sets, without giving the user a lot of help--we return an + * error that could mean any number of different things. To make matters + * worse, the problematic bits might be introduced by some application that's + * automatically mapping from some other acl model. + * + * So wherever possible we accept anything, possibly erring on the side of + * denying more permissions than necessary. 
+ * + * However we do reject *explicit* DENY's of a few bits representing + * permissions we could never deny: + */ - ace = get_next_v4_ace(p, &n4acl->ace_head); - if (ace == NULL) - goto out; - while (ace2type(ace) == ACL_USER) { - if (ace->type != NFS4_ACE_ACCESS_DENIED_ACE_TYPE) - goto out; - if (*mask_ace && - !MASK_EQUAL(ace->access_mask, (*mask_ace)->access_mask)) - goto out; - *mask_ace = ace; - ace = get_next_v4_ace(p, &n4acl->ace_head); - if (ace == NULL) - goto out; - if (ace->type != NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE) - goto out; - error = write_pace(ace, pacl, pace, ACL_USER, flags); - if (error < 0) - goto out; - error = -EINVAL; - ace2 = get_next_v4_ace(p, &n4acl->ace_head); - if (ace2 == NULL) - goto out; - if (!complementary_ace_pair(ace, ace2, flags)) - goto out; - if ((*mask_ace)->flag != ace2->flag || - !same_who(*mask_ace, ace2)) - goto out; - ace = get_next_v4_ace(p, &n4acl->ace_head); - if (ace == NULL) - goto out; - } - error = 0; -out: - return error; +static inline int check_deny(u32 mask, int isowner) +{ + if (mask & (NFS4_ACE_READ_ATTRIBUTES | NFS4_ACE_READ_ACL)) + return -EINVAL; + if (!isowner) + return 0; + if (mask & (NFS4_ACE_WRITE_ATTRIBUTES | NFS4_ACE_WRITE_ACL)) + return -EINVAL; + return 0; } -static inline int -group_obj_and_groups_from_v4(struct nfs4_acl *n4acl, struct list_head **p, - struct nfs4_ace **mask_ace, - struct posix_acl *pacl, struct posix_acl_entry **pace, - unsigned int flags) +static struct posix_acl * +posix_state_to_acl(struct posix_acl_state *state, unsigned int flags) { - int error = -EINVAL; - struct nfs4_ace *ace, *ace2; - struct ace_container *ac; - struct list_head group_l; - - INIT_LIST_HEAD(&group_l); - ace = list_entry(*p, struct nfs4_ace, l_ace); - - /* group owner (mask and allow aces) */ + struct posix_acl_entry *pace; + struct posix_acl *pacl; + int nace; + int i, error = 0; - if (pacl->a_count != 3) { - /* then the group owner should be preceded by mask */ - if (ace->type != 
NFS4_ACE_ACCESS_DENIED_ACE_TYPE) - goto out; - if (*mask_ace && - !MASK_EQUAL(ace->access_mask, (*mask_ace)->access_mask)) - goto out; - *mask_ace = ace; - ace = get_next_v4_ace(p, &n4acl->ace_head); - if (ace == NULL) - goto out; + nace = 4 + state->users->n + state->groups->n; + pacl = posix_acl_alloc(nace, GFP_KERNEL); + if (!pacl) + return ERR_PTR(-ENOMEM); - if ((*mask_ace)->flag != ace->flag || !same_who(*mask_ace, ace)) - goto out; + pace = pacl->a_entries; + pace->e_tag = ACL_USER_OBJ; + error = check_deny(state->owner.deny, 1); + if (error) + goto out_err; + low_mode_from_nfs4(state->owner.allow, &pace->e_perm, flags); + pace->e_id = ACL_UNDEFINED_ID; + + for (i=0; i < state->users->n; i++) { + pace++; + pace->e_tag = ACL_USER; + error = check_deny(state->users->aces[i].perms.deny, 0); + if (error) + goto out_err; + low_mode_from_nfs4(state->users->aces[i].perms.allow, + &pace->e_perm, flags); + pace->e_id = state->users->aces[i].uid; + add_to_mask(state, &state->users->aces[i].perms); } - if (ace2type(ace) != ACL_GROUP_OBJ) - goto out; - - ac = kmalloc(sizeof(*ac), GFP_KERNEL); - error = -ENOMEM; - if (ac == NULL) - goto out; - ac->ace = ace; - list_add_tail(&ac->ace_l, &group_l); - - error = -EINVAL; - if (ace->type != NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE) - goto out; - - error = write_pace(ace, pacl, pace, ACL_GROUP_OBJ, flags); - if (error < 0) - goto out; - - error = -EINVAL; - ace = get_next_v4_ace(p, &n4acl->ace_head); - if (ace == NULL) - goto out; - - /* groups (mask and allow aces) */ - - while (ace2type(ace) == ACL_GROUP) { - if (*mask_ace == NULL) - goto out; - - if (ace->type != NFS4_ACE_ACCESS_DENIED_ACE_TYPE || - !MASK_EQUAL(ace->access_mask, (*mask_ace)->access_mask)) - goto out; - *mask_ace = ace; + pace++; + pace->e_tag = ACL_GROUP_OBJ; + error = check_deny(state->group.deny, 0); + if (error) + goto out_err; + low_mode_from_nfs4(state->group.allow, &pace->e_perm, flags); + pace->e_id = ACL_UNDEFINED_ID; + add_to_mask(state, &state->group); + 
+ for (i=0; i < state->groups->n; i++) { + pace++; + pace->e_tag = ACL_GROUP; + error = check_deny(state->groups->aces[i].perms.deny, 0); + if (error) + goto out_err; + low_mode_from_nfs4(state->groups->aces[i].perms.allow, + &pace->e_perm, flags); + pace->e_id = state->groups->aces[i].uid; + add_to_mask(state, &state->groups->aces[i].perms); + } - ace = get_next_v4_ace(p, &n4acl->ace_head); - if (ace == NULL) - goto out; - ac = kmalloc(sizeof(*ac), GFP_KERNEL); - error = -ENOMEM; - if (ac == NULL) - goto out; - error = -EINVAL; - if (ace->type != NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE || - !same_who(ace, *mask_ace)) - goto out; + pace++; + pace->e_tag = ACL_MASK; + low_mode_from_nfs4(state->mask.allow, &pace->e_perm, flags); + pace->e_id = ACL_UNDEFINED_ID; - ac->ace = ace; - list_add_tail(&ac->ace_l, &group_l); + pace++; + pace->e_tag = ACL_OTHER; + error = check_deny(state->other.deny, 0); + if (error) + goto out_err; + low_mode_from_nfs4(state->other.allow, &pace->e_perm, flags); + pace->e_id = ACL_UNDEFINED_ID; - error = write_pace(ace, pacl, pace, ACL_GROUP, flags); - if (error < 0) - goto out; - error = -EINVAL; - ace = get_next_v4_ace(p, &n4acl->ace_head); - if (ace == NULL) - goto out; - } + return pacl; +out_err: + posix_acl_release(pacl); + return ERR_PTR(error); +} - /* group owner (deny ace) */ +static inline void allow_bits(struct posix_ace_state *astate, u32 mask) +{ + /* Allow all bits in the mask not already denied: */ + astate->allow |= mask & ~astate->deny; +} - if (ace2type(ace) != ACL_GROUP_OBJ) - goto out; - ac = list_entry(group_l.next, struct ace_container, ace_l); - ace2 = ac->ace; - if (!complementary_ace_pair(ace2, ace, flags)) - goto out; - list_del(group_l.next); - kfree(ac); +static inline void deny_bits(struct posix_ace_state *astate, u32 mask) +{ + /* Deny all bits in the mask not already allowed: */ + astate->deny |= mask & ~astate->allow; +} - /* groups (deny aces) */ +static int find_uid(struct posix_acl_state *state, struct 
posix_ace_state_array *a, uid_t uid) +{ + int i; - while (!list_empty(&group_l)) { - ace = get_next_v4_ace(p, &n4acl->ace_head); - if (ace == NULL) - goto out; - if (ace2type(ace) != ACL_GROUP) - goto out; - ac = list_entry(group_l.next, struct ace_container, ace_l); - ace2 = ac->ace; - if (!complementary_ace_pair(ace2, ace, flags)) - goto out; - list_del(group_l.next); - kfree(ac); - } + for (i = 0; i < a->n; i++) + if (a->aces[i].uid == uid) + return i; + /* Not found: */ + a->n++; + a->aces[i].uid = uid; + a->aces[i].perms.allow = state->everyone.allow; + a->aces[i].perms.deny = state->everyone.deny; - ace = get_next_v4_ace(p, &n4acl->ace_head); - if (ace == NULL) - goto out; - if (ace2type(ace) != ACL_OTHER) - goto out; - error = 0; -out: - while (!list_empty(&group_l)) { - ac = list_entry(group_l.next, struct ace_container, ace_l); - list_del(group_l.next); - kfree(ac); - } - return error; + return i; } -static inline int -mask_from_v4(struct nfs4_acl *n4acl, struct list_head **p, - struct nfs4_ace **mask_ace, - struct posix_acl *pacl, struct posix_acl_entry **pace, - unsigned int flags) +static void deny_bits_array(struct posix_ace_state_array *a, u32 mask) { - int error = -EINVAL; - struct nfs4_ace *ace; + int i; - ace = list_entry(*p, struct nfs4_ace, l_ace); - if (pacl->a_count != 3) { - if (*mask_ace == NULL) - goto out; - (*mask_ace)->access_mask = deny_mask((*mask_ace)->access_mask, flags); - write_pace(*mask_ace, pacl, pace, ACL_MASK, flags); - } - error = 0; -out: - return error; + for (i=0; i < a->n; i++) + deny_bits(&a->aces[i].perms, mask); } -static inline int -other_from_v4(struct nfs4_acl *n4acl, struct list_head **p, - struct posix_acl *pacl, struct posix_acl_entry **pace, - unsigned int flags) +static void allow_bits_array(struct posix_ace_state_array *a, u32 mask) { - int error = -EINVAL; - struct nfs4_ace *ace, *ace2; + int i; - ace = list_entry(*p, struct nfs4_ace, l_ace); - if (ace->type != NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE) - goto out; - 
error = write_pace(ace, pacl, pace, ACL_OTHER, flags); - if (error < 0) - goto out; - error = -EINVAL; - ace2 = get_next_v4_ace(p, &n4acl->ace_head); - if (ace2 == NULL) - goto out; - if (!complementary_ace_pair(ace, ace2, flags)) - goto out; - error = 0; -out: - return error; + for (i=0; i < a->n; i++) + allow_bits(&a->aces[i].perms, mask); } -static int -calculate_posix_ace_count(struct nfs4_acl *n4acl) +static void process_one_v4_ace(struct posix_acl_state *state, + struct nfs4_ace *ace) { - if (n4acl->naces == 6) /* owner, owner group, and other only */ - return 3; - else { /* Otherwise there must be a mask entry. */ - /* Also, the remaining entries are for named users and - * groups, and come in threes (mask, allow, deny): */ - if (n4acl->naces < 7) - return -EINVAL; - if ((n4acl->naces - 7) % 3) - return -EINVAL; - return 4 + (n4acl->naces - 7)/3; + u32 mask = ace->access_mask; + int i; + + switch (ace2type(ace)) { + case ACL_USER_OBJ: + if (ace->type == NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE) { + allow_bits(&state->owner, mask); + } else { + deny_bits(&state->owner, mask); + } + break; + case ACL_USER: + i = find_uid(state, state->users, ace->who); + if (ace->type == NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE) { + allow_bits(&state->users->aces[i].perms, mask); + } else { + deny_bits(&state->users->aces[i].perms, mask); + mask = state->users->aces[i].perms.deny; + deny_bits(&state->owner, mask); + } + break; + case ACL_GROUP_OBJ: + if (ace->type == NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE) { + allow_bits(&state->group, mask); + } else { + deny_bits(&state->group, mask); + mask = state->group.deny; + deny_bits(&state->owner, mask); + deny_bits(&state->everyone, mask); + deny_bits_array(state->users, mask); + deny_bits_array(state->groups, mask); + } + break; + case ACL_GROUP: + i = find_uid(state, state->groups, ace->who); + if (ace->type == NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE) { + allow_bits(&state->groups->aces[i].perms, mask); + } else { + deny_bits(&state->groups->aces[i].perms, 
mask); + mask = state->groups->aces[i].perms.deny; + deny_bits(&state->owner, mask); + deny_bits(&state->group, mask); + deny_bits(&state->everyone, mask); + deny_bits_array(state->users, mask); + deny_bits_array(state->groups, mask); + } + break; + case ACL_OTHER: + if (ace->type == NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE) { + allow_bits(&state->owner, mask); + allow_bits(&state->group, mask); + allow_bits(&state->other, mask); + allow_bits(&state->everyone, mask); + allow_bits_array(state->users, mask); + allow_bits_array(state->groups, mask); + } else { + deny_bits(&state->owner, mask); + deny_bits(&state->group, mask); + deny_bits(&state->other, mask); + deny_bits(&state->everyone, mask); + deny_bits_array(state->users, mask); + deny_bits_array(state->groups, mask); + } } } - static struct posix_acl * _nfsv4_to_posix_one(struct nfs4_acl *n4acl, unsigned int flags) { + struct posix_acl_state state; struct posix_acl *pacl; - int error = -EINVAL, nace = 0; - struct list_head *p; - struct nfs4_ace *mask_ace = NULL; - struct posix_acl_entry *pace; - - nace = calculate_posix_ace_count(n4acl); - if (nace < 0) - goto out_err; - - pacl = posix_acl_alloc(nace, GFP_KERNEL); - error = -ENOMEM; - if (pacl == NULL) - goto out_err; - - pace = &pacl->a_entries[0]; - p = &n4acl->ace_head; - - error = user_obj_from_v4(n4acl, &p, pacl, &pace, flags); - if (error) - goto out_acl; - - error = users_from_v4(n4acl, &p, &mask_ace, pacl, &pace, flags); - if (error) - goto out_acl; + struct nfs4_ace *ace; + int ret; - error = group_obj_and_groups_from_v4(n4acl, &p, &mask_ace, pacl, &pace, - flags); - if (error) - goto out_acl; + ret = init_state(&state, n4acl->naces); + if (ret) + return ERR_PTR(ret); - error = mask_from_v4(n4acl, &p, &mask_ace, pacl, &pace, flags); - if (error) - goto out_acl; - error = other_from_v4(n4acl, &p, pacl, &pace, flags); - if (error) - goto out_acl; + list_for_each_entry(ace, &n4acl->ace_head, l_ace) + process_one_v4_ace(&state, ace); - error = -EINVAL; - if 
(p->next != &n4acl->ace_head) - goto out_acl; - if (pace != pacl->a_entries + pacl->a_count) - goto out_acl; + pacl = posix_state_to_acl(&state, flags); - sort_pacl(pacl); + free_state(&state); - return pacl; -out_acl: - posix_acl_release(pacl); -out_err: - pacl = ERR_PTR(error); + if (!IS_ERR(pacl)) + sort_pacl(pacl); return pacl; } @@ -785,22 +700,41 @@ nfs4_acl_split(struct nfs4_acl *acl, struct nfs4_acl *dacl) list_for_each_safe(h, n, &acl->ace_head) { ace = list_entry(h, struct nfs4_ace, l_ace); - if ((ace->flag & NFS4_INHERITANCE_FLAGS) - != NFS4_INHERITANCE_FLAGS) - continue; + if (ace->type != NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE && + ace->type != NFS4_ACE_ACCESS_DENIED_ACE_TYPE) + return -EINVAL; - error = nfs4_acl_add_ace(dacl, ace->type, ace->flag, - ace->access_mask, ace->whotype, ace->who); - if (error < 0) - goto out; + if (ace->flag & ~NFS4_SUPPORTED_FLAGS) + return -EINVAL; - list_del(h); - kfree(ace); - acl->naces--; + switch (ace->flag & NFS4_INHERITANCE_FLAGS) { + case 0: + /* Leave this ace in the effective acl: */ + continue; + case NFS4_INHERITANCE_FLAGS: + /* Add this ace to the default acl and remove it + * from the effective acl: */ + error = nfs4_acl_add_ace(dacl, ace->type, ace->flag, + ace->access_mask, ace->whotype, ace->who); + if (error) + return error; + list_del(h); + kfree(ace); + acl->naces--; + break; + case NFS4_INHERITANCE_FLAGS & ~NFS4_ACE_INHERIT_ONLY_ACE: + /* Add this ace to the default, but leave it in + * the effective acl as well: */ + error = nfs4_acl_add_ace(dacl, ace->type, ace->flag, + ace->access_mask, ace->whotype, ace->who); + if (error) + return error; + break; + default: + return -EINVAL; + } } - -out: - return error; + return 0; } static short @@ -930,23 +864,6 @@ nfs4_acl_write_who(int who, char *p) return -1; } -static inline int -match_who(struct nfs4_ace *ace, uid_t owner, gid_t group, uid_t who) -{ - switch (ace->whotype) { - case NFS4_ACL_WHO_NAMED: - return who == ace->who; - case NFS4_ACL_WHO_OWNER: - 
return who == owner; - case NFS4_ACL_WHO_GROUP: - return who == group; - case NFS4_ACL_WHO_EVERYONE: - return 1; - default: - return 0; - } -} - EXPORT_SYMBOL(nfs4_acl_new); EXPORT_SYMBOL(nfs4_acl_free); EXPORT_SYMBOL(nfs4_acl_add_ace); diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 15ded7a30a72..8333db12caca 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -646,7 +646,7 @@ nfsd4_write(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_writ *p++ = nfssvc_boot.tv_usec; status = nfsd_write(rqstp, current_fh, filp, write->wr_offset, - write->wr_vec, write->wr_vlen, write->wr_buflen, + rqstp->rq_vec, write->wr_vlen, write->wr_buflen, &write->wr_how_written); if (filp) fput(filp); @@ -802,13 +802,29 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, * SETCLIENTID_CONFIRM, PUTFH and PUTROOTFH * require a valid current filehandle */ - if ((!current_fh->fh_dentry) && - !((op->opnum == OP_PUTFH) || (op->opnum == OP_PUTROOTFH) || - (op->opnum == OP_SETCLIENTID) || - (op->opnum == OP_SETCLIENTID_CONFIRM) || - (op->opnum == OP_RENEW) || (op->opnum == OP_RESTOREFH) || - (op->opnum == OP_RELEASE_LOCKOWNER))) { - op->status = nfserr_nofilehandle; + if (!current_fh->fh_dentry) { + if (!((op->opnum == OP_PUTFH) || + (op->opnum == OP_PUTROOTFH) || + (op->opnum == OP_SETCLIENTID) || + (op->opnum == OP_SETCLIENTID_CONFIRM) || + (op->opnum == OP_RENEW) || + (op->opnum == OP_RESTOREFH) || + (op->opnum == OP_RELEASE_LOCKOWNER))) { + op->status = nfserr_nofilehandle; + goto encode_op; + } + } + /* Check must be done at start of each operation, except + * for GETATTR and ops not listed as returning NFS4ERR_MOVED + */ + else if (current_fh->fh_export->ex_fslocs.migrated && + !((op->opnum == OP_GETATTR) || + (op->opnum == OP_PUTROOTFH) || + (op->opnum == OP_PUTPUBFH) || + (op->opnum == OP_RENEW) || + (op->opnum == OP_SETCLIENTID) || + (op->opnum == OP_RELEASE_LOCKOWNER))) { + op->status = nfserr_moved; goto encode_op; } switch (op->opnum) { diff --git 
a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 5be00436b5b8..41fc241b729a 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -60,6 +60,14 @@ #define NFSDDBG_FACILITY NFSDDBG_XDR +/* + * As per referral draft, the fsid for a referral MUST be different from the fsid of the containing + * directory in order to indicate to the client that a filesystem boundary is present + * We use a fixed fsid for a referral + */ +#define NFS4_REFERRAL_FSID_MAJOR 0x8000000ULL +#define NFS4_REFERRAL_FSID_MINOR 0x8000000ULL + static int check_filename(char *str, int len, int err) { @@ -926,26 +934,26 @@ nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write) printk(KERN_NOTICE "xdr error! (%s:%d)\n", __FILE__, __LINE__); goto xdr_error; } - write->wr_vec[0].iov_base = p; - write->wr_vec[0].iov_len = avail; + argp->rqstp->rq_vec[0].iov_base = p; + argp->rqstp->rq_vec[0].iov_len = avail; v = 0; len = write->wr_buflen; - while (len > write->wr_vec[v].iov_len) { - len -= write->wr_vec[v].iov_len; + while (len > argp->rqstp->rq_vec[v].iov_len) { + len -= argp->rqstp->rq_vec[v].iov_len; v++; - write->wr_vec[v].iov_base = page_address(argp->pagelist[0]); + argp->rqstp->rq_vec[v].iov_base = page_address(argp->pagelist[0]); argp->pagelist++; if (argp->pagelen >= PAGE_SIZE) { - write->wr_vec[v].iov_len = PAGE_SIZE; + argp->rqstp->rq_vec[v].iov_len = PAGE_SIZE; argp->pagelen -= PAGE_SIZE; } else { - write->wr_vec[v].iov_len = argp->pagelen; + argp->rqstp->rq_vec[v].iov_len = argp->pagelen; argp->pagelen -= len; } } - argp->end = (u32*) (write->wr_vec[v].iov_base + write->wr_vec[v].iov_len); - argp->p = (u32*) (write->wr_vec[v].iov_base + (XDR_QUADLEN(len) << 2)); - write->wr_vec[v].iov_len = len; + argp->end = (u32*) (argp->rqstp->rq_vec[v].iov_base + argp->rqstp->rq_vec[v].iov_len); + argp->p = (u32*) (argp->rqstp->rq_vec[v].iov_base + (XDR_QUADLEN(len) << 2)); + argp->rqstp->rq_vec[v].iov_len = len; write->wr_vlen = v+1; DECODE_TAIL; @@ -1223,6 +1231,119 @@ 
nfsd4_decode_compound(struct nfsd4_compoundargs *argp) stateowner->so_replay.rp_buflen); \ } } while (0); +/* Encode as an array of strings the string given with components + * seperated @sep. + */ +static int nfsd4_encode_components(char sep, char *components, + u32 **pp, int *buflen) +{ + u32 *p = *pp; + u32 *countp = p; + int strlen, count=0; + char *str, *end; + + dprintk("nfsd4_encode_components(%s)\n", components); + if ((*buflen -= 4) < 0) + return nfserr_resource; + WRITE32(0); /* We will fill this in with @count later */ + end = str = components; + while (*end) { + for (; *end && (*end != sep); end++) + ; /* Point to end of component */ + strlen = end - str; + if (strlen) { + if ((*buflen -= ((XDR_QUADLEN(strlen) << 2) + 4)) < 0) + return nfserr_resource; + WRITE32(strlen); + WRITEMEM(str, strlen); + count++; + } + else + end++; + str = end; + } + *pp = p; + p = countp; + WRITE32(count); + return 0; +} + +/* + * encode a location element of a fs_locations structure + */ +static int nfsd4_encode_fs_location4(struct nfsd4_fs_location *location, + u32 **pp, int *buflen) +{ + int status; + u32 *p = *pp; + + status = nfsd4_encode_components(':', location->hosts, &p, buflen); + if (status) + return status; + status = nfsd4_encode_components('/', location->path, &p, buflen); + if (status) + return status; + *pp = p; + return 0; +} + +/* + * Return the path to an export point in the pseudo filesystem namespace + * Returned string is safe to use as long as the caller holds a reference + * to @exp. 
+ */ +static char *nfsd4_path(struct svc_rqst *rqstp, struct svc_export *exp) +{ + struct svc_fh tmp_fh; + char *path, *rootpath; + int stat; + + fh_init(&tmp_fh, NFS4_FHSIZE); + stat = exp_pseudoroot(rqstp->rq_client, &tmp_fh, &rqstp->rq_chandle); + if (stat) + return ERR_PTR(stat); + rootpath = tmp_fh.fh_export->ex_path; + + path = exp->ex_path; + + if (strncmp(path, rootpath, strlen(rootpath))) { + printk("nfsd: fs_locations failed;" + "%s is not contained in %s\n", path, rootpath); + return ERR_PTR(-EOPNOTSUPP); + } + + return path + strlen(rootpath); +} + +/* + * encode a fs_locations structure + */ +static int nfsd4_encode_fs_locations(struct svc_rqst *rqstp, + struct svc_export *exp, + u32 **pp, int *buflen) +{ + int status, i; + u32 *p = *pp; + struct nfsd4_fs_locations *fslocs = &exp->ex_fslocs; + char *root = nfsd4_path(rqstp, exp); + + if (IS_ERR(root)) + return PTR_ERR(root); + status = nfsd4_encode_components('/', root, &p, buflen); + if (status) + return status; + if ((*buflen -= 4) < 0) + return nfserr_resource; + WRITE32(fslocs->locations_count); + for (i=0; i<fslocs->locations_count; i++) { + status = nfsd4_encode_fs_location4(&fslocs->locations[i], + &p, buflen); + if (status) + return status; + } + *pp = p; + return 0; +} static u32 nfs4_ftypes[16] = { NF4BAD, NF4FIFO, NF4CHR, NF4BAD, @@ -1272,6 +1393,25 @@ nfsd4_encode_aclname(struct svc_rqst *rqstp, int whotype, uid_t id, int group, return nfsd4_encode_name(rqstp, whotype, id, group, p, buflen); } +#define WORD0_ABSENT_FS_ATTRS (FATTR4_WORD0_FS_LOCATIONS | FATTR4_WORD0_FSID | \ + FATTR4_WORD0_RDATTR_ERROR) +#define WORD1_ABSENT_FS_ATTRS FATTR4_WORD1_MOUNTED_ON_FILEID + +static int fattr_handle_absent_fs(u32 *bmval0, u32 *bmval1, u32 *rdattr_err) +{ + /* As per referral draft: */ + if (*bmval0 & ~WORD0_ABSENT_FS_ATTRS || + *bmval1 & ~WORD1_ABSENT_FS_ATTRS) { + if (*bmval0 & FATTR4_WORD0_RDATTR_ERROR || + *bmval0 & FATTR4_WORD0_FS_LOCATIONS) + *rdattr_err = NFSERR_MOVED; + else + return 
nfserr_moved; + } + *bmval0 &= WORD0_ABSENT_FS_ATTRS; + *bmval1 &= WORD1_ABSENT_FS_ATTRS; + return 0; +} /* * Note: @fhp can be NULL; in this case, we might have to compose the filehandle @@ -1294,6 +1434,7 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, u32 *attrlenp; u32 dummy; u64 dummy64; + u32 rdattr_err = 0; u32 *p = buffer; int status; int aclsupport = 0; @@ -1303,6 +1444,12 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, BUG_ON(bmval0 & ~NFSD_SUPPORTED_ATTRS_WORD0); BUG_ON(bmval1 & ~NFSD_SUPPORTED_ATTRS_WORD1); + if (exp->ex_fslocs.migrated) { + status = fattr_handle_absent_fs(&bmval0, &bmval1, &rdattr_err); + if (status) + goto out; + } + status = vfs_getattr(exp->ex_mnt, dentry, &stat); if (status) goto out_nfserr; @@ -1334,6 +1481,11 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, goto out_nfserr; } } + if (bmval0 & FATTR4_WORD0_FS_LOCATIONS) { + if (exp->ex_fslocs.locations == NULL) { + bmval0 &= ~FATTR4_WORD0_FS_LOCATIONS; + } + } if ((buflen -= 16) < 0) goto out_resource; @@ -1343,12 +1495,15 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, attrlenp = p++; /* to be backfilled later */ if (bmval0 & FATTR4_WORD0_SUPPORTED_ATTRS) { + u32 word0 = NFSD_SUPPORTED_ATTRS_WORD0; if ((buflen -= 12) < 0) goto out_resource; + if (!aclsupport) + word0 &= ~FATTR4_WORD0_ACL; + if (!exp->ex_fslocs.locations) + word0 &= ~FATTR4_WORD0_FS_LOCATIONS; WRITE32(2); - WRITE32(aclsupport ? 
- NFSD_SUPPORTED_ATTRS_WORD0 : - NFSD_SUPPORTED_ATTRS_WORD0 & ~FATTR4_WORD0_ACL); + WRITE32(word0); WRITE32(NFSD_SUPPORTED_ATTRS_WORD1); } if (bmval0 & FATTR4_WORD0_TYPE) { @@ -1402,7 +1557,10 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, if (bmval0 & FATTR4_WORD0_FSID) { if ((buflen -= 16) < 0) goto out_resource; - if (is_fsid(fhp, rqstp->rq_reffh)) { + if (exp->ex_fslocs.migrated) { + WRITE64(NFS4_REFERRAL_FSID_MAJOR); + WRITE64(NFS4_REFERRAL_FSID_MINOR); + } else if (is_fsid(fhp, rqstp->rq_reffh)) { WRITE64((u64)exp->ex_fsid); WRITE64((u64)0); } else { @@ -1425,7 +1583,7 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, if (bmval0 & FATTR4_WORD0_RDATTR_ERROR) { if ((buflen -= 4) < 0) goto out_resource; - WRITE32(0); + WRITE32(rdattr_err); } if (bmval0 & FATTR4_WORD0_ACL) { struct nfs4_ace *ace; @@ -1513,6 +1671,13 @@ out_acl: goto out_resource; WRITE64((u64) statfs.f_files); } + if (bmval0 & FATTR4_WORD0_FS_LOCATIONS) { + status = nfsd4_encode_fs_locations(rqstp, exp, &p, &buflen); + if (status == nfserr_resource) + goto out_resource; + if (status) + goto out; + } if (bmval0 & FATTR4_WORD0_HOMOGENEOUS) { if ((buflen -= 4) < 0) goto out_resource; @@ -1536,12 +1701,12 @@ out_acl: if (bmval0 & FATTR4_WORD0_MAXREAD) { if ((buflen -= 8) < 0) goto out_resource; - WRITE64((u64) NFSSVC_MAXBLKSIZE); + WRITE64((u64) svc_max_payload(rqstp)); } if (bmval0 & FATTR4_WORD0_MAXWRITE) { if ((buflen -= 8) < 0) goto out_resource; - WRITE64((u64) NFSSVC_MAXBLKSIZE); + WRITE64((u64) svc_max_payload(rqstp)); } if (bmval1 & FATTR4_WORD1_MODE) { if ((buflen -= 4) < 0) @@ -1845,7 +2010,6 @@ nfsd4_encode_getattr(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_ge nfserr = nfsd4_encode_fattr(fhp, fhp->fh_export, fhp->fh_dentry, resp->p, &buflen, getattr->ga_bmval, resp->rqstp); - if (!nfserr) resp->p += buflen; return nfserr; @@ -2039,7 +2203,8 @@ nfsd4_encode_open_downgrade(struct nfsd4_compoundres *resp, int nfserr, struct n } static int 
-nfsd4_encode_read(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_read *read) +nfsd4_encode_read(struct nfsd4_compoundres *resp, int nfserr, + struct nfsd4_read *read) { u32 eof; int v, pn; @@ -2054,31 +2219,33 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_read RESERVE_SPACE(8); /* eof flag and byte count */ - maxcount = NFSSVC_MAXBLKSIZE; + maxcount = svc_max_payload(resp->rqstp); if (maxcount > read->rd_length) maxcount = read->rd_length; len = maxcount; v = 0; while (len > 0) { - pn = resp->rqstp->rq_resused; - svc_take_page(resp->rqstp); - read->rd_iov[v].iov_base = page_address(resp->rqstp->rq_respages[pn]); - read->rd_iov[v].iov_len = len < PAGE_SIZE ? len : PAGE_SIZE; + pn = resp->rqstp->rq_resused++; + resp->rqstp->rq_vec[v].iov_base = + page_address(resp->rqstp->rq_respages[pn]); + resp->rqstp->rq_vec[v].iov_len = + len < PAGE_SIZE ? len : PAGE_SIZE; v++; len -= PAGE_SIZE; } read->rd_vlen = v; nfserr = nfsd_read(read->rd_rqstp, read->rd_fhp, read->rd_filp, - read->rd_offset, read->rd_iov, read->rd_vlen, + read->rd_offset, resp->rqstp->rq_vec, read->rd_vlen, &maxcount); if (nfserr == nfserr_symlink) nfserr = nfserr_inval; if (nfserr) return nfserr; - eof = (read->rd_offset + maxcount >= read->rd_fhp->fh_dentry->d_inode->i_size); + eof = (read->rd_offset + maxcount >= + read->rd_fhp->fh_dentry->d_inode->i_size); WRITE32(eof); WRITE32(maxcount); @@ -2088,7 +2255,6 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_read resp->xbuf->page_len = maxcount; /* Use rest of head for padding and remaining ops: */ - resp->rqstp->rq_restailpage = 0; resp->xbuf->tail[0].iov_base = p; resp->xbuf->tail[0].iov_len = 0; if (maxcount&3) { @@ -2113,8 +2279,7 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_r if (resp->xbuf->page_len) return nfserr_resource; - svc_take_page(resp->rqstp); - page = page_address(resp->rqstp->rq_respages[resp->rqstp->rq_resused-1]); + page = 
page_address(resp->rqstp->rq_respages[resp->rqstp->rq_resused++]); maxcount = PAGE_SIZE; RESERVE_SPACE(4); @@ -2138,7 +2303,6 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_r resp->xbuf->page_len = maxcount; /* Use rest of head for padding and remaining ops: */ - resp->rqstp->rq_restailpage = 0; resp->xbuf->tail[0].iov_base = p; resp->xbuf->tail[0].iov_len = 0; if (maxcount&3) { @@ -2189,8 +2353,7 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_re goto err_no_verf; } - svc_take_page(resp->rqstp); - page = page_address(resp->rqstp->rq_respages[resp->rqstp->rq_resused-1]); + page = page_address(resp->rqstp->rq_respages[resp->rqstp->rq_resused++]); readdir->common.err = 0; readdir->buflen = maxcount; readdir->buffer = page; @@ -2215,10 +2378,10 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_re p = readdir->buffer; *p++ = 0; /* no more entries */ *p++ = htonl(readdir->common.err == nfserr_eof); - resp->xbuf->page_len = ((char*)p) - (char*)page_address(resp->rqstp->rq_respages[resp->rqstp->rq_resused-1]); + resp->xbuf->page_len = ((char*)p) - (char*)page_address( + resp->rqstp->rq_respages[resp->rqstp->rq_resused-1]); /* Use rest of head for padding and remaining ops: */ - resp->rqstp->rq_restailpage = 0; resp->xbuf->tail[0].iov_base = tailbase; resp->xbuf->tail[0].iov_len = 0; resp->p = resp->xbuf->tail[0].iov_base; diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 5c6a477c20ec..39aed901514b 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -57,6 +57,7 @@ enum { NFSD_Pool_Threads, NFSD_Versions, NFSD_Ports, + NFSD_MaxBlkSize, /* * The below MUST come last. 
Otherwise we leave a hole in nfsd_files[] * with !CONFIG_NFSD_V4 and simple_fill_super() goes oops @@ -82,6 +83,7 @@ static ssize_t write_threads(struct file *file, char *buf, size_t size); static ssize_t write_pool_threads(struct file *file, char *buf, size_t size); static ssize_t write_versions(struct file *file, char *buf, size_t size); static ssize_t write_ports(struct file *file, char *buf, size_t size); +static ssize_t write_maxblksize(struct file *file, char *buf, size_t size); #ifdef CONFIG_NFSD_V4 static ssize_t write_leasetime(struct file *file, char *buf, size_t size); static ssize_t write_recoverydir(struct file *file, char *buf, size_t size); @@ -100,6 +102,7 @@ static ssize_t (*write_op[])(struct file *, char *, size_t) = { [NFSD_Pool_Threads] = write_pool_threads, [NFSD_Versions] = write_versions, [NFSD_Ports] = write_ports, + [NFSD_MaxBlkSize] = write_maxblksize, #ifdef CONFIG_NFSD_V4 [NFSD_Leasetime] = write_leasetime, [NFSD_RecoveryDir] = write_recoverydir, @@ -523,18 +526,20 @@ static ssize_t write_ports(struct file *file, char *buf, size_t size) err = nfsd_create_serv(); if (!err) { int proto = 0; - err = lockd_up(proto); - if (!err) { - err = svc_addsock(nfsd_serv, fd, buf, &proto); - if (err) - lockd_down(); + err = svc_addsock(nfsd_serv, fd, buf, &proto); + if (err >= 0) { + err = lockd_up(proto); + if (err < 0) + svc_sock_names(buf+strlen(buf)+1, nfsd_serv, buf); } /* Decrease the count, but don't shutdown the * the service */ + lock_kernel(); nfsd_serv->sv_nrthreads--; + unlock_kernel(); } - return err; + return err < 0 ? 
err : 0; } if (buf[0] == '-') { char *toclose = kstrdup(buf+1, GFP_KERNEL); @@ -545,12 +550,43 @@ static ssize_t write_ports(struct file *file, char *buf, size_t size) if (nfsd_serv) len = svc_sock_names(buf, nfsd_serv, toclose); unlock_kernel(); + if (len >= 0) + lockd_down(); kfree(toclose); return len; } return -EINVAL; } +int nfsd_max_blksize; + +static ssize_t write_maxblksize(struct file *file, char *buf, size_t size) +{ + char *mesg = buf; + if (size > 0) { + int bsize; + int rv = get_int(&mesg, &bsize); + if (rv) + return rv; + /* force bsize into allowed range and + * required alignment. + */ + if (bsize < 1024) + bsize = 1024; + if (bsize > NFSSVC_MAXBLKSIZE) + bsize = NFSSVC_MAXBLKSIZE; + bsize &= ~(1024-1); + lock_kernel(); + if (nfsd_serv && nfsd_serv->sv_nrthreads) { + unlock_kernel(); + return -EBUSY; + } + nfsd_max_blksize = bsize; + unlock_kernel(); + } + return sprintf(buf, "%d\n", nfsd_max_blksize); +} + #ifdef CONFIG_NFSD_V4 extern time_t nfs4_leasetime(void); @@ -616,6 +652,7 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent) [NFSD_Pool_Threads] = {"pool_threads", &transaction_ops, S_IWUSR|S_IRUSR}, [NFSD_Versions] = {"versions", &transaction_ops, S_IWUSR|S_IRUSR}, [NFSD_Ports] = {"portlist", &transaction_ops, S_IWUSR|S_IRUGO}, + [NFSD_MaxBlkSize] = {"max_block_size", &transaction_ops, S_IWUSR|S_IRUGO}, #ifdef CONFIG_NFSD_V4 [NFSD_Leasetime] = {"nfsv4leasetime", &transaction_ops, S_IWUSR|S_IRUSR}, [NFSD_RecoveryDir] = {"nfsv4recoverydir", &transaction_ops, S_IWUSR|S_IRUSR}, diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index 06cd0db0f32b..9ee1dab5d44a 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -146,20 +146,20 @@ nfsd_proc_read(struct svc_rqst *rqstp, struct nfsd_readargs *argp, * status, 17 words for fattr, and 1 word for the byte count. 
*/ - if (NFSSVC_MAXBLKSIZE < argp->count) { + if (NFSSVC_MAXBLKSIZE_V2 < argp->count) { printk(KERN_NOTICE "oversized read request from %u.%u.%u.%u:%d (%d bytes)\n", NIPQUAD(rqstp->rq_addr.sin_addr.s_addr), ntohs(rqstp->rq_addr.sin_port), argp->count); - argp->count = NFSSVC_MAXBLKSIZE; + argp->count = NFSSVC_MAXBLKSIZE_V2; } svc_reserve(rqstp, (19<<2) + argp->count + 4); resp->count = argp->count; nfserr = nfsd_read(rqstp, fh_copy(&resp->fh, &argp->fh), NULL, argp->offset, - argp->vec, argp->vlen, + rqstp->rq_vec, argp->vlen, &resp->count); if (nfserr) return nfserr; @@ -185,7 +185,7 @@ nfsd_proc_write(struct svc_rqst *rqstp, struct nfsd_writeargs *argp, nfserr = nfsd_write(rqstp, fh_copy(&resp->fh, &argp->fh), NULL, argp->offset, - argp->vec, argp->vlen, + rqstp->rq_vec, argp->vlen, argp->len, &stable); return nfsd_return_attrs(nfserr, resp); @@ -225,7 +225,7 @@ nfsd_proc_create(struct svc_rqst *rqstp, struct nfsd_createargs *argp, nfserr = nfserr_exist; if (isdotent(argp->name, argp->len)) goto done; - fh_lock(dirfhp); + fh_lock_nested(dirfhp, I_MUTEX_PARENT); dchild = lookup_one_len(argp->name, dirfhp->fh_dentry, argp->len); if (IS_ERR(dchild)) { nfserr = nfserrno(PTR_ERR(dchild)); @@ -553,7 +553,7 @@ static struct svc_procedure nfsd_procedures2[18] = { PROC(none, void, void, none, RC_NOCACHE, ST), PROC(lookup, diropargs, diropres, fhandle, RC_NOCACHE, ST+FH+AT), PROC(readlink, readlinkargs, readlinkres, none, RC_NOCACHE, ST+1+NFS_MAXPATHLEN/4), - PROC(read, readargs, readres, fhandle, RC_NOCACHE, ST+AT+1+NFSSVC_MAXBLKSIZE/4), + PROC(read, readargs, readres, fhandle, RC_NOCACHE, ST+AT+1+NFSSVC_MAXBLKSIZE_V2/4), PROC(none, void, void, none, RC_NOCACHE, ST), PROC(write, writeargs, attrstat, fhandle, RC_REPLBUFF, ST+AT), PROC(create, createargs, diropres, fhandle, RC_REPLBUFF, ST+FH+AT), diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 19443056ec30..6fa6340a5fb8 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -198,9 +198,26 @@ int 
nfsd_create_serv(void) unlock_kernel(); return 0; } + if (nfsd_max_blksize == 0) { + /* choose a suitable default */ + struct sysinfo i; + si_meminfo(&i); + /* Aim for 1/4096 of memory per thread + * This gives 1MB on 4Gig machines + * But only uses 32K on 128M machines. + * Bottom out at 8K on 32M and smaller. + * Of course, this is only a default. + */ + nfsd_max_blksize = NFSSVC_MAXBLKSIZE; + i.totalram <<= PAGE_SHIFT - 12; + while (nfsd_max_blksize > i.totalram && + nfsd_max_blksize >= 8*1024*2) + nfsd_max_blksize /= 2; + } atomic_set(&nfsd_busy, 0); - nfsd_serv = svc_create_pooled(&nfsd_program, NFSD_BUFSIZE, + nfsd_serv = svc_create_pooled(&nfsd_program, + NFSD_BUFSIZE - NFSSVC_MAXBLKSIZE + nfsd_max_blksize, nfsd_last_thread, nfsd, SIG_NOCLEAN, THIS_MODULE); if (nfsd_serv == NULL) diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c index 3f14a17eaa6e..1135c0d14557 100644 --- a/fs/nfsd/nfsxdr.c +++ b/fs/nfsd/nfsxdr.c @@ -254,19 +254,18 @@ nfssvc_decode_readargs(struct svc_rqst *rqstp, u32 *p, len = args->count = ntohl(*p++); p++; /* totalcount - unused */ - if (len > NFSSVC_MAXBLKSIZE) - len = NFSSVC_MAXBLKSIZE; + if (len > NFSSVC_MAXBLKSIZE_V2) + len = NFSSVC_MAXBLKSIZE_V2; /* set up somewhere to store response. 
* We take pages, put them on reslist and include in iovec */ v=0; while (len > 0) { - pn=rqstp->rq_resused; - svc_take_page(rqstp); - args->vec[v].iov_base = page_address(rqstp->rq_respages[pn]); - args->vec[v].iov_len = len < PAGE_SIZE?len:PAGE_SIZE; - len -= args->vec[v].iov_len; + pn = rqstp->rq_resused++; + rqstp->rq_vec[v].iov_base = page_address(rqstp->rq_respages[pn]); + rqstp->rq_vec[v].iov_len = len < PAGE_SIZE?len:PAGE_SIZE; + len -= rqstp->rq_vec[v].iov_len; v++; } args->vlen = v; @@ -286,21 +285,21 @@ nfssvc_decode_writeargs(struct svc_rqst *rqstp, u32 *p, args->offset = ntohl(*p++); /* offset */ p++; /* totalcount */ len = args->len = ntohl(*p++); - args->vec[0].iov_base = (void*)p; - args->vec[0].iov_len = rqstp->rq_arg.head[0].iov_len - + rqstp->rq_vec[0].iov_base = (void*)p; + rqstp->rq_vec[0].iov_len = rqstp->rq_arg.head[0].iov_len - (((void*)p) - rqstp->rq_arg.head[0].iov_base); - if (len > NFSSVC_MAXBLKSIZE) - len = NFSSVC_MAXBLKSIZE; + if (len > NFSSVC_MAXBLKSIZE_V2) + len = NFSSVC_MAXBLKSIZE_V2; v = 0; - while (len > args->vec[v].iov_len) { - len -= args->vec[v].iov_len; + while (len > rqstp->rq_vec[v].iov_len) { + len -= rqstp->rq_vec[v].iov_len; v++; - args->vec[v].iov_base = page_address(rqstp->rq_argpages[v]); - args->vec[v].iov_len = PAGE_SIZE; + rqstp->rq_vec[v].iov_base = page_address(rqstp->rq_pages[v]); + rqstp->rq_vec[v].iov_len = PAGE_SIZE; } - args->vec[v].iov_len = len; + rqstp->rq_vec[v].iov_len = len; args->vlen = v+1; - return args->vec[0].iov_len > 0; + return rqstp->rq_vec[0].iov_len > 0; } int @@ -333,8 +332,7 @@ nfssvc_decode_readlinkargs(struct svc_rqst *rqstp, u32 *p, struct nfsd_readlinka { if (!(p = decode_fh(p, &args->fh))) return 0; - svc_take_page(rqstp); - args->buffer = page_address(rqstp->rq_respages[rqstp->rq_resused-1]); + args->buffer = page_address(rqstp->rq_respages[rqstp->rq_resused++]); return xdr_argsize_check(rqstp, p); } @@ -375,8 +373,7 @@ nfssvc_decode_readdirargs(struct svc_rqst *rqstp, u32 *p, if 
(args->count > PAGE_SIZE) args->count = PAGE_SIZE; - svc_take_page(rqstp); - args->buffer = page_address(rqstp->rq_respages[rqstp->rq_resused-1]); + args->buffer = page_address(rqstp->rq_respages[rqstp->rq_resused++]); return xdr_argsize_check(rqstp, p); } @@ -416,7 +413,6 @@ nfssvc_encode_readlinkres(struct svc_rqst *rqstp, u32 *p, rqstp->rq_res.page_len = resp->len; if (resp->len & 3) { /* need to pad the tail */ - rqstp->rq_restailpage = 0; rqstp->rq_res.tail[0].iov_base = p; *p = 0; rqstp->rq_res.tail[0].iov_len = 4 - (resp->len&3); @@ -436,7 +432,6 @@ nfssvc_encode_readres(struct svc_rqst *rqstp, u32 *p, rqstp->rq_res.page_len = resp->count; if (resp->count & 3) { /* need to pad the tail */ - rqstp->rq_restailpage = 0; rqstp->rq_res.tail[0].iov_base = p; *p = 0; rqstp->rq_res.tail[0].iov_len = 4 - (resp->count&3); @@ -463,7 +458,7 @@ nfssvc_encode_statfsres(struct svc_rqst *rqstp, u32 *p, { struct kstatfs *stat = &resp->stats; - *p++ = htonl(NFSSVC_MAXBLKSIZE); /* max transfer size */ + *p++ = htonl(NFSSVC_MAXBLKSIZE_V2); /* max transfer size */ *p++ = htonl(stat->f_bsize); *p++ = htonl(stat->f_blocks); *p++ = htonl(stat->f_bfree); diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 443ebc52e382..1141bd29e4e3 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -54,6 +54,7 @@ #include <linux/nfsd_idmap.h> #include <linux/security.h> #endif /* CONFIG_NFSD_V4 */ +#include <linux/jhash.h> #include <asm/uaccess.h> @@ -81,10 +82,19 @@ struct raparms { dev_t p_dev; int p_set; struct file_ra_state p_ra; + unsigned int p_hindex; }; +struct raparm_hbucket { + struct raparms *pb_head; + spinlock_t pb_lock; +} ____cacheline_aligned_in_smp; + static struct raparms * raparml; -static struct raparms * raparm_cache; +#define RAPARM_HASH_BITS 4 +#define RAPARM_HASH_SIZE (1<<RAPARM_HASH_BITS) +#define RAPARM_HASH_MASK (RAPARM_HASH_SIZE-1) +static struct raparm_hbucket raparm_hash[RAPARM_HASH_SIZE]; /* * Called from nfsd_lookup and encode_dirent. 
Check if we have crossed @@ -437,13 +447,11 @@ nfsd4_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp, } else if (error < 0) goto out_nfserr; - if (pacl) { - error = set_nfsv4_acl_one(dentry, pacl, POSIX_ACL_XATTR_ACCESS); - if (error < 0) - goto out_nfserr; - } + error = set_nfsv4_acl_one(dentry, pacl, POSIX_ACL_XATTR_ACCESS); + if (error < 0) + goto out_nfserr; - if (dpacl) { + if (S_ISDIR(inode->i_mode)) { error = set_nfsv4_acl_one(dentry, dpacl, POSIX_ACL_XATTR_DEFAULT); if (error < 0) goto out_nfserr; @@ -743,16 +751,20 @@ nfsd_sync_dir(struct dentry *dp) * Obtain the readahead parameters for the file * specified by (dev, ino). */ -static DEFINE_SPINLOCK(ra_lock); static inline struct raparms * nfsd_get_raparms(dev_t dev, ino_t ino) { struct raparms *ra, **rap, **frap = NULL; int depth = 0; + unsigned int hash; + struct raparm_hbucket *rab; - spin_lock(&ra_lock); - for (rap = &raparm_cache; (ra = *rap); rap = &ra->p_next) { + hash = jhash_2words(dev, ino, 0xfeedbeef) & RAPARM_HASH_MASK; + rab = &raparm_hash[hash]; + + spin_lock(&rab->pb_lock); + for (rap = &rab->pb_head; (ra = *rap); rap = &ra->p_next) { if (ra->p_ino == ino && ra->p_dev == dev) goto found; depth++; @@ -761,7 +773,7 @@ nfsd_get_raparms(dev_t dev, ino_t ino) } depth = nfsdstats.ra_size*11/10; if (!frap) { - spin_unlock(&ra_lock); + spin_unlock(&rab->pb_lock); return NULL; } rap = frap; @@ -769,15 +781,16 @@ nfsd_get_raparms(dev_t dev, ino_t ino) ra->p_dev = dev; ra->p_ino = ino; ra->p_set = 0; + ra->p_hindex = hash; found: - if (rap != &raparm_cache) { + if (rap != &rab->pb_head) { *rap = ra->p_next; - ra->p_next = raparm_cache; - raparm_cache = ra; + ra->p_next = rab->pb_head; + rab->pb_head = ra; } ra->p_count++; nfsdstats.ra_depth[depth*10/nfsdstats.ra_size]++; - spin_unlock(&ra_lock); + spin_unlock(&rab->pb_lock); return ra; } @@ -791,22 +804,26 @@ nfsd_read_actor(read_descriptor_t *desc, struct page *page, unsigned long offset { unsigned long count = desc->count; struct svc_rqst 
*rqstp = desc->arg.data; + struct page **pp = rqstp->rq_respages + rqstp->rq_resused; if (size > count) size = count; if (rqstp->rq_res.page_len == 0) { get_page(page); - rqstp->rq_respages[rqstp->rq_resused++] = page; + put_page(*pp); + *pp = page; + rqstp->rq_resused++; rqstp->rq_res.page_base = offset; rqstp->rq_res.page_len = size; - } else if (page != rqstp->rq_respages[rqstp->rq_resused-1]) { + } else if (page != pp[-1]) { get_page(page); - rqstp->rq_respages[rqstp->rq_resused++] = page; + put_page(*pp); + *pp = page; + rqstp->rq_resused++; rqstp->rq_res.page_len += size; - } else { + } else rqstp->rq_res.page_len += size; - } desc->count = count - size; desc->written += size; @@ -837,7 +854,7 @@ nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, file->f_ra = ra->p_ra; if (file->f_op->sendfile && rqstp->rq_sendfile_ok) { - svc_pushback_unused_pages(rqstp); + rqstp->rq_resused = 1; err = file->f_op->sendfile(file, &offset, *count, nfsd_read_actor, rqstp); } else { @@ -849,11 +866,12 @@ nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, /* Write back readahead params */ if (ra) { - spin_lock(&ra_lock); + struct raparm_hbucket *rab = &raparm_hash[ra->p_hindex]; + spin_lock(&rab->pb_lock); ra->p_ra = file->f_ra; ra->p_set = 1; ra->p_count--; - spin_unlock(&ra_lock); + spin_unlock(&rab->pb_lock); } if (err >= 0) { @@ -1829,11 +1847,11 @@ nfsd_permission(struct svc_export *exp, struct dentry *dentry, int acc) void nfsd_racache_shutdown(void) { - if (!raparm_cache) + if (!raparml) return; dprintk("nfsd: freeing readahead buffers.\n"); kfree(raparml); - raparm_cache = raparml = NULL; + raparml = NULL; } /* * Initialize readahead param cache @@ -1842,19 +1860,31 @@ int nfsd_racache_init(int cache_size) { int i; + int j = 0; + int nperbucket; + - if (raparm_cache) + if (raparml) return 0; + if (cache_size < 2*RAPARM_HASH_SIZE) + cache_size = 2*RAPARM_HASH_SIZE; raparml = kmalloc(sizeof(struct raparms) * cache_size, 
GFP_KERNEL); if (raparml != NULL) { dprintk("nfsd: allocating %d readahead buffers.\n", cache_size); + for (i = 0 ; i < RAPARM_HASH_SIZE ; i++) { + raparm_hash[i].pb_head = NULL; + spin_lock_init(&raparm_hash[i].pb_lock); + } + nperbucket = cache_size >> RAPARM_HASH_BITS; memset(raparml, 0, sizeof(struct raparms) * cache_size); for (i = 0; i < cache_size - 1; i++) { - raparml[i].p_next = raparml + i + 1; + if (i % nperbucket == 0) + raparm_hash[j++].pb_head = raparml + i; + if (i % nperbucket < nperbucket-1) + raparml[i].p_next = raparml + i + 1; } - raparm_cache = raparml; } else { printk(KERN_WARNING "nfsd: Could not allocate memory read-ahead cache.\n"); diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index 7e5a2f5ebeb0..9c69bcacad22 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -1780,7 +1780,7 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th, err = -EDQUOT; goto out_end_trans; } - if (!dir || !dir->i_nlink) { + if (!dir->i_nlink) { err = -EPERM; goto out_bad_inode; } |