diff options
Diffstat (limited to 'fs')
113 files changed, 684 insertions, 798 deletions
diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c index 3128aa948a4e..9ac4ffe9ac7d 100644 --- a/fs/9p/vfs_addr.c +++ b/fs/9p/vfs_addr.c @@ -32,6 +32,7 @@ #include <linux/inet.h> #include <linux/pagemap.h> #include <linux/idr.h> +#include <linux/sched.h> #include "debug.h" #include "v9fs.h" diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c index 775e26e82cbc..d93960429c09 100644 --- a/fs/9p/vfs_dentry.c +++ b/fs/9p/vfs_dentry.c @@ -33,6 +33,7 @@ #include <linux/inet.h> #include <linux/namei.h> #include <linux/idr.h> +#include <linux/sched.h> #include "debug.h" #include "v9fs.h" diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 7624821729a0..c76cd8fa3f6c 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -33,6 +33,7 @@ #include <linux/inet.h> #include <linux/namei.h> #include <linux/idr.h> +#include <linux/sched.h> #include "debug.h" #include "v9fs.h" diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c index 8eb9263a67b9..7bdf8b326841 100644 --- a/fs/9p/vfs_super.c +++ b/fs/9p/vfs_super.c @@ -36,6 +36,7 @@ #include <linux/seq_file.h> #include <linux/mount.h> #include <linux/idr.h> +#include <linux/sched.h> #include "debug.h" #include "v9fs.h" diff --git a/fs/adfs/super.c b/fs/adfs/super.c index 30c296508497..de2ed5ca3351 100644 --- a/fs/adfs/super.c +++ b/fs/adfs/super.c @@ -232,8 +232,7 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag { struct adfs_inode_info *ei = (struct adfs_inode_info *) foo; - if (flags & SLAB_CTOR_CONSTRUCTOR) - inode_init_once(&ei->vfs_inode); + inode_init_once(&ei->vfs_inode); } static int init_inodecache(void) diff --git a/fs/affs/inode.c b/fs/affs/inode.c index c5b9d73c084a..4609a6c13fe9 100644 --- a/fs/affs/inode.c +++ b/fs/affs/inode.c @@ -9,7 +9,7 @@ * * (C) 1991 Linus Torvalds - minix filesystem */ - +#include <linux/sched.h> #include "affs.h" extern const struct inode_operations affs_symlink_inode_operations; diff --git a/fs/affs/super.c b/fs/affs/super.c index 
beff7d21e6e2..6d0ebc321530 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c @@ -15,6 +15,7 @@ #include <linux/statfs.h> #include <linux/parser.h> #include <linux/magic.h> +#include <linux/sched.h> #include "affs.h" extern struct timezone sys_tz; @@ -87,11 +88,9 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag { struct affs_inode_info *ei = (struct affs_inode_info *) foo; - if (flags & SLAB_CTOR_CONSTRUCTOR) { - init_MUTEX(&ei->i_link_lock); - init_MUTEX(&ei->i_ext_lock); - inode_init_once(&ei->vfs_inode); - } + init_MUTEX(&ei->i_link_lock); + init_MUTEX(&ei->i_ext_lock); + inode_init_once(&ei->vfs_inode); } static int init_inodecache(void) diff --git a/fs/afs/callback.c b/fs/afs/callback.c index f64e40fefc02..bacf518c6fa8 100644 --- a/fs/afs/callback.c +++ b/fs/afs/callback.c @@ -17,6 +17,7 @@ #include <linux/module.h> #include <linux/init.h> #include <linux/circ_buf.h> +#include <linux/sched.h> #include "internal.h" unsigned afs_vnode_update_timeout = 10; diff --git a/fs/afs/cell.c b/fs/afs/cell.c index 9b1311a1df51..175a567db78c 100644 --- a/fs/afs/cell.c +++ b/fs/afs/cell.c @@ -13,6 +13,7 @@ #include <linux/slab.h> #include <linux/key.h> #include <linux/ctype.h> +#include <linux/sched.h> #include <keys/rxrpc-type.h> #include "internal.h" diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 719af4fb15dc..546c59522eb1 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -16,6 +16,7 @@ #include <linux/fs.h> #include <linux/pagemap.h> #include <linux/ctype.h> +#include <linux/sched.h> #include "internal.h" static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry, diff --git a/fs/afs/inode.c b/fs/afs/inode.c index 47f5fed7195d..d196840127c6 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -19,6 +19,7 @@ #include <linux/slab.h> #include <linux/fs.h> #include <linux/pagemap.h> +#include <linux/sched.h> #include "internal.h" struct afs_iget_data { diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 
4953ba5a6f44..2dac3ad2c44b 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -16,6 +16,7 @@ #include <linux/skbuff.h> #include <linux/rxrpc.h> #include <linux/key.h> +#include <linux/workqueue.h> #include "afs.h" #include "afs_vl.h" diff --git a/fs/afs/main.c b/fs/afs/main.c index f1f71ff7d5c6..cd21195bbb24 100644 --- a/fs/afs/main.c +++ b/fs/afs/main.c @@ -13,6 +13,7 @@ #include <linux/moduleparam.h> #include <linux/init.h> #include <linux/completion.h> +#include <linux/sched.h> #include "internal.h" MODULE_DESCRIPTION("AFS Client File System"); diff --git a/fs/afs/proc.c b/fs/afs/proc.c index d5601f617cdb..13df512aea9e 100644 --- a/fs/afs/proc.c +++ b/fs/afs/proc.c @@ -13,6 +13,7 @@ #include <linux/module.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> +#include <linux/sched.h> #include <asm/uaccess.h> #include "internal.h" diff --git a/fs/afs/security.c b/fs/afs/security.c index e0ea88b63ebf..566fe712c682 100644 --- a/fs/afs/security.c +++ b/fs/afs/security.c @@ -13,6 +13,7 @@ #include <linux/slab.h> #include <linux/fs.h> #include <linux/ctype.h> +#include <linux/sched.h> #include <keys/rxrpc-type.h> #include "internal.h" diff --git a/fs/afs/super.c b/fs/afs/super.c index 579af632c8e8..2e8496ba1205 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -22,6 +22,7 @@ #include <linux/pagemap.h> #include <linux/parser.h> #include <linux/statfs.h> +#include <linux/sched.h> #include "internal.h" #define AFS_FS_MAGIC 0x6B414653 /* 'kAFS' */ @@ -47,7 +48,6 @@ struct file_system_type afs_fs_type = { static const struct super_operations afs_super_ops = { .statfs = afs_statfs, .alloc_inode = afs_alloc_inode, - .drop_inode = generic_delete_inode, .write_inode = afs_write_inode, .destroy_inode = afs_destroy_inode, .clear_inode = afs_clear_inode, @@ -452,17 +452,15 @@ static void afs_i_init_once(void *_vnode, struct kmem_cache *cachep, { struct afs_vnode *vnode = _vnode; - if (flags & SLAB_CTOR_CONSTRUCTOR) { - memset(vnode, 0, sizeof(*vnode)); - 
inode_init_once(&vnode->vfs_inode); - init_waitqueue_head(&vnode->update_waitq); - mutex_init(&vnode->permits_lock); - mutex_init(&vnode->validate_lock); - spin_lock_init(&vnode->writeback_lock); - spin_lock_init(&vnode->lock); - INIT_LIST_HEAD(&vnode->writebacks); - INIT_WORK(&vnode->cb_broken_work, afs_broken_callback_work); - } + memset(vnode, 0, sizeof(*vnode)); + inode_init_once(&vnode->vfs_inode); + init_waitqueue_head(&vnode->update_waitq); + mutex_init(&vnode->permits_lock); + mutex_init(&vnode->validate_lock); + spin_lock_init(&vnode->writeback_lock); + spin_lock_init(&vnode->lock); + INIT_LIST_HEAD(&vnode->writebacks); + INIT_WORK(&vnode->cb_broken_work, afs_broken_callback_work); } /* diff --git a/fs/afs/vlocation.c b/fs/afs/vlocation.c index 3370cdb72566..09e3ad0fc7cc 100644 --- a/fs/afs/vlocation.c +++ b/fs/afs/vlocation.c @@ -12,6 +12,7 @@ #include <linux/kernel.h> #include <linux/module.h> #include <linux/init.h> +#include <linux/sched.h> #include "internal.h" unsigned afs_vlocation_timeout = 10; /* volume location timeout in seconds */ diff --git a/fs/afs/vnode.c b/fs/afs/vnode.c index c36c98ce2c3c..232c55dc245d 100644 --- a/fs/afs/vnode.c +++ b/fs/afs/vnode.c @@ -14,6 +14,7 @@ #include <linux/init.h> #include <linux/slab.h> #include <linux/fs.h> +#include <linux/sched.h> #include "internal.h" #if 0 diff --git a/fs/afs/volume.c b/fs/afs/volume.c index dd160cada45d..8bab0e3437f9 100644 --- a/fs/afs/volume.c +++ b/fs/afs/volume.c @@ -15,6 +15,7 @@ #include <linux/slab.h> #include <linux/fs.h> #include <linux/pagemap.h> +#include <linux/sched.h> #include "internal.h" static const char *afs_voltypes[] = { "R/W", "R/O", "BAK" }; diff --git a/fs/afs/write.c b/fs/afs/write.c index 28f37516c126..a03b92a0fe1d 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -206,7 +206,6 @@ int afs_prepare_write(struct file *file, struct page *page, _leave(" = %d [prep]", ret); return ret; } - SetPageUptodate(page); } try_again: @@ -311,8 +310,8 @@ int 
afs_commit_write(struct file *file, struct page *page, spin_unlock(&vnode->writeback_lock); } + SetPageUptodate(page); set_page_dirty(page); - if (PageDirty(page)) _debug("dirtied"); diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index fe96108a788d..a5c5171c2828 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c @@ -292,10 +292,8 @@ befs_destroy_inode(struct inode *inode) static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags) { struct befs_inode_info *bi = (struct befs_inode_info *) foo; - - if (flags & SLAB_CTOR_CONSTRUCTOR) { - inode_init_once(&bi->vfs_inode); - } + + inode_init_once(&bi->vfs_inode); } static void diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c index edc08d89aabc..58c7bd9f5301 100644 --- a/fs/bfs/inode.c +++ b/fs/bfs/inode.c @@ -248,8 +248,7 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag { struct bfs_inode_info *bi = foo; - if (flags & SLAB_CTOR_CONSTRUCTOR) - inode_init_once(&bi->vfs_inode); + inode_init_once(&bi->vfs_inode); } static int init_inodecache(void) diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index 72d0b412c376..330fd3fe8546 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -18,7 +18,7 @@ #include <linux/module.h> #include <linux/init.h> - +#include <linux/sched.h> #include <linux/binfmts.h> #include <linux/slab.h> #include <linux/ctype.h> diff --git a/fs/block_dev.c b/fs/block_dev.c index 742899240872..ea1480a16f51 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -458,17 +458,15 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag struct bdev_inode *ei = (struct bdev_inode *) foo; struct block_device *bdev = &ei->bdev; - if (flags & SLAB_CTOR_CONSTRUCTOR) { - memset(bdev, 0, sizeof(*bdev)); - mutex_init(&bdev->bd_mutex); - sema_init(&bdev->bd_mount_sem, 1); - INIT_LIST_HEAD(&bdev->bd_inodes); - INIT_LIST_HEAD(&bdev->bd_list); + memset(bdev, 0, sizeof(*bdev)); + mutex_init(&bdev->bd_mutex); + 
sema_init(&bdev->bd_mount_sem, 1); + INIT_LIST_HEAD(&bdev->bd_inodes); + INIT_LIST_HEAD(&bdev->bd_list); #ifdef CONFIG_SYSFS - INIT_LIST_HEAD(&bdev->bd_holder_list); + INIT_LIST_HEAD(&bdev->bd_holder_list); #endif - inode_init_once(&ei->vfs_inode); - } + inode_init_once(&ei->vfs_inode); } static inline void __bd_forget(struct inode *inode) diff --git a/fs/buffer.c b/fs/buffer.c index aecd057cd0e0..aa68206bd517 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -981,7 +981,8 @@ grow_dev_page(struct block_device *bdev, sector_t block, struct page *page; struct buffer_head *bh; - page = find_or_create_page(inode->i_mapping, index, GFP_NOFS); + page = find_or_create_page(inode->i_mapping, index, + mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS); if (!page) return NULL; @@ -2100,7 +2101,7 @@ int cont_prepare_write(struct page *page, unsigned offset, PAGE_CACHE_SIZE, get_block); if (status) goto out_unmap; - zero_user_page(page, zerofrom, PAGE_CACHE_SIZE - zerofrom, + zero_user_page(new_page, zerofrom, PAGE_CACHE_SIZE - zerofrom, KM_USER0); generic_commit_write(NULL, new_page, zerofrom, PAGE_CACHE_SIZE); unlock_page(new_page); @@ -2898,8 +2899,9 @@ static void recalc_bh_state(void) struct buffer_head *alloc_buffer_head(gfp_t gfp_flags) { - struct buffer_head *ret = kmem_cache_alloc(bh_cachep, gfp_flags); + struct buffer_head *ret = kmem_cache_zalloc(bh_cachep, gfp_flags); if (ret) { + INIT_LIST_HEAD(&ret->b_assoc_buffers); get_cpu_var(bh_accounting).nr++; recalc_bh_state(); put_cpu_var(bh_accounting); @@ -2918,17 +2920,6 @@ void free_buffer_head(struct buffer_head *bh) } EXPORT_SYMBOL(free_buffer_head); -static void -init_buffer_head(void *data, struct kmem_cache *cachep, unsigned long flags) -{ - if (flags & SLAB_CTOR_CONSTRUCTOR) { - struct buffer_head * bh = (struct buffer_head *)data; - - memset(bh, 0, sizeof(*bh)); - INIT_LIST_HEAD(&bh->b_assoc_buffers); - } -} - static void buffer_exit_cpu(int cpu) { int i; @@ -2955,12 +2946,8 @@ void __init buffer_init(void) { int 
nrpages; - bh_cachep = kmem_cache_create("buffer_head", - sizeof(struct buffer_head), 0, - (SLAB_RECLAIM_ACCOUNT|SLAB_PANIC| - SLAB_MEM_SPREAD), - init_buffer_head, - NULL); + bh_cachep = KMEM_CACHE(buffer_head, + SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_MEM_SPREAD); /* * Limit the bh occupancy to 10% of ZONE_NORMAL diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 8568e100953c..d38c69b591cf 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -701,10 +701,8 @@ cifs_init_once(void *inode, struct kmem_cache * cachep, unsigned long flags) { struct cifsInodeInfo *cifsi = inode; - if (flags & SLAB_CTOR_CONSTRUCTOR) { - inode_init_once(&cifsi->vfs_inode); - INIT_LIST_HEAD(&cifsi->lockList); - } + inode_init_once(&cifsi->vfs_inode); + INIT_LIST_HEAD(&cifsi->lockList); } static int diff --git a/fs/coda/cache.c b/fs/coda/cache.c index 5d0527133266..fcb88fa8d2f2 100644 --- a/fs/coda/cache.c +++ b/fs/coda/cache.c @@ -16,6 +16,7 @@ #include <asm/uaccess.h> #include <linux/string.h> #include <linux/list.h> +#include <linux/sched.h> #include <linux/coda.h> #include <linux/coda_linux.h> diff --git a/fs/coda/inode.c b/fs/coda/inode.c index 0aaff3651d14..dbff1bd4fb96 100644 --- a/fs/coda/inode.c +++ b/fs/coda/inode.c @@ -62,8 +62,7 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag { struct coda_inode_info *ei = (struct coda_inode_info *) foo; - if (flags & SLAB_CTOR_CONSTRUCTOR) - inode_init_once(&ei->vfs_inode); + inode_init_once(&ei->vfs_inode); } int coda_init_inodecache(void) diff --git a/fs/coda/upcall.c b/fs/coda/upcall.c index a5b5e631ba61..5faacdb1a479 100644 --- a/fs/coda/upcall.c +++ b/fs/coda/upcall.c @@ -16,7 +16,7 @@ #include <asm/system.h> #include <linux/signal.h> - +#include <linux/sched.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/mm.h> diff --git a/fs/compat.c b/fs/compat.c index 7b21b0a82596..1de2331db844 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -2230,21 +2230,16 @@ asmlinkage long 
compat_sys_signalfd(int ufd, asmlinkage long compat_sys_timerfd(int ufd, int clockid, int flags, const struct compat_itimerspec __user *utmr) { - long res; struct itimerspec t; struct itimerspec __user *ut; - res = -EFAULT; if (get_compat_itimerspec(&t, utmr)) - goto err_exit; + return -EFAULT; ut = compat_alloc_user_space(sizeof(*ut)); - if (copy_to_user(ut, &t, sizeof(t)) ) - goto err_exit; + if (copy_to_user(ut, &t, sizeof(t))) + return -EFAULT; - res = sys_timerfd(ufd, clockid, flags, ut); -err_exit: - return res; + return sys_timerfd(ufd, clockid, flags, ut); } #endif /* CONFIG_TIMERFD */ - diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c index 2ec9beac17cf..ddc003a9d214 100644 --- a/fs/configfs/inode.c +++ b/fs/configfs/inode.c @@ -32,6 +32,7 @@ #include <linux/namei.h> #include <linux/backing-dev.h> #include <linux/capability.h> +#include <linux/sched.h> #include <linux/configfs.h> #include "configfs_internal.h" diff --git a/fs/dquot.c b/fs/dquot.c index 3a995841de90..8819d281500c 100644 --- a/fs/dquot.c +++ b/fs/dquot.c @@ -1421,7 +1421,7 @@ int vfs_quota_off(struct super_block *sb, int type) /* If quota was reenabled in the meantime, we have * nothing to do */ if (!sb_has_quota_enabled(sb, cnt)) { - mutex_lock(&toputinode[cnt]->i_mutex); + mutex_lock_nested(&toputinode[cnt]->i_mutex, I_MUTEX_QUOTA); toputinode[cnt]->i_flags &= ~(S_IMMUTABLE | S_NOATIME | S_NOQUOTA); truncate_inode_pages(&toputinode[cnt]->i_data, 0); diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index 8cbf3f69ebe5..606128f5c927 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c @@ -583,8 +583,7 @@ inode_info_init_once(void *vptr, struct kmem_cache *cachep, unsigned long flags) { struct ecryptfs_inode_info *ei = (struct ecryptfs_inode_info *)vptr; - if (flags & SLAB_CTOR_CONSTRUCTOR) - inode_init_once(&ei->vfs_inode); + inode_init_once(&ei->vfs_inode); } static struct ecryptfs_cache_info { diff --git a/fs/ecryptfs/messaging.c b/fs/ecryptfs/messaging.c index 
3baf253be95a..a9d87c47f72d 100644 --- a/fs/ecryptfs/messaging.c +++ b/fs/ecryptfs/messaging.c @@ -19,7 +19,7 @@ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA * 02111-1307, USA. */ - +#include <linux/sched.h> #include "ecryptfs_kernel.h" static LIST_HEAD(ecryptfs_msg_ctx_free_list); diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c index 0770c4b66f53..88ea6697908f 100644 --- a/fs/ecryptfs/mmap.c +++ b/fs/ecryptfs/mmap.c @@ -364,18 +364,14 @@ static int fill_zeros_to_end_of_page(struct page *page, unsigned int to) { struct inode *inode = page->mapping->host; int end_byte_in_page; - char *page_virt; if ((i_size_read(inode) / PAGE_CACHE_SIZE) != page->index) goto out; end_byte_in_page = i_size_read(inode) % PAGE_CACHE_SIZE; if (to > end_byte_in_page) end_byte_in_page = to; - page_virt = kmap_atomic(page, KM_USER0); - memset((page_virt + end_byte_in_page), 0, - (PAGE_CACHE_SIZE - end_byte_in_page)); - kunmap_atomic(page_virt, KM_USER0); - flush_dcache_page(page); + zero_user_page(page, end_byte_in_page, + PAGE_CACHE_SIZE - end_byte_in_page, KM_USER0); out: return 0; } @@ -740,7 +736,6 @@ int write_zeros(struct file *file, pgoff_t index, int start, int num_zeros) { int rc = 0; struct page *tmp_page; - char *tmp_page_virt; tmp_page = ecryptfs_get1page(file, index); if (IS_ERR(tmp_page)) { @@ -757,10 +752,7 @@ int write_zeros(struct file *file, pgoff_t index, int start, int num_zeros) page_cache_release(tmp_page); goto out; } - tmp_page_virt = kmap_atomic(tmp_page, KM_USER0); - memset(((char *)tmp_page_virt + start), 0, num_zeros); - kunmap_atomic(tmp_page_virt, KM_USER0); - flush_dcache_page(tmp_page); + zero_user_page(tmp_page, start, num_zeros, KM_USER0); rc = ecryptfs_commit_write(file, tmp_page, start, start + num_zeros); if (rc < 0) { ecryptfs_printk(KERN_ERR, "Error attempting to write zero's " diff --git a/fs/efs/super.c b/fs/efs/super.c index ba7a8b9da0c1..e0a6839e68ae 100644 --- a/fs/efs/super.c +++ b/fs/efs/super.c @@ -72,8 +72,7 @@ static void 
init_once(void * foo, struct kmem_cache * cachep, unsigned long flag { struct efs_inode_info *ei = (struct efs_inode_info *) foo; - if (flags & SLAB_CTOR_CONSTRUCTOR) - inode_init_once(&ei->vfs_inode); + inode_init_once(&ei->vfs_inode); } static int init_inodecache(void) diff --git a/fs/eventfd.c b/fs/eventfd.c index 480e2b3c4166..2ce19c000d2a 100644 --- a/fs/eventfd.c +++ b/fs/eventfd.c @@ -17,7 +17,6 @@ #include <linux/eventfd.h> struct eventfd_ctx { - spinlock_t lock; wait_queue_head_t wqh; /* * Every time that a write(2) is performed on an eventfd, the @@ -45,13 +44,13 @@ int eventfd_signal(struct file *file, int n) if (n < 0) return -EINVAL; - spin_lock_irqsave(&ctx->lock, flags); + spin_lock_irqsave(&ctx->wqh.lock, flags); if (ULLONG_MAX - ctx->count < n) n = (int) (ULLONG_MAX - ctx->count); ctx->count += n; if (waitqueue_active(&ctx->wqh)) wake_up_locked(&ctx->wqh); - spin_unlock_irqrestore(&ctx->lock, flags); + spin_unlock_irqrestore(&ctx->wqh.lock, flags); return n; } @@ -70,14 +69,14 @@ static unsigned int eventfd_poll(struct file *file, poll_table *wait) poll_wait(file, &ctx->wqh, wait); - spin_lock_irqsave(&ctx->lock, flags); + spin_lock_irqsave(&ctx->wqh.lock, flags); if (ctx->count > 0) events |= POLLIN; if (ctx->count == ULLONG_MAX) events |= POLLERR; if (ULLONG_MAX - 1 > ctx->count) events |= POLLOUT; - spin_unlock_irqrestore(&ctx->lock, flags); + spin_unlock_irqrestore(&ctx->wqh.lock, flags); return events; } @@ -92,7 +91,7 @@ static ssize_t eventfd_read(struct file *file, char __user *buf, size_t count, if (count < sizeof(ucnt)) return -EINVAL; - spin_lock_irq(&ctx->lock); + spin_lock_irq(&ctx->wqh.lock); res = -EAGAIN; ucnt = ctx->count; if (ucnt > 0) @@ -110,9 +109,9 @@ static ssize_t eventfd_read(struct file *file, char __user *buf, size_t count, res = -ERESTARTSYS; break; } - spin_unlock_irq(&ctx->lock); + spin_unlock_irq(&ctx->wqh.lock); schedule(); - spin_lock_irq(&ctx->lock); + spin_lock_irq(&ctx->wqh.lock); } __remove_wait_queue(&ctx->wqh, 
&wait); __set_current_state(TASK_RUNNING); @@ -122,7 +121,7 @@ static ssize_t eventfd_read(struct file *file, char __user *buf, size_t count, if (waitqueue_active(&ctx->wqh)) wake_up_locked(&ctx->wqh); } - spin_unlock_irq(&ctx->lock); + spin_unlock_irq(&ctx->wqh.lock); if (res > 0 && put_user(ucnt, (__u64 __user *) buf)) return -EFAULT; @@ -143,7 +142,7 @@ static ssize_t eventfd_write(struct file *file, const char __user *buf, size_t c return -EFAULT; if (ucnt == ULLONG_MAX) return -EINVAL; - spin_lock_irq(&ctx->lock); + spin_lock_irq(&ctx->wqh.lock); res = -EAGAIN; if (ULLONG_MAX - ctx->count > ucnt) res = sizeof(ucnt); @@ -159,9 +158,9 @@ static ssize_t eventfd_write(struct file *file, const char __user *buf, size_t c res = -ERESTARTSYS; break; } - spin_unlock_irq(&ctx->lock); + spin_unlock_irq(&ctx->wqh.lock); schedule(); - spin_lock_irq(&ctx->lock); + spin_lock_irq(&ctx->wqh.lock); } __remove_wait_queue(&ctx->wqh, &wait); __set_current_state(TASK_RUNNING); @@ -171,7 +170,7 @@ static ssize_t eventfd_write(struct file *file, const char __user *buf, size_t c if (waitqueue_active(&ctx->wqh)) wake_up_locked(&ctx->wqh); } - spin_unlock_irq(&ctx->lock); + spin_unlock_irq(&ctx->wqh.lock); return res; } @@ -210,7 +209,6 @@ asmlinkage long sys_eventfd(unsigned int count) return -ENOMEM; init_waitqueue_head(&ctx->wqh); - spin_lock_init(&ctx->lock); ctx->count = count; /* diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 1aad34ea61a4..0b73cd45a06d 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1,6 +1,6 @@ /* - * fs/eventpoll.c ( Efficent event polling implementation ) - * Copyright (C) 2001,...,2006 Davide Libenzi + * fs/eventpoll.c (Efficent event polling implementation) + * Copyright (C) 2001,...,2007 Davide Libenzi * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -26,7 +26,6 @@ #include <linux/hash.h> #include <linux/spinlock.h> #include <linux/syscalls.h> -#include 
<linux/rwsem.h> #include <linux/rbtree.h> #include <linux/wait.h> #include <linux/eventpoll.h> @@ -39,15 +38,14 @@ #include <asm/io.h> #include <asm/mman.h> #include <asm/atomic.h> -#include <asm/semaphore.h> /* * LOCKING: * There are three level of locking required by epoll : * * 1) epmutex (mutex) - * 2) ep->sem (rw_semaphore) - * 3) ep->lock (rw_lock) + * 2) ep->mtx (mutex) + * 3) ep->lock (spinlock) * * The acquire order is the one listed above, from 1 to 3. * We need a spinlock (ep->lock) because we manipulate objects @@ -57,20 +55,20 @@ * a spinlock. During the event transfer loop (from kernel to * user space) we could end up sleeping due a copy_to_user(), so * we need a lock that will allow us to sleep. This lock is a - * read-write semaphore (ep->sem). It is acquired on read during - * the event transfer loop and in write during epoll_ctl(EPOLL_CTL_DEL) - * and during eventpoll_release_file(). Then we also need a global - * semaphore to serialize eventpoll_release_file() and ep_free(). - * This semaphore is acquired by ep_free() during the epoll file + * mutex (ep->mtx). It is acquired during the event transfer loop, + * during epoll_ctl(EPOLL_CTL_DEL) and during eventpoll_release_file(). + * Then we also need a global mutex to serialize eventpoll_release_file() + * and ep_free(). + * This mutex is acquired by ep_free() during the epoll file * cleanup path and it is also acquired by eventpoll_release_file() * if a file has been pushed inside an epoll set and it is then * close()d without a previous call toepoll_ctl(EPOLL_CTL_DEL). - * It is possible to drop the "ep->sem" and to use the global - * semaphore "epmutex" (together with "ep->lock") to have it working, - * but having "ep->sem" will make the interface more scalable. + * It is possible to drop the "ep->mtx" and to use the global + * mutex "epmutex" (together with "ep->lock") to have it working, + * but having "ep->mtx" will make the interface more scalable. 
* Events that require holding "epmutex" are very rare, while for - * normal operations the epoll private "ep->sem" will guarantee - * a greater scalability. + * normal operations the epoll private "ep->mtx" will guarantee + * a better scalability. */ #define DEBUG_EPOLL 0 @@ -102,6 +100,8 @@ #define EP_MAX_EVENTS (INT_MAX / sizeof(struct epoll_event)) +#define EP_UNACTIVE_PTR ((void *) -1L) + struct epoll_filefd { struct file *file; int fd; @@ -111,7 +111,7 @@ struct epoll_filefd { * Node that is linked into the "wake_task_list" member of the "struct poll_safewake". * It is used to keep track on all tasks that are currently inside the wake_up() code * to 1) short-circuit the one coming from the same task and same wait queue head - * ( loop ) 2) allow a maximum number of epoll descriptors inclusion nesting + * (loop) 2) allow a maximum number of epoll descriptors inclusion nesting * 3) let go the ones coming from other tasks. */ struct wake_task_node { @@ -130,21 +130,57 @@ struct poll_safewake { }; /* + * Each file descriptor added to the eventpoll interface will + * have an entry of this type linked to the "rbr" RB tree. + */ +struct epitem { + /* RB tree node used to link this structure to the eventpoll RB tree */ + struct rb_node rbn; + + /* List header used to link this structure to the eventpoll ready list */ + struct list_head rdllink; + + /* + * Works together "struct eventpoll"->ovflist in keeping the + * single linked chain of items. 
+ */ + struct epitem *next; + + /* The file descriptor information this item refers to */ + struct epoll_filefd ffd; + + /* Number of active wait queue attached to poll operations */ + int nwait; + + /* List containing poll wait queues */ + struct list_head pwqlist; + + /* The "container" of this item */ + struct eventpoll *ep; + + /* List header used to link this item to the "struct file" items list */ + struct list_head fllink; + + /* The structure that describe the interested events and the source fd */ + struct epoll_event event; +}; + +/* * This structure is stored inside the "private_data" member of the file * structure and rapresent the main data sructure for the eventpoll * interface. */ struct eventpoll { /* Protect the this structure access */ - rwlock_t lock; + spinlock_t lock; /* - * This semaphore is used to ensure that files are not removed - * while epoll is using them. This is read-held during the event - * collection loop and it is write-held during the file cleanup - * path, the epoll file exit code and the ctl operations. + * This mutex is used to ensure that files are not removed + * while epoll is using them. This is held during the event + * collection loop, the file cleanup path, the epoll file exit + * code and the ctl operations. */ - struct rw_semaphore sem; + struct mutex mtx; /* Wait queue used by sys_epoll_wait() */ wait_queue_head_t wq; @@ -155,8 +191,15 @@ struct eventpoll { /* List of ready file descriptors */ struct list_head rdllist; - /* RB-Tree root used to store monitored fd structs */ + /* RB tree root used to store monitored fd structs */ struct rb_root rbr; + + /* + * This is a single linked list that chains all the "struct epitem" that + * happened while transfering ready events to userspace w/out + * holding ->lock. 
+ */ + struct epitem *ovflist; }; /* Wait structure used by the poll hooks */ @@ -177,42 +220,6 @@ struct eppoll_entry { wait_queue_head_t *whead; }; -/* - * Each file descriptor added to the eventpoll interface will - * have an entry of this type linked to the "rbr" RB tree. - */ -struct epitem { - /* RB-Tree node used to link this structure to the eventpoll rb-tree */ - struct rb_node rbn; - - /* List header used to link this structure to the eventpoll ready list */ - struct list_head rdllink; - - /* The file descriptor information this item refers to */ - struct epoll_filefd ffd; - - /* Number of active wait queue attached to poll operations */ - int nwait; - - /* List containing poll wait queues */ - struct list_head pwqlist; - - /* The "container" of this item */ - struct eventpoll *ep; - - /* The structure that describe the interested events and the source fd */ - struct epoll_event event; - - /* - * Used to keep track of the usage count of the structure. This avoids - * that the structure will desappear from underneath our processing. - */ - atomic_t usecnt; - - /* List header used to link this item to the "struct file" items list */ - struct list_head fllink; -}; - /* Wrapper struct used by poll queueing */ struct ep_pqueue { poll_table pt; @@ -220,7 +227,7 @@ struct ep_pqueue { }; /* - * This semaphore is used to serialize ep_free() and eventpoll_release_file(). + * This mutex is used to serialize ep_free() and eventpoll_release_file(). 
*/ static struct mutex epmutex; @@ -234,7 +241,7 @@ static struct kmem_cache *epi_cache __read_mostly; static struct kmem_cache *pwq_cache __read_mostly; -/* Setup the structure that is used as key for the rb-tree */ +/* Setup the structure that is used as key for the RB tree */ static inline void ep_set_ffd(struct epoll_filefd *ffd, struct file *file, int fd) { @@ -242,7 +249,7 @@ static inline void ep_set_ffd(struct epoll_filefd *ffd, ffd->fd = fd; } -/* Compare rb-tree keys */ +/* Compare RB tree keys */ static inline int ep_cmp_ffd(struct epoll_filefd *p1, struct epoll_filefd *p2) { @@ -250,20 +257,20 @@ static inline int ep_cmp_ffd(struct epoll_filefd *p1, (p1->file < p2->file ? -1 : p1->fd - p2->fd)); } -/* Special initialization for the rb-tree node to detect linkage */ +/* Special initialization for the RB tree node to detect linkage */ static inline void ep_rb_initnode(struct rb_node *n) { rb_set_parent(n, n); } -/* Removes a node from the rb-tree and marks it for a fast is-linked check */ +/* Removes a node from the RB tree and marks it for a fast is-linked check */ static inline void ep_rb_erase(struct rb_node *n, struct rb_root *r) { rb_erase(n, r); rb_set_parent(n, n); } -/* Fast check to verify that the item is linked to the main rb-tree */ +/* Fast check to verify that the item is linked to the main RB tree */ static inline int ep_rb_linked(struct rb_node *n) { return rb_parent(n) != n; @@ -381,78 +388,11 @@ static void ep_unregister_pollwait(struct eventpoll *ep, struct epitem *epi) } /* - * Unlink the "struct epitem" from all places it might have been hooked up. - * This function must be called with write IRQ lock on "ep->lock". - */ -static int ep_unlink(struct eventpoll *ep, struct epitem *epi) -{ - int error; - - /* - * It can happen that this one is called for an item already unlinked. - * The check protect us from doing a double unlink ( crash ). 
- */ - error = -ENOENT; - if (!ep_rb_linked(&epi->rbn)) - goto error_return; - - /* - * Clear the event mask for the unlinked item. This will avoid item - * notifications to be sent after the unlink operation from inside - * the kernel->userspace event transfer loop. - */ - epi->event.events = 0; - - /* - * At this point is safe to do the job, unlink the item from our rb-tree. - * This operation togheter with the above check closes the door to - * double unlinks. - */ - ep_rb_erase(&epi->rbn, &ep->rbr); - - /* - * If the item we are going to remove is inside the ready file descriptors - * we want to remove it from this list to avoid stale events. - */ - if (ep_is_linked(&epi->rdllink)) - list_del_init(&epi->rdllink); - - error = 0; -error_return: - - DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_unlink(%p, %p) = %d\n", - current, ep, epi->ffd.file, error)); - - return error; -} - -/* - * Increment the usage count of the "struct epitem" making it sure - * that the user will have a valid pointer to reference. - */ -static void ep_use_epitem(struct epitem *epi) -{ - atomic_inc(&epi->usecnt); -} - -/* - * Decrement ( release ) the usage count by signaling that the user - * has finished using the structure. It might lead to freeing the - * structure itself if the count goes to zero. - */ -static void ep_release_epitem(struct epitem *epi) -{ - if (atomic_dec_and_test(&epi->usecnt)) - kmem_cache_free(epi_cache, epi); -} - -/* * Removes a "struct epitem" from the eventpoll RB tree and deallocates - * all the associated resources. + * all the associated resources. Must be called with "mtx" held. 
*/ static int ep_remove(struct eventpoll *ep, struct epitem *epi) { - int error; unsigned long flags; struct file *file = epi->ffd.file; @@ -472,26 +412,21 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi) list_del_init(&epi->fllink); spin_unlock(&file->f_ep_lock); - /* We need to acquire the write IRQ lock before calling ep_unlink() */ - write_lock_irqsave(&ep->lock, flags); - - /* Really unlink the item from the RB tree */ - error = ep_unlink(ep, epi); - - write_unlock_irqrestore(&ep->lock, flags); + if (ep_rb_linked(&epi->rbn)) + ep_rb_erase(&epi->rbn, &ep->rbr); - if (error) - goto error_return; + spin_lock_irqsave(&ep->lock, flags); + if (ep_is_linked(&epi->rdllink)) + list_del_init(&epi->rdllink); + spin_unlock_irqrestore(&ep->lock, flags); /* At this point it is safe to free the eventpoll item */ - ep_release_epitem(epi); + kmem_cache_free(epi_cache, epi); - error = 0; -error_return: - DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_remove(%p, %p) = %d\n", - current, ep, file, error)); + DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_remove(%p, %p)\n", + current, ep, file)); - return error; + return 0; } static void ep_free(struct eventpoll *ep) @@ -506,7 +441,7 @@ static void ep_free(struct eventpoll *ep) /* * We need to lock this because we could be hit by * eventpoll_release_file() while we're freeing the "struct eventpoll". - * We do not need to hold "ep->sem" here because the epoll file + * We do not need to hold "ep->mtx" here because the epoll file * is on the way to be removed and no one has references to it * anymore. The only hit might come from eventpoll_release_file() but * holding "epmutex" is sufficent here. @@ -525,7 +460,7 @@ static void ep_free(struct eventpoll *ep) /* * Walks through the whole tree by freeing each "struct epitem". 
At this * point we are sure no poll callbacks will be lingering around, and also by - * write-holding "sem" we can be sure that no file cleanup code will hit + * holding "epmutex" we can be sure that no file cleanup code will hit * us during this operation. So we can avoid the lock on "ep->lock". */ while ((rbp = rb_first(&ep->rbr)) != 0) { @@ -534,16 +469,16 @@ static void ep_free(struct eventpoll *ep) } mutex_unlock(&epmutex); + mutex_destroy(&ep->mtx); + kfree(ep); } static int ep_eventpoll_release(struct inode *inode, struct file *file) { struct eventpoll *ep = file->private_data; - if (ep) { + if (ep) ep_free(ep); - kfree(ep); - } DNPRINTK(3, (KERN_INFO "[%p] eventpoll: close() ep=%p\n", current, ep)); return 0; @@ -559,10 +494,10 @@ static unsigned int ep_eventpoll_poll(struct file *file, poll_table *wait) poll_wait(file, &ep->poll_wait, wait); /* Check our condition */ - read_lock_irqsave(&ep->lock, flags); + spin_lock_irqsave(&ep->lock, flags); if (!list_empty(&ep->rdllist)) pollflags = POLLIN | POLLRDNORM; - read_unlock_irqrestore(&ep->lock, flags); + spin_unlock_irqrestore(&ep->lock, flags); return pollflags; } @@ -594,9 +529,11 @@ void eventpoll_release_file(struct file *file) * We don't want to get "file->f_ep_lock" because it is not * necessary. It is not necessary because we're in the "struct file" * cleanup path, and this means that noone is using this file anymore. - * The only hit might come from ep_free() but by holding the semaphore + * So, for example, epoll_ctl() cannot hit here sicne if we reach this + * point, the file counter already went to zero and fget() would fail. + * The only hit might come from ep_free() but by holding the mutex * will correctly serialize the operation. We do need to acquire - * "ep->sem" after "epmutex" because ep_remove() requires it when called + * "ep->mtx" after "epmutex" because ep_remove() requires it when called * from anywhere but ep_free(). 
*/ mutex_lock(&epmutex); @@ -606,9 +543,9 @@ void eventpoll_release_file(struct file *file) ep = epi->ep; list_del_init(&epi->fllink); - down_write(&ep->sem); + mutex_lock(&ep->mtx); ep_remove(ep, epi); - up_write(&ep->sem); + mutex_unlock(&ep->mtx); } mutex_unlock(&epmutex); @@ -621,12 +558,13 @@ static int ep_alloc(struct eventpoll **pep) if (!ep) return -ENOMEM; - rwlock_init(&ep->lock); - init_rwsem(&ep->sem); + spin_lock_init(&ep->lock); + mutex_init(&ep->mtx); init_waitqueue_head(&ep->wq); init_waitqueue_head(&ep->poll_wait); INIT_LIST_HEAD(&ep->rdllist); ep->rbr = RB_ROOT; + ep->ovflist = EP_UNACTIVE_PTR; *pep = ep; @@ -636,20 +574,18 @@ static int ep_alloc(struct eventpoll **pep) } /* - * Search the file inside the eventpoll tree. It add usage count to - * the returned item, so the caller must call ep_release_epitem() - * after finished using the "struct epitem". + * Search the file inside the eventpoll tree. The RB tree operations + * are protected by the "mtx" mutex, and ep_find() must be called with + * "mtx" held. 
*/ static struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd) { int kcmp; - unsigned long flags; struct rb_node *rbp; struct epitem *epi, *epir = NULL; struct epoll_filefd ffd; ep_set_ffd(&ffd, file, fd); - read_lock_irqsave(&ep->lock, flags); for (rbp = ep->rbr.rb_node; rbp; ) { epi = rb_entry(rbp, struct epitem, rbn); kcmp = ep_cmp_ffd(&ffd, &epi->ffd); @@ -658,12 +594,10 @@ static struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd) else if (kcmp < 0) rbp = rbp->rb_left; else { - ep_use_epitem(epi); epir = epi; break; } } - read_unlock_irqrestore(&ep->lock, flags); DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_find(%p) -> %p\n", current, file, epir)); @@ -686,7 +620,7 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k DNPRINTK(3, (KERN_INFO "[%p] eventpoll: poll_callback(%p) epi=%p ep=%p\n", current, epi->ffd.file, epi, ep)); - write_lock_irqsave(&ep->lock, flags); + spin_lock_irqsave(&ep->lock, flags); /* * If the event mask does not contain any poll(2) event, we consider the @@ -695,7 +629,21 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k * until the next EPOLL_CTL_MOD will be issued. */ if (!(epi->event.events & ~EP_PRIVATE_BITS)) - goto is_disabled; + goto out_unlock; + + /* + * If we are trasfering events to userspace, we can hold no locks + * (because we're accessing user memory, and because of linux f_op->poll() + * semantics). All the events that happens during that period of time are + * chained in ep->ovflist and requeued later on. 
+ */ + if (unlikely(ep->ovflist != EP_UNACTIVE_PTR)) { + if (epi->next == EP_UNACTIVE_PTR) { + epi->next = ep->ovflist; + ep->ovflist = epi; + } + goto out_unlock; + } /* If this file is already in the ready list we exit soon */ if (ep_is_linked(&epi->rdllink)) @@ -714,8 +662,8 @@ is_linked: if (waitqueue_active(&ep->poll_wait)) pwake++; -is_disabled: - write_unlock_irqrestore(&ep->lock, flags); +out_unlock: + spin_unlock_irqrestore(&ep->lock, flags); /* We have to call this outside the lock */ if (pwake) @@ -766,6 +714,9 @@ static void ep_rbtree_insert(struct eventpoll *ep, struct epitem *epi) rb_insert_color(&epi->rbn, &ep->rbr); } +/* + * Must be called with "mtx" held. + */ static int ep_insert(struct eventpoll *ep, struct epoll_event *event, struct file *tfile, int fd) { @@ -786,8 +737,8 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event, epi->ep = ep; ep_set_ffd(&epi->ffd, tfile, fd); epi->event = *event; - atomic_set(&epi->usecnt, 1); epi->nwait = 0; + epi->next = EP_UNACTIVE_PTR; /* Initialize the poll table using the queue callback */ epq.epi = epi; @@ -796,7 +747,9 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event, /* * Attach the item to the poll hooks and get current event bits. * We can safely use the file* here because its usage count has - * been increased by the caller of this function. + * been increased by the caller of this function. Note that after + * this operation completes, the poll callback can start hitting + * the new item. */ revents = tfile->f_op->poll(tfile, &epq.pt); @@ -813,12 +766,15 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event, list_add_tail(&epi->fllink, &tfile->f_ep_links); spin_unlock(&tfile->f_ep_lock); - /* We have to drop the new item inside our item list to keep track of it */ - write_lock_irqsave(&ep->lock, flags); - - /* Add the current item to the rb-tree */ + /* + * Add the current item to the RB tree. 
All RB tree operations are + * protected by "mtx", and ep_insert() is called with "mtx" held. + */ ep_rbtree_insert(ep, epi); + /* We have to drop the new item inside our item list to keep track of it */ + spin_lock_irqsave(&ep->lock, flags); + /* If the file is already "ready" we drop it inside the ready list */ if ((revents & event->events) && !ep_is_linked(&epi->rdllink)) { list_add_tail(&epi->rdllink, &ep->rdllist); @@ -830,7 +786,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event, pwake++; } - write_unlock_irqrestore(&ep->lock, flags); + spin_unlock_irqrestore(&ep->lock, flags); /* We have to call this outside the lock */ if (pwake) @@ -846,12 +802,14 @@ error_unregister: /* * We need to do this because an event could have been arrived on some - * allocated wait queue. + * allocated wait queue. Note that we don't care about the ep->ovflist + * list, since that is used/cleaned only inside a section bound by "mtx". + * And ep_insert() is called with "mtx" held. */ - write_lock_irqsave(&ep->lock, flags); + spin_lock_irqsave(&ep->lock, flags); if (ep_is_linked(&epi->rdllink)) list_del_init(&epi->rdllink); - write_unlock_irqrestore(&ep->lock, flags); + spin_unlock_irqrestore(&ep->lock, flags); kmem_cache_free(epi_cache, epi); error_return: @@ -860,7 +818,7 @@ error_return: /* * Modify the interest event mask by dropping an event if the new mask - * has a match in the current file status. + * has a match in the current file status. Must be called with "mtx" held. 
*/ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_event *event) { @@ -882,36 +840,28 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even */ revents = epi->ffd.file->f_op->poll(epi->ffd.file, NULL); - write_lock_irqsave(&ep->lock, flags); + spin_lock_irqsave(&ep->lock, flags); /* Copy the data member from inside the lock */ epi->event.data = event->data; /* - * If the item is not linked to the RB tree it means that it's on its - * way toward the removal. Do nothing in this case. + * If the item is "hot" and it is not registered inside the ready + * list, push it inside. */ - if (ep_rb_linked(&epi->rbn)) { - /* - * If the item is "hot" and it is not registered inside the ready - * list, push it inside. If the item is not "hot" and it is currently - * registered inside the ready list, unlink it. - */ - if (revents & event->events) { - if (!ep_is_linked(&epi->rdllink)) { - list_add_tail(&epi->rdllink, &ep->rdllist); - - /* Notify waiting tasks that events are available */ - if (waitqueue_active(&ep->wq)) - __wake_up_locked(&ep->wq, TASK_UNINTERRUPTIBLE | - TASK_INTERRUPTIBLE); - if (waitqueue_active(&ep->poll_wait)) - pwake++; - } + if (revents & event->events) { + if (!ep_is_linked(&epi->rdllink)) { + list_add_tail(&epi->rdllink, &ep->rdllist); + + /* Notify waiting tasks that events are available */ + if (waitqueue_active(&ep->wq)) + __wake_up_locked(&ep->wq, TASK_UNINTERRUPTIBLE | + TASK_INTERRUPTIBLE); + if (waitqueue_active(&ep->poll_wait)) + pwake++; } } - - write_unlock_irqrestore(&ep->lock, flags); + spin_unlock_irqrestore(&ep->lock, flags); /* We have to call this outside the lock */ if (pwake) @@ -920,36 +870,50 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even return 0; } -/* - * This function is called without holding the "ep->lock" since the call to - * __copy_to_user() might sleep, and also f_op->poll() might reenable the IRQ - * because of the way poll() is 
traditionally implemented in Linux. - */ -static int ep_send_events(struct eventpoll *ep, struct list_head *txlist, - struct epoll_event __user *events, int maxevents) +static int ep_send_events(struct eventpoll *ep, struct epoll_event __user *events, + int maxevents) { int eventcnt, error = -EFAULT, pwake = 0; unsigned int revents; unsigned long flags; - struct epitem *epi; - struct list_head injlist; + struct epitem *epi, *nepi; + struct list_head txlist; + + INIT_LIST_HEAD(&txlist); + + /* + * We need to lock this because we could be hit by + * eventpoll_release_file() and epoll_ctl(EPOLL_CTL_DEL). + */ + mutex_lock(&ep->mtx); - INIT_LIST_HEAD(&injlist); + /* + * Steal the ready list, and re-init the original one to the + * empty list. Also, set ep->ovflist to NULL so that events + * happening while looping w/out locks, are not lost. We cannot + * have the poll callback to queue directly on ep->rdllist, + * because we are doing it in the loop below, in a lockless way. + */ + spin_lock_irqsave(&ep->lock, flags); + list_splice(&ep->rdllist, &txlist); + INIT_LIST_HEAD(&ep->rdllist); + ep->ovflist = NULL; + spin_unlock_irqrestore(&ep->lock, flags); /* * We can loop without lock because this is a task private list. * We just splice'd out the ep->rdllist in ep_collect_ready_items(). - * Items cannot vanish during the loop because we are holding "sem" in - * read. + * Items cannot vanish during the loop because we are holding "mtx". */ - for (eventcnt = 0; !list_empty(txlist) && eventcnt < maxevents;) { - epi = list_first_entry(txlist, struct epitem, rdllink); - prefetch(epi->rdllink.next); + for (eventcnt = 0; !list_empty(&txlist) && eventcnt < maxevents;) { + epi = list_first_entry(&txlist, struct epitem, rdllink); + + list_del_init(&epi->rdllink); /* * Get the ready file event set. We can safely use the file - * because we are holding the "sem" in read and this will - * guarantee that both the file and the item will not vanish. 
+ * because we are holding the "mtx" and this will guarantee + * that both the file and the item will not vanish. */ revents = epi->ffd.file->f_op->poll(epi->ffd.file, NULL); revents &= epi->event.events; @@ -957,8 +921,8 @@ static int ep_send_events(struct eventpoll *ep, struct list_head *txlist, /* * Is the event mask intersect the caller-requested one, * deliver the event to userspace. Again, we are holding - * "sem" in read, so no operations coming from userspace - * can change the item. + * "mtx", so no operations coming from userspace can change + * the item. */ if (revents) { if (__put_user(revents, @@ -970,59 +934,59 @@ static int ep_send_events(struct eventpoll *ep, struct list_head *txlist, epi->event.events &= EP_PRIVATE_BITS; eventcnt++; } - /* - * This is tricky. We are holding the "sem" in read, and this - * means that the operations that can change the "linked" status - * of the epoll item (epi->rbn and epi->rdllink), cannot touch - * them. Also, since we are "linked" from a epi->rdllink POV - * (the item is linked to our transmission list we just - * spliced), the ep_poll_callback() cannot touch us either, - * because of the check present in there. Another parallel - * epoll_wait() will not get the same result set, since we - * spliced the ready list before. Note that list_del() still - * shows the item as linked to the test in ep_poll_callback(). + * At this point, noone can insert into ep->rdllist besides + * us. The epoll_ctl() callers are locked out by us holding + * "mtx" and the poll callback will queue them in ep->ovflist. */ - list_del(&epi->rdllink); if (!(epi->event.events & EPOLLET) && - (revents & epi->event.events)) - list_add_tail(&epi->rdllink, &injlist); - else { - /* - * Be sure the item is totally detached before re-init - * the list_head. After INIT_LIST_HEAD() is committed, - * the ep_poll_callback() can requeue the item again, - * but we don't care since we are already past it. 
- */ - smp_mb(); - INIT_LIST_HEAD(&epi->rdllink); - } + (revents & epi->event.events)) + list_add_tail(&epi->rdllink, &ep->rdllist); } error = 0; - errxit: +errxit: + spin_lock_irqsave(&ep->lock, flags); /* - * If the re-injection list or the txlist are not empty, re-splice - * them to the ready list and do proper wakeups. + * During the time we spent in the loop above, some other events + * might have been queued by the poll callback. We re-insert them + * here (in case they are not already queued, or they're one-shot). */ - if (!list_empty(&injlist) || !list_empty(txlist)) { - write_lock_irqsave(&ep->lock, flags); + for (nepi = ep->ovflist; (epi = nepi) != NULL; + nepi = epi->next, epi->next = EP_UNACTIVE_PTR) { + if (!ep_is_linked(&epi->rdllink) && + (epi->event.events & ~EP_PRIVATE_BITS)) + list_add_tail(&epi->rdllink, &ep->rdllist); + } + /* + * We need to set back ep->ovflist to EP_UNACTIVE_PTR, so that after + * releasing the lock, events will be queued in the normal way inside + * ep->rdllist. + */ + ep->ovflist = EP_UNACTIVE_PTR; + + /* + * In case of error in the event-send loop, or in case the number of + * ready events exceeds the userspace limit, we need to splice the + * "txlist" back inside ep->rdllist. + */ + list_splice(&txlist, &ep->rdllist); - list_splice(txlist, &ep->rdllist); - list_splice(&injlist, &ep->rdllist); + if (!list_empty(&ep->rdllist)) { /* - * Wake up ( if active ) both the eventpoll wait list and the ->poll() - * wait list. + * Wake up (if active) both the eventpoll wait list and the ->poll() + * wait list (delayed after we release the lock). 
*/ if (waitqueue_active(&ep->wq)) __wake_up_locked(&ep->wq, TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE); if (waitqueue_active(&ep->poll_wait)) pwake++; - - write_unlock_irqrestore(&ep->lock, flags); } + spin_unlock_irqrestore(&ep->lock, flags); + + mutex_unlock(&ep->mtx); /* We have to call this outside the lock */ if (pwake) @@ -1031,41 +995,6 @@ static int ep_send_events(struct eventpoll *ep, struct list_head *txlist, return eventcnt == 0 ? error: eventcnt; } -/* - * Perform the transfer of events to user space. - */ -static int ep_events_transfer(struct eventpoll *ep, - struct epoll_event __user *events, int maxevents) -{ - int eventcnt; - unsigned long flags; - struct list_head txlist; - - INIT_LIST_HEAD(&txlist); - - /* - * We need to lock this because we could be hit by - * eventpoll_release_file() and epoll_ctl(EPOLL_CTL_DEL). - */ - down_read(&ep->sem); - - /* - * Steal the ready list, and re-init the original one to the - * empty list. - */ - write_lock_irqsave(&ep->lock, flags); - list_splice(&ep->rdllist, &txlist); - INIT_LIST_HEAD(&ep->rdllist); - write_unlock_irqrestore(&ep->lock, flags); - - /* Build result set in userspace */ - eventcnt = ep_send_events(ep, &txlist, events, maxevents); - - up_read(&ep->sem); - - return eventcnt; -} - static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, int maxevents, long timeout) { @@ -1083,7 +1012,7 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, MAX_SCHEDULE_TIMEOUT : (timeout * HZ + 999) / 1000; retry: - write_lock_irqsave(&ep->lock, flags); + spin_lock_irqsave(&ep->lock, flags); res = 0; if (list_empty(&ep->rdllist)) { @@ -1093,6 +1022,7 @@ retry: * ep_poll_callback() when events will become available. 
*/ init_waitqueue_entry(&wait, current); + wait.flags |= WQ_FLAG_EXCLUSIVE; __add_wait_queue(&ep->wq, &wait); for (;;) { @@ -1109,9 +1039,9 @@ retry: break; } - write_unlock_irqrestore(&ep->lock, flags); + spin_unlock_irqrestore(&ep->lock, flags); jtimeout = schedule_timeout(jtimeout); - write_lock_irqsave(&ep->lock, flags); + spin_lock_irqsave(&ep->lock, flags); } __remove_wait_queue(&ep->wq, &wait); @@ -1121,7 +1051,7 @@ retry: /* Is it worth to try to dig for events ? */ eavail = !list_empty(&ep->rdllist); - write_unlock_irqrestore(&ep->lock, flags); + spin_unlock_irqrestore(&ep->lock, flags); /* * Try to transfer events to user space. In case we get 0 events and @@ -1129,18 +1059,17 @@ retry: * more luck. */ if (!res && eavail && - !(res = ep_events_transfer(ep, events, maxevents)) && jtimeout) + !(res = ep_send_events(ep, events, maxevents)) && jtimeout) goto retry; return res; } /* - * It opens an eventpoll file descriptor by suggesting a storage of "size" - * file descriptors. The size parameter is just an hint about how to size - * data structures. It won't prevent the user to store more than "size" - * file descriptors inside the epoll interface. It is the kernel part of - * the userspace epoll_create(2). + * It opens an eventpoll file descriptor. The "size" parameter is there + * for historical reasons, when epoll was using an hash instead of an + * RB tree. With the current implementation, the "size" parameter is ignored + * (besides sanity checks). */ asmlinkage long sys_epoll_create(int size) { @@ -1176,7 +1105,6 @@ asmlinkage long sys_epoll_create(int size) error_free: ep_free(ep); - kfree(ep); error_return: DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d) = %d\n", current, size, error)); @@ -1186,8 +1114,7 @@ error_return: /* * The following function implements the controller interface for * the eventpoll file that enables the insertion/removal/change of - * file descriptors inside the interest set. 
It represents - * the kernel part of the user space epoll_ctl(2). + * file descriptors inside the interest set. */ asmlinkage long sys_epoll_ctl(int epfd, int op, int fd, struct epoll_event __user *event) @@ -1237,9 +1164,13 @@ asmlinkage long sys_epoll_ctl(int epfd, int op, int fd, */ ep = file->private_data; - down_write(&ep->sem); + mutex_lock(&ep->mtx); - /* Try to lookup the file inside our RB tree */ + /* + * Try to lookup the file inside our RB tree, Since we grabbed "mtx" + * above, we can be sure to be able to use the item looked up by + * ep_find() till we release the mutex. + */ epi = ep_find(ep, tfile, fd); error = -EINVAL; @@ -1266,13 +1197,7 @@ asmlinkage long sys_epoll_ctl(int epfd, int op, int fd, error = -ENOENT; break; } - /* - * The function ep_find() increments the usage count of the structure - * so, if this is not NULL, we need to release it. - */ - if (epi) - ep_release_epitem(epi); - up_write(&ep->sem); + mutex_unlock(&ep->mtx); error_tgt_fput: fput(tfile); @@ -1378,7 +1303,7 @@ asmlinkage long sys_epoll_pwait(int epfd, struct epoll_event __user *events, if (sigmask) { if (error == -EINTR) { memcpy(&current->saved_sigmask, &sigsaved, - sizeof(sigsaved)); + sizeof(sigsaved)); set_thread_flag(TIF_RESTORE_SIGMASK); } else sigprocmask(SIG_SETMASK, &sigsaved, NULL); diff --git a/fs/exec.c b/fs/exec.c index 70fa36554c14..0b685888ff6f 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -60,7 +60,7 @@ #endif int core_uses_pid; -char core_pattern[128] = "core"; +char core_pattern[CORENAME_MAX_SIZE] = "core"; int suid_dumpable = 0; EXPORT_SYMBOL(suid_dumpable); @@ -1264,8 +1264,6 @@ int set_binfmt(struct linux_binfmt *new) EXPORT_SYMBOL(set_binfmt); -#define CORENAME_MAX_SIZE 64 - /* format_corename will inspect the pattern parameter, and output a * name into corename, which must have space for at least * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator.
diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 685a1c287177..16337bff0272 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -160,13 +160,11 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag { struct ext2_inode_info *ei = (struct ext2_inode_info *) foo; - if (flags & SLAB_CTOR_CONSTRUCTOR) { - rwlock_init(&ei->i_meta_lock); + rwlock_init(&ei->i_meta_lock); #ifdef CONFIG_EXT2_FS_XATTR - init_rwsem(&ei->xattr_sem); + init_rwsem(&ei->xattr_sem); #endif - inode_init_once(&ei->vfs_inode); - } + inode_init_once(&ei->vfs_inode); } static int init_inodecache(void) diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 54d3c9041259..6e3062913a92 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -466,14 +466,12 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag { struct ext3_inode_info *ei = (struct ext3_inode_info *) foo; - if (flags & SLAB_CTOR_CONSTRUCTOR) { - INIT_LIST_HEAD(&ei->i_orphan); + INIT_LIST_HEAD(&ei->i_orphan); #ifdef CONFIG_EXT3_FS_XATTR - init_rwsem(&ei->xattr_sem); + init_rwsem(&ei->xattr_sem); #endif - mutex_init(&ei->truncate_mutex); - inode_init_once(&ei->vfs_inode); - } + mutex_init(&ei->truncate_mutex); + inode_init_once(&ei->vfs_inode); } static int init_inodecache(void) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 719126932354..cb9afdd0e26e 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -517,14 +517,12 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag { struct ext4_inode_info *ei = (struct ext4_inode_info *) foo; - if (flags & SLAB_CTOR_CONSTRUCTOR) { - INIT_LIST_HEAD(&ei->i_orphan); + INIT_LIST_HEAD(&ei->i_orphan); #ifdef CONFIG_EXT4DEV_FS_XATTR - init_rwsem(&ei->xattr_sem); + init_rwsem(&ei->xattr_sem); #endif - mutex_init(&ei->truncate_mutex); - inode_init_once(&ei->vfs_inode); - } + mutex_init(&ei->truncate_mutex); + inode_init_once(&ei->vfs_inode); } static int init_inodecache(void) diff --git a/fs/fat/cache.c 
b/fs/fat/cache.c index 1959143c1d27..3c9c8a15ec73 100644 --- a/fs/fat/cache.c +++ b/fs/fat/cache.c @@ -40,8 +40,7 @@ static void init_once(void *foo, struct kmem_cache *cachep, unsigned long flags) { struct fat_cache *cache = (struct fat_cache *)foo; - if (flags & SLAB_CTOR_CONSTRUCTOR) - INIT_LIST_HEAD(&cache->cache_list); + INIT_LIST_HEAD(&cache->cache_list); } int __init fat_cache_init(void) diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 2c55e8dce793..479722d89667 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -500,14 +500,12 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag { struct msdos_inode_info *ei = (struct msdos_inode_info *)foo; - if (flags & SLAB_CTOR_CONSTRUCTOR) { - spin_lock_init(&ei->cache_lru_lock); - ei->nr_caches = 0; - ei->cache_valid_id = FAT_CACHE_VALID + 1; - INIT_LIST_HEAD(&ei->cache_lru); - INIT_HLIST_NODE(&ei->i_fat_hash); - inode_init_once(&ei->vfs_inode); - } + spin_lock_init(&ei->cache_lru_lock); + ei->nr_caches = 0; + ei->cache_valid_id = FAT_CACHE_VALID + 1; + INIT_LIST_HEAD(&ei->cache_lru); + INIT_HLIST_NODE(&ei->i_fat_hash); + inode_init_once(&ei->vfs_inode); } static int __init fat_init_inodecache(void) diff --git a/fs/fifo.c b/fs/fifo.c index 6e7df7256782..9785e36f81e7 100644 --- a/fs/fifo.c +++ b/fs/fifo.c @@ -12,6 +12,7 @@ #include <linux/mm.h> #include <linux/slab.h> #include <linux/fs.h> +#include <linux/sched.h> #include <linux/pipe_fs_i.h> static void wait_for_partner(struct inode* inode, unsigned int *cnt) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index acfad65a6e8e..d0ed60bc3188 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -11,6 +11,7 @@ #include <linux/pagemap.h> #include <linux/slab.h> #include <linux/kernel.h> +#include <linux/sched.h> static const struct file_operations fuse_direct_io_file_operations; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 1397018ff476..78f7a1dc90dd 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -17,6 +17,7 @@ #include 
<linux/parser.h> #include <linux/statfs.h> #include <linux/random.h> +#include <linux/sched.h> MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>"); MODULE_DESCRIPTION("Filesystem in Userspace"); @@ -687,8 +688,7 @@ static void fuse_inode_init_once(void *foo, struct kmem_cache *cachep, { struct inode * inode = foo; - if (flags & SLAB_CTOR_CONSTRUCTOR) - inode_init_once(inode); + inode_init_once(inode); } static int __init fuse_fs_init(void) diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h index 11477ca3a3c0..b3e152db70c8 100644 --- a/fs/gfs2/glock.h +++ b/fs/gfs2/glock.h @@ -10,6 +10,7 @@ #ifndef __GLOCK_DOT_H__ #define __GLOCK_DOT_H__ +#include <linux/sched.h> #include "incore.h" /* Flags for lock requests; used in gfs2_holder gh_flag field. diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c index e460487c0557..787a0edef100 100644 --- a/fs/gfs2/main.c +++ b/fs/gfs2/main.c @@ -27,29 +27,27 @@ static void gfs2_init_inode_once(void *foo, struct kmem_cache *cachep, unsigned long flags) { struct gfs2_inode *ip = foo; - if (flags & SLAB_CTOR_CONSTRUCTOR) { - inode_init_once(&ip->i_inode); - spin_lock_init(&ip->i_spin); - init_rwsem(&ip->i_rw_mutex); - memset(ip->i_cache, 0, sizeof(ip->i_cache)); - } + + inode_init_once(&ip->i_inode); + spin_lock_init(&ip->i_spin); + init_rwsem(&ip->i_rw_mutex); + memset(ip->i_cache, 0, sizeof(ip->i_cache)); } static void gfs2_init_glock_once(void *foo, struct kmem_cache *cachep, unsigned long flags) { struct gfs2_glock *gl = foo; - if (flags & SLAB_CTOR_CONSTRUCTOR) { - INIT_HLIST_NODE(&gl->gl_list); - spin_lock_init(&gl->gl_spin); - INIT_LIST_HEAD(&gl->gl_holders); - INIT_LIST_HEAD(&gl->gl_waiters1); - INIT_LIST_HEAD(&gl->gl_waiters3); - gl->gl_lvb = NULL; - atomic_set(&gl->gl_lvb_count, 0); - INIT_LIST_HEAD(&gl->gl_reclaim); - INIT_LIST_HEAD(&gl->gl_ail_list); - atomic_set(&gl->gl_ail_count, 0); - } + + INIT_HLIST_NODE(&gl->gl_list); + spin_lock_init(&gl->gl_spin); + INIT_LIST_HEAD(&gl->gl_holders); + INIT_LIST_HEAD(&gl->gl_waiters1); + 
INIT_LIST_HEAD(&gl->gl_waiters3); + gl->gl_lvb = NULL; + atomic_set(&gl->gl_lvb_count, 0); + INIT_LIST_HEAD(&gl->gl_reclaim); + INIT_LIST_HEAD(&gl->gl_ail_list); + atomic_set(&gl->gl_ail_count, 0); } /** diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c index fafcba593871..9a934db0bd8a 100644 --- a/fs/hfs/inode.c +++ b/fs/hfs/inode.c @@ -13,6 +13,7 @@ #include <linux/pagemap.h> #include <linux/mpage.h> +#include <linux/sched.h> #include "hfs_fs.h" #include "btree.h" diff --git a/fs/hfs/super.c b/fs/hfs/super.c index 4f1888f16cf0..92cf8751e428 100644 --- a/fs/hfs/super.c +++ b/fs/hfs/super.c @@ -434,8 +434,7 @@ static void hfs_init_once(void *p, struct kmem_cache *cachep, unsigned long flag { struct hfs_inode_info *i = p; - if (flags & SLAB_CTOR_CONSTRUCTOR) - inode_init_once(&i->vfs_inode); + inode_init_once(&i->vfs_inode); } static int __init init_hfs_fs(void) diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index 642012ac3370..45dab5d6cc10 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c @@ -12,6 +12,7 @@ #include <linux/fs.h> #include <linux/pagemap.h> #include <linux/mpage.h> +#include <linux/sched.h> #include "hfsplus_fs.h" #include "hfsplus_raw.h" diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index 37afbec8a761..ebd1b380cbbc 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -470,8 +470,7 @@ static void hfsplus_init_once(void *p, struct kmem_cache *cachep, unsigned long { struct hfsplus_inode_info *i = p; - if (flags & SLAB_CTOR_CONSTRUCTOR) - inode_init_once(&i->vfs_inode); + inode_init_once(&i->vfs_inode); } static int __init init_hfsplus_fs(void) diff --git a/fs/hpfs/buffer.c b/fs/hpfs/buffer.c index b52b7381d10f..b6fca543544c 100644 --- a/fs/hpfs/buffer.c +++ b/fs/hpfs/buffer.c @@ -5,7 +5,7 @@ * * general buffer i/o */ - +#include <linux/sched.h> #include "hpfs_fn.h" void hpfs_lock_creation(struct super_block *s) diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c index 9953cf9a2f16..d256559b4104 100644 --- a/fs/hpfs/namei.c +++ 
b/fs/hpfs/namei.c @@ -5,7 +5,7 @@ * * adding & removing files & directories */ - +#include <linux/sched.h> #include "hpfs_fn.h" static int hpfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c index 1b95f39fbc37..29cc34abb2ea 100644 --- a/fs/hpfs/super.c +++ b/fs/hpfs/super.c @@ -12,6 +12,7 @@ #include <linux/init.h> #include <linux/statfs.h> #include <linux/magic.h> +#include <linux/sched.h> /* Mark the filesystem dirty, so that chkdsk checks it when os/2 booted */ @@ -176,11 +177,9 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag { struct hpfs_inode_info *ei = (struct hpfs_inode_info *) foo; - if (flags & SLAB_CTOR_CONSTRUCTOR) { - mutex_init(&ei->i_mutex); - mutex_init(&ei->i_parent_mutex); - inode_init_once(&ei->vfs_inode); - } + mutex_init(&ei->i_mutex); + mutex_init(&ei->i_parent_mutex); + inode_init_once(&ei->vfs_inode); } static int init_inodecache(void) diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 98959b87cdf8..aa083dd34e92 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -556,8 +556,7 @@ static void init_once(void *foo, struct kmem_cache *cachep, unsigned long flags) { struct hugetlbfs_inode_info *ei = (struct hugetlbfs_inode_info *)foo; - if (flags & SLAB_CTOR_CONSTRUCTOR) - inode_init_once(&ei->vfs_inode); + inode_init_once(&ei->vfs_inode); } const struct file_operations hugetlbfs_file_operations = { diff --git a/fs/inode.c b/fs/inode.c index df2ef15d03d2..9a012cc5b6cd 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -213,8 +213,7 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag { struct inode * inode = (struct inode *) foo; - if (flags & SLAB_CTOR_CONSTRUCTOR) - inode_init_once(inode); + inode_init_once(inode); } /* diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index e99f7ff4ecb4..5c3eecf7542e 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -77,8 +77,7 @@ static void init_once(void *foo, 
struct kmem_cache * cachep, unsigned long flags { struct iso_inode_info *ei = foo; - if (flags & SLAB_CTOR_CONSTRUCTOR) - inode_init_once(&ei->vfs_inode); + inode_init_once(&ei->vfs_inode); } static int init_inodecache(void) diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c index 45368f8bbe72..6488af43bc9b 100644 --- a/fs/jffs2/super.c +++ b/fs/jffs2/super.c @@ -47,10 +47,8 @@ static void jffs2_i_init_once(void * foo, struct kmem_cache * cachep, unsigned l { struct jffs2_inode_info *ei = (struct jffs2_inode_info *) foo; - if (flags & SLAB_CTOR_CONSTRUCTOR) { - init_MUTEX(&ei->sem); - inode_init_once(&ei->vfs_inode); - } + init_MUTEX(&ei->sem); + inode_init_once(&ei->vfs_inode); } static int jffs2_sync_fs(struct super_block *sb, int wait) diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c index 6b3acb0b5781..43d4f69afbec 100644 --- a/fs/jfs/jfs_metapage.c +++ b/fs/jfs/jfs_metapage.c @@ -184,16 +184,14 @@ static void init_once(void *foo, struct kmem_cache *cachep, unsigned long flags) { struct metapage *mp = (struct metapage *)foo; - if (flags & SLAB_CTOR_CONSTRUCTOR) { - mp->lid = 0; - mp->lsn = 0; - mp->flag = 0; - mp->data = NULL; - mp->clsn = 0; - mp->log = NULL; - set_bit(META_free, &mp->flag); - init_waitqueue_head(&mp->wait); - } + mp->lid = 0; + mp->lsn = 0; + mp->flag = 0; + mp->data = NULL; + mp->clsn = 0; + mp->log = NULL; + set_bit(META_free, &mp->flag); + init_waitqueue_head(&mp->wait); } static inline struct metapage *alloc_metapage(gfp_t gfp_mask) diff --git a/fs/jfs/super.c b/fs/jfs/super.c index ea9dc3e65dcf..20e4ac1c79a3 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -752,20 +752,18 @@ static void init_once(void *foo, struct kmem_cache * cachep, unsigned long flags { struct jfs_inode_info *jfs_ip = (struct jfs_inode_info *) foo; - if (flags & SLAB_CTOR_CONSTRUCTOR) { - memset(jfs_ip, 0, sizeof(struct jfs_inode_info)); - INIT_LIST_HEAD(&jfs_ip->anon_inode_list); - init_rwsem(&jfs_ip->rdwrlock); - mutex_init(&jfs_ip->commit_mutex); - 
init_rwsem(&jfs_ip->xattr_sem); - spin_lock_init(&jfs_ip->ag_lock); - jfs_ip->active_ag = -1; + memset(jfs_ip, 0, sizeof(struct jfs_inode_info)); + INIT_LIST_HEAD(&jfs_ip->anon_inode_list); + init_rwsem(&jfs_ip->rdwrlock); + mutex_init(&jfs_ip->commit_mutex); + init_rwsem(&jfs_ip->xattr_sem); + spin_lock_init(&jfs_ip->ag_lock); + jfs_ip->active_ag = -1; #ifdef CONFIG_JFS_POSIX_ACL - jfs_ip->i_acl = JFS_ACL_NOT_CACHED; - jfs_ip->i_default_acl = JFS_ACL_NOT_CACHED; + jfs_ip->i_acl = JFS_ACL_NOT_CACHED; + jfs_ip->i_default_acl = JFS_ACL_NOT_CACHED; #endif - inode_init_once(&jfs_ip->vfs_inode); - } + inode_init_once(&jfs_ip->vfs_inode); } static int __init init_jfs_fs(void) diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c index f4d45d4d835b..d070b18e539d 100644 --- a/fs/lockd/clntlock.c +++ b/fs/lockd/clntlock.c @@ -153,7 +153,7 @@ nlmclnt_recovery(struct nlm_host *host) if (!host->h_reclaiming++) { nlm_get_host(host); __module_get(THIS_MODULE); - if (kernel_thread(reclaimer, host, CLONE_KERNEL) < 0) + if (kernel_thread(reclaimer, host, CLONE_FS | CLONE_FILES) < 0) module_put(THIS_MODULE); } } diff --git a/fs/lockd/host.c b/fs/lockd/host.c index ad21c0713efa..96070bff93fc 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c @@ -221,7 +221,7 @@ nlm_bind_host(struct nlm_host *host) host->h_nextrebind - jiffies); } } else { - unsigned long increment = nlmsvc_timeout * HZ; + unsigned long increment = nlmsvc_timeout; struct rpc_timeout timeparms = { .to_initval = increment, .to_increment = increment, diff --git a/fs/lockd/xdr.c b/fs/lockd/xdr.c index 9702956d206c..5316e307a49d 100644 --- a/fs/lockd/xdr.c +++ b/fs/lockd/xdr.c @@ -586,10 +586,6 @@ static struct rpc_version nlm_version3 = { .procs = nlm_procedures, }; -#ifdef CONFIG_LOCKD_V4 -extern struct rpc_version nlm_version4; -#endif - static struct rpc_version * nlm_versions[] = { [1] = &nlm_version1, [3] = &nlm_version3, diff --git a/fs/lockd/xdr4.c b/fs/lockd/xdr4.c index ce1efdbe1b3a..846fc1d639dd 100644 --- 
a/fs/lockd/xdr4.c +++ b/fs/lockd/xdr4.c @@ -123,7 +123,8 @@ static __be32 * nlm4_decode_lock(__be32 *p, struct nlm_lock *lock) { struct file_lock *fl = &lock->fl; - __s64 len, start, end; + __u64 len, start; + __s64 end; if (!(p = xdr_decode_string_inplace(p, &lock->caller, &lock->len, NLM_MAXSTRLEN)) @@ -417,7 +418,8 @@ nlm4clt_decode_testres(struct rpc_rqst *req, __be32 *p, struct nlm_res *resp) if (resp->status == nlm_lck_denied) { struct file_lock *fl = &resp->lock.fl; u32 excl; - s64 start, end, len; + __u64 start, len; + __s64 end; memset(&resp->lock, 0, sizeof(resp->lock)); locks_init_lock(fl); diff --git a/fs/locks.c b/fs/locks.c index 8ec16ab5ef74..431a8b871fce 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -203,9 +203,6 @@ static void init_once(void *foo, struct kmem_cache *cache, unsigned long flags) { struct file_lock *lock = (struct file_lock *) foo; - if (!(flags & SLAB_CTOR_CONSTRUCTOR)) - return; - locks_init_lock(lock); } diff --git a/fs/minix/bitmap.c b/fs/minix/bitmap.c index c4a554df7b7e..99a12f127769 100644 --- a/fs/minix/bitmap.c +++ b/fs/minix/bitmap.c @@ -15,6 +15,7 @@ #include <linux/smp_lock.h> #include <linux/buffer_head.h> #include <linux/bitops.h> +#include <linux/sched.h> static int nibblemap[] = { 4,3,3,2,3,2,2,1,3,2,2,1,2,1,1,0 }; diff --git a/fs/minix/inode.c b/fs/minix/inode.c index 2f4d43a2a310..be4044614ac8 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c @@ -73,8 +73,7 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag { struct minix_inode_info *ei = (struct minix_inode_info *) foo; - if (flags & SLAB_CTOR_CONSTRUCTOR) - inode_init_once(&ei->vfs_inode); + inode_init_once(&ei->vfs_inode); } static int init_inodecache(void) diff --git a/fs/ncpfs/file.c b/fs/ncpfs/file.c index addfd3147ea7..d3152f8d95c6 100644 --- a/fs/ncpfs/file.c +++ b/fs/ncpfs/file.c @@ -17,6 +17,7 @@ #include <linux/mm.h> #include <linux/slab.h> #include <linux/vmalloc.h> +#include <linux/sched.h> #include <linux/ncp_fs.h> 
#include "ncplib_kernel.h" diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c index c29f00ad495d..cf06eb9f050e 100644 --- a/fs/ncpfs/inode.c +++ b/fs/ncpfs/inode.c @@ -60,10 +60,8 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag { struct ncp_inode_info *ei = (struct ncp_inode_info *) foo; - if (flags & SLAB_CTOR_CONSTRUCTOR) { - mutex_init(&ei->open_mutex); - inode_init_once(&ei->vfs_inode); - } + mutex_init(&ei->open_mutex); + inode_init_once(&ei->vfs_inode); } static int init_inodecache(void) diff --git a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c index 8843a83d4ef0..c67b4bdcf719 100644 --- a/fs/ncpfs/ioctl.c +++ b/fs/ncpfs/ioctl.c @@ -17,6 +17,7 @@ #include <linux/highuid.h> #include <linux/smp_lock.h> #include <linux/vmalloc.h> +#include <linux/sched.h> #include <linux/ncp_fs.h> diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h index db3d7919c601..c2bb14e053e1 100644 --- a/fs/nfs/callback.h +++ b/fs/nfs/callback.h @@ -24,7 +24,7 @@ enum nfs4_callback_opnum { }; struct cb_compound_hdr_arg { - int taglen; + unsigned int taglen; const char *tag; unsigned int callback_ident; unsigned nops; @@ -32,7 +32,7 @@ struct cb_compound_hdr_arg { struct cb_compound_hdr_res { __be32 *status; - int taglen; + unsigned int taglen; const char *tag; __be32 *nops; }; diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 50c6821bad26..881fa4900923 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -12,7 +12,7 @@ #include <linux/module.h> #include <linux/init.h> - +#include <linux/sched.h> #include <linux/time.h> #include <linux/kernel.h> #include <linux/mm.h> diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 841c99a9b11c..7f37d1bea83f 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -226,7 +226,7 @@ restart: spin_unlock(&clp->cl_lock); } -int nfs_do_expire_all_delegations(void *ptr) +static int nfs_do_expire_all_delegations(void *ptr) { struct nfs_client *clp = ptr; struct nfs_delegation *delegation; diff --git a/fs/nfs/dir.c 
b/fs/nfs/dir.c index 3df428816559..c27258b5d3e1 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -33,6 +33,7 @@ #include <linux/pagevec.h> #include <linux/namei.h> #include <linux/mount.h> +#include <linux/sched.h> #include "nfs4_fs.h" #include "delegation.h" @@ -607,7 +608,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) return res; } -loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int origin) +static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int origin) { mutex_lock(&filp->f_path.dentry->d_inode->i_mutex); switch (origin) { @@ -633,7 +634,7 @@ out: * All directory operations under NFS are synchronous, so fsync() * is a dummy operation. */ -int nfs_fsync_dir(struct file *filp, struct dentry *dentry, int datasync) +static int nfs_fsync_dir(struct file *filp, struct dentry *dentry, int datasync) { dfprintk(VFS, "NFS: fsync_dir(%s/%s) datasync %d\n", dentry->d_parent->d_name.name, dentry->d_name.name, diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 5eaee6dd040b..9eb8eb4e4a08 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -27,6 +27,7 @@ #include <linux/slab.h> #include <linux/pagemap.h> #include <linux/smp_lock.h> +#include <linux/aio.h> #include <asm/uaccess.h> #include <asm/system.h> diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 2a3fd9573207..bd9f5a836592 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -15,7 +15,7 @@ #include <linux/module.h> #include <linux/init.h> - +#include <linux/sched.h> #include <linux/time.h> #include <linux/kernel.h> #include <linux/mm.h> @@ -1164,21 +1164,19 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag { struct nfs_inode *nfsi = (struct nfs_inode *) foo; - if (flags & SLAB_CTOR_CONSTRUCTOR) { - inode_init_once(&nfsi->vfs_inode); - spin_lock_init(&nfsi->req_lock); - INIT_LIST_HEAD(&nfsi->dirty); - INIT_LIST_HEAD(&nfsi->commit); - INIT_LIST_HEAD(&nfsi->open_files); - INIT_LIST_HEAD(&nfsi->access_cache_entry_lru); - 
INIT_LIST_HEAD(&nfsi->access_cache_inode_lru); - INIT_RADIX_TREE(&nfsi->nfs_page_tree, GFP_ATOMIC); - atomic_set(&nfsi->data_updates, 0); - nfsi->ndirty = 0; - nfsi->ncommit = 0; - nfsi->npages = 0; - nfs4_init_once(nfsi); - } + inode_init_once(&nfsi->vfs_inode); + spin_lock_init(&nfsi->req_lock); + INIT_LIST_HEAD(&nfsi->dirty); + INIT_LIST_HEAD(&nfsi->commit); + INIT_LIST_HEAD(&nfsi->open_files); + INIT_LIST_HEAD(&nfsi->access_cache_entry_lru); + INIT_LIST_HEAD(&nfsi->access_cache_inode_lru); + INIT_RADIX_TREE(&nfsi->nfs_page_tree, GFP_ATOMIC); + atomic_set(&nfsi->data_updates, 0); + nfsi->ndirty = 0; + nfsi->ncommit = 0; + nfsi->npages = 0; + nfs4_init_once(nfsi); } static int __init nfs_init_inodecache(void) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index d6a30e965787..648e0ac0f90e 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -790,7 +790,7 @@ out: return -EACCES; } -int nfs4_recover_expired_lease(struct nfs_server *server) +static int nfs4_recover_expired_lease(struct nfs_server *server) { struct nfs_client *clp = server->nfs_client; int ret; @@ -2748,7 +2748,7 @@ static int nfs4_delay(struct rpc_clnt *clnt, long *timeout) /* This is the error handling routine for processes that are allowed * to sleep. 
*/ -int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct nfs4_exception *exception) +static int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct nfs4_exception *exception) { struct nfs_client *clp = server->nfs_client; int ret = errorcode; diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 5fffbdfa971f..8ed79d5c54f9 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -104,7 +104,7 @@ struct rpc_cred *nfs4_get_renew_cred(struct nfs_client *clp) return cred; } -struct rpc_cred *nfs4_get_setclientid_cred(struct nfs_client *clp) +static struct rpc_cred *nfs4_get_setclientid_cred(struct nfs_client *clp) { struct nfs4_state_owner *sp; diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 938f37166788..8003c91ccb9a 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -646,10 +646,10 @@ static int encode_close(struct xdr_stream *xdr, const struct nfs_closeargs *arg) { __be32 *p; - RESERVE_SPACE(8+sizeof(arg->stateid->data)); + RESERVE_SPACE(8+NFS4_STATEID_SIZE); WRITE32(OP_CLOSE); WRITE32(arg->seqid->sequence->counter); - WRITEMEM(arg->stateid->data, sizeof(arg->stateid->data)); + WRITEMEM(arg->stateid->data, NFS4_STATEID_SIZE); return 0; } @@ -793,17 +793,17 @@ static int encode_lock(struct xdr_stream *xdr, const struct nfs_lock_args *args) WRITE64(nfs4_lock_length(args->fl)); WRITE32(args->new_lock_owner); if (args->new_lock_owner){ - RESERVE_SPACE(40); + RESERVE_SPACE(4+NFS4_STATEID_SIZE+20); WRITE32(args->open_seqid->sequence->counter); - WRITEMEM(args->open_stateid->data, sizeof(args->open_stateid->data)); + WRITEMEM(args->open_stateid->data, NFS4_STATEID_SIZE); WRITE32(args->lock_seqid->sequence->counter); WRITE64(args->lock_owner.clientid); WRITE32(4); WRITE32(args->lock_owner.id); } else { - RESERVE_SPACE(20); - WRITEMEM(args->lock_stateid->data, sizeof(args->lock_stateid->data)); + RESERVE_SPACE(NFS4_STATEID_SIZE+4); + WRITEMEM(args->lock_stateid->data, NFS4_STATEID_SIZE); 
WRITE32(args->lock_seqid->sequence->counter); } @@ -830,11 +830,11 @@ static int encode_locku(struct xdr_stream *xdr, const struct nfs_locku_args *arg { __be32 *p; - RESERVE_SPACE(44); + RESERVE_SPACE(12+NFS4_STATEID_SIZE+16); WRITE32(OP_LOCKU); WRITE32(nfs4_lock_type(args->fl, 0)); WRITE32(args->seqid->sequence->counter); - WRITEMEM(args->stateid->data, sizeof(args->stateid->data)); + WRITEMEM(args->stateid->data, NFS4_STATEID_SIZE); WRITE64(args->fl->fl_start); WRITE64(nfs4_lock_length(args->fl)); @@ -966,9 +966,9 @@ static inline void encode_claim_delegate_cur(struct xdr_stream *xdr, const struc { __be32 *p; - RESERVE_SPACE(4+sizeof(stateid->data)); + RESERVE_SPACE(4+NFS4_STATEID_SIZE); WRITE32(NFS4_OPEN_CLAIM_DELEGATE_CUR); - WRITEMEM(stateid->data, sizeof(stateid->data)); + WRITEMEM(stateid->data, NFS4_STATEID_SIZE); encode_string(xdr, name->len, name->name); } @@ -996,9 +996,9 @@ static int encode_open_confirm(struct xdr_stream *xdr, const struct nfs_open_con { __be32 *p; - RESERVE_SPACE(8+sizeof(arg->stateid->data)); + RESERVE_SPACE(4+NFS4_STATEID_SIZE+4); WRITE32(OP_OPEN_CONFIRM); - WRITEMEM(arg->stateid->data, sizeof(arg->stateid->data)); + WRITEMEM(arg->stateid->data, NFS4_STATEID_SIZE); WRITE32(arg->seqid->sequence->counter); return 0; @@ -1008,9 +1008,9 @@ static int encode_open_downgrade(struct xdr_stream *xdr, const struct nfs_closea { __be32 *p; - RESERVE_SPACE(8+sizeof(arg->stateid->data)); + RESERVE_SPACE(4+NFS4_STATEID_SIZE+4); WRITE32(OP_OPEN_DOWNGRADE); - WRITEMEM(arg->stateid->data, sizeof(arg->stateid->data)); + WRITEMEM(arg->stateid->data, NFS4_STATEID_SIZE); WRITE32(arg->seqid->sequence->counter); encode_share_access(xdr, arg->open_flags); return 0; @@ -1045,12 +1045,12 @@ static void encode_stateid(struct xdr_stream *xdr, const struct nfs_open_context nfs4_stateid stateid; __be32 *p; - RESERVE_SPACE(16); + RESERVE_SPACE(NFS4_STATEID_SIZE); if (ctx->state != NULL) { nfs4_copy_stateid(&stateid, ctx->state, ctx->lockowner); - 
WRITEMEM(stateid.data, sizeof(stateid.data)); + WRITEMEM(stateid.data, NFS4_STATEID_SIZE); } else - WRITEMEM(zero_stateid.data, sizeof(zero_stateid.data)); + WRITEMEM(zero_stateid.data, NFS4_STATEID_SIZE); } static int encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args) @@ -1079,10 +1079,10 @@ static int encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg int replen; __be32 *p; - RESERVE_SPACE(32+sizeof(nfs4_verifier)); + RESERVE_SPACE(12+NFS4_VERIFIER_SIZE+20); WRITE32(OP_READDIR); WRITE64(readdir->cookie); - WRITEMEM(readdir->verifier.data, sizeof(readdir->verifier.data)); + WRITEMEM(readdir->verifier.data, NFS4_VERIFIER_SIZE); WRITE32(readdir->count >> 1); /* We're not doing readdirplus */ WRITE32(readdir->count); WRITE32(2); @@ -1190,9 +1190,9 @@ encode_setacl(struct xdr_stream *xdr, struct nfs_setaclargs *arg) { __be32 *p; - RESERVE_SPACE(4+sizeof(zero_stateid.data)); + RESERVE_SPACE(4+NFS4_STATEID_SIZE); WRITE32(OP_SETATTR); - WRITEMEM(zero_stateid.data, sizeof(zero_stateid.data)); + WRITEMEM(zero_stateid.data, NFS4_STATEID_SIZE); RESERVE_SPACE(2*4); WRITE32(1); WRITE32(FATTR4_WORD0_ACL); @@ -1220,9 +1220,9 @@ static int encode_setattr(struct xdr_stream *xdr, const struct nfs_setattrargs * int status; __be32 *p; - RESERVE_SPACE(4+sizeof(arg->stateid.data)); + RESERVE_SPACE(4+NFS4_STATEID_SIZE); WRITE32(OP_SETATTR); - WRITEMEM(arg->stateid.data, sizeof(arg->stateid.data)); + WRITEMEM(arg->stateid.data, NFS4_STATEID_SIZE); if ((status = encode_attrs(xdr, arg->iap, server))) return status; @@ -1234,9 +1234,9 @@ static int encode_setclientid(struct xdr_stream *xdr, const struct nfs4_setclien { __be32 *p; - RESERVE_SPACE(4 + sizeof(setclientid->sc_verifier->data)); + RESERVE_SPACE(4 + NFS4_VERIFIER_SIZE); WRITE32(OP_SETCLIENTID); - WRITEMEM(setclientid->sc_verifier->data, sizeof(setclientid->sc_verifier->data)); + WRITEMEM(setclientid->sc_verifier->data, NFS4_VERIFIER_SIZE); encode_string(xdr, setclientid->sc_name_len, 
setclientid->sc_name); RESERVE_SPACE(4); @@ -1253,10 +1253,10 @@ static int encode_setclientid_confirm(struct xdr_stream *xdr, const struct nfs_c { __be32 *p; - RESERVE_SPACE(12 + sizeof(client_state->cl_confirm.data)); + RESERVE_SPACE(12 + NFS4_VERIFIER_SIZE); WRITE32(OP_SETCLIENTID_CONFIRM); WRITE64(client_state->cl_clientid); - WRITEMEM(client_state->cl_confirm.data, sizeof(client_state->cl_confirm.data)); + WRITEMEM(client_state->cl_confirm.data, NFS4_VERIFIER_SIZE); return 0; } @@ -1284,10 +1284,10 @@ static int encode_delegreturn(struct xdr_stream *xdr, const nfs4_stateid *statei { __be32 *p; - RESERVE_SPACE(20); + RESERVE_SPACE(4+NFS4_STATEID_SIZE); WRITE32(OP_DELEGRETURN); - WRITEMEM(stateid->data, sizeof(stateid->data)); + WRITEMEM(stateid->data, NFS4_STATEID_SIZE); return 0; } @@ -2494,7 +2494,7 @@ static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, st int i; dprintk("%s: using first %d of %d servers returned for location %d\n", __FUNCTION__, NFS4_FS_LOCATION_MAXSERVERS, m, res->nlocations); for (i = loc->nservers; i < m; i++) { - int len; + unsigned int len; char *data; status = decode_opaque_inline(xdr, &len, &data); if (unlikely(status != 0)) @@ -2642,7 +2642,7 @@ static int decode_attr_nlink(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t return 0; } -static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_client *clp, int32_t *uid) +static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_client *clp, uint32_t *uid) { uint32_t len; __be32 *p; @@ -2667,7 +2667,7 @@ static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, struct nf return 0; } -static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_client *clp, int32_t *gid) +static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_client *clp, uint32_t *gid) { uint32_t len; __be32 *p; @@ -2897,8 +2897,8 @@ static int decode_close(struct xdr_stream *xdr, 
struct nfs_closeres *res) status = decode_op_hdr(xdr, OP_CLOSE); if (status) return status; - READ_BUF(sizeof(res->stateid.data)); - COPYMEM(res->stateid.data, sizeof(res->stateid.data)); + READ_BUF(NFS4_STATEID_SIZE); + COPYMEM(res->stateid.data, NFS4_STATEID_SIZE); return 0; } @@ -3186,8 +3186,8 @@ static int decode_lock(struct xdr_stream *xdr, struct nfs_lock_res *res) status = decode_op_hdr(xdr, OP_LOCK); if (status == 0) { - READ_BUF(sizeof(res->stateid.data)); - COPYMEM(res->stateid.data, sizeof(res->stateid.data)); + READ_BUF(NFS4_STATEID_SIZE); + COPYMEM(res->stateid.data, NFS4_STATEID_SIZE); } else if (status == -NFS4ERR_DENIED) return decode_lock_denied(xdr, NULL); return status; @@ -3209,8 +3209,8 @@ static int decode_locku(struct xdr_stream *xdr, struct nfs_locku_res *res) status = decode_op_hdr(xdr, OP_LOCKU); if (status == 0) { - READ_BUF(sizeof(res->stateid.data)); - COPYMEM(res->stateid.data, sizeof(res->stateid.data)); + READ_BUF(NFS4_STATEID_SIZE); + COPYMEM(res->stateid.data, NFS4_STATEID_SIZE); } return status; } @@ -3251,8 +3251,8 @@ static int decode_delegation(struct xdr_stream *xdr, struct nfs_openres *res) res->delegation_type = 0; return 0; } - READ_BUF(20); - COPYMEM(res->delegation.data, sizeof(res->delegation.data)); + READ_BUF(NFS4_STATEID_SIZE+4); + COPYMEM(res->delegation.data, NFS4_STATEID_SIZE); READ32(res->do_recall); switch (delegation_type) { case NFS4_OPEN_DELEGATE_READ: @@ -3275,8 +3275,8 @@ static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res) status = decode_op_hdr(xdr, OP_OPEN); if (status) return status; - READ_BUF(sizeof(res->stateid.data)); - COPYMEM(res->stateid.data, sizeof(res->stateid.data)); + READ_BUF(NFS4_STATEID_SIZE); + COPYMEM(res->stateid.data, NFS4_STATEID_SIZE); decode_change_info(xdr, &res->cinfo); @@ -3302,8 +3302,8 @@ static int decode_open_confirm(struct xdr_stream *xdr, struct nfs_open_confirmre status = decode_op_hdr(xdr, OP_OPEN_CONFIRM); if (status) return status; - 
READ_BUF(sizeof(res->stateid.data)); - COPYMEM(res->stateid.data, sizeof(res->stateid.data)); + READ_BUF(NFS4_STATEID_SIZE); + COPYMEM(res->stateid.data, NFS4_STATEID_SIZE); return 0; } @@ -3315,8 +3315,8 @@ static int decode_open_downgrade(struct xdr_stream *xdr, struct nfs_closeres *re status = decode_op_hdr(xdr, OP_OPEN_DOWNGRADE); if (status) return status; - READ_BUF(sizeof(res->stateid.data)); - COPYMEM(res->stateid.data, sizeof(res->stateid.data)); + READ_BUF(NFS4_STATEID_SIZE); + COPYMEM(res->stateid.data, NFS4_STATEID_SIZE); return 0; } @@ -3590,9 +3590,9 @@ static int decode_setclientid(struct xdr_stream *xdr, struct nfs_client *clp) } READ32(nfserr); if (nfserr == NFS_OK) { - READ_BUF(8 + sizeof(clp->cl_confirm.data)); + READ_BUF(8 + NFS4_VERIFIER_SIZE); READ64(clp->cl_clientid); - COPYMEM(clp->cl_confirm.data, sizeof(clp->cl_confirm.data)); + COPYMEM(clp->cl_confirm.data, NFS4_VERIFIER_SIZE); } else if (nfserr == NFSERR_CLID_INUSE) { uint32_t len; diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index e12054c86d0d..cbdd1c6aaa94 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -11,6 +11,7 @@ #include <linux/slab.h> #include <linux/file.h> +#include <linux/sched.h> #include <linux/sunrpc/clnt.h> #include <linux/nfs3.h> #include <linux/nfs4.h> diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 9a55807b2a70..7bd7cb95c034 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -79,7 +79,7 @@ void nfs_readdata_release(void *data) static int nfs_return_empty_page(struct page *page) { - memclear_highpage_flush(page, 0, PAGE_CACHE_SIZE); + zero_user_page(page, 0, PAGE_CACHE_SIZE, KM_USER0); SetPageUptodate(page); unlock_page(page); return 0; @@ -103,10 +103,10 @@ static void nfs_readpage_truncate_uninitialised_page(struct nfs_read_data *data) pglen = PAGE_CACHE_SIZE - base; for (;;) { if (remainder <= pglen) { - memclear_highpage_flush(*pages, base, remainder); + zero_user_page(*pages, base, remainder, KM_USER0); break; } - memclear_highpage_flush(*pages, 
base, pglen); + zero_user_page(*pages, base, pglen, KM_USER0); pages++; remainder -= pglen; pglen = PAGE_CACHE_SIZE; @@ -130,7 +130,7 @@ static int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, return PTR_ERR(new); } if (len < PAGE_CACHE_SIZE) - memclear_highpage_flush(page, len, PAGE_CACHE_SIZE - len); + zero_user_page(page, len, PAGE_CACHE_SIZE - len, KM_USER0); nfs_list_add_request(new, &one_request); if (NFS_SERVER(inode)->rsize < PAGE_CACHE_SIZE) @@ -532,7 +532,7 @@ readpage_async_filler(void *data, struct page *page) return PTR_ERR(new); } if (len < PAGE_CACHE_SIZE) - memclear_highpage_flush(page, len, PAGE_CACHE_SIZE - len); + zero_user_page(page, len, PAGE_CACHE_SIZE - len, KM_USER0); nfs_pageio_add_request(desc->pgio, new); return 0; } diff --git a/fs/nfs/write.c b/fs/nfs/write.c index de92b9509d94..b084c03ce493 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -58,7 +58,7 @@ struct nfs_write_data *nfs_commit_alloc(void) return p; } -void nfs_commit_rcu_free(struct rcu_head *head) +static void nfs_commit_rcu_free(struct rcu_head *head) { struct nfs_write_data *p = container_of(head, struct nfs_write_data, task.u.tk_rcu); if (p && (p->pagevec != &p->page_array[0])) @@ -168,7 +168,7 @@ static void nfs_mark_uptodate(struct page *page, unsigned int base, unsigned int if (count != nfs_page_length(page)) return; if (count != PAGE_CACHE_SIZE) - memclear_highpage_flush(page, count, PAGE_CACHE_SIZE - count); + zero_user_page(page, count, PAGE_CACHE_SIZE - count, KM_USER0); SetPageUptodate(page); } @@ -922,7 +922,7 @@ static int nfs_flush_one(struct inode *inode, struct list_head *head, unsigned i return 0; out_bad: while (!list_empty(head)) { - struct nfs_page *req = nfs_list_entry(head->next); + req = nfs_list_entry(head->next); nfs_list_remove_request(req); nfs_redirty_request(req); nfs_end_page_writeback(req->wb_page); diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 32ffea033c7a..864090edc28b 100644 --- 
a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -38,6 +38,7 @@ #include <linux/inet.h> #include <linux/errno.h> #include <linux/delay.h> +#include <linux/sched.h> #include <linux/sunrpc/xdr.h> #include <linux/sunrpc/svc.h> #include <linux/sunrpc/clnt.h> diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index c7774e3a9469..ebd03cc07479 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -45,7 +45,7 @@ #include <asm/uaccess.h> #include <asm/scatterlist.h> #include <linux/crypto.h> - +#include <linux/sched.h> #define NFSDDBG_FACILITY NFSDDBG_PROC diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index d7759ce6ed94..ff55950efb43 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -9,7 +9,7 @@ */ #include <linux/module.h> - +#include <linux/sched.h> #include <linux/time.h> #include <linux/errno.h> #include <linux/nfs.h> diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c index 39a1669506bd..7ed56390b582 100644 --- a/fs/ntfs/file.c +++ b/fs/ntfs/file.c @@ -26,6 +26,7 @@ #include <linux/swap.h> #include <linux/uio.h> #include <linux/writeback.h> +#include <linux/sched.h> #include <asm/page.h> #include <asm/uaccess.h> diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c index 21d834e5ed73..4566b9182551 100644 --- a/fs/ntfs/super.c +++ b/fs/ntfs/super.c @@ -3085,8 +3085,7 @@ static void ntfs_big_inode_init_once(void *foo, struct kmem_cache *cachep, { ntfs_inode *ni = (ntfs_inode *)foo; - if (flags & SLAB_CTOR_CONSTRUCTOR) - inode_init_once(VFS_I(ni)); + inode_init_once(VFS_I(ni)); } /* diff --git a/fs/ocfs2/dlm/dlmfs.c b/fs/ocfs2/dlm/dlmfs.c index 5671cf9d6383..fd8cb1badc9b 100644 --- a/fs/ocfs2/dlm/dlmfs.c +++ b/fs/ocfs2/dlm/dlmfs.c @@ -262,12 +262,10 @@ static void dlmfs_init_once(void *foo, struct dlmfs_inode_private *ip = (struct dlmfs_inode_private *) foo; - if (flags & SLAB_CTOR_CONSTRUCTOR) { - ip->ip_dlm = NULL; - ip->ip_parent = NULL; + ip->ip_dlm = NULL; + ip->ip_parent = NULL; - inode_init_once(&ip->ip_vfs_inode); - } + 
inode_init_once(&ip->ip_vfs_inode); } static struct inode *dlmfs_alloc_inode(struct super_block *sb) diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 7c5e3f5d6634..86b559c7dce9 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -937,31 +937,29 @@ static void ocfs2_inode_init_once(void *data, { struct ocfs2_inode_info *oi = data; - if (flags & SLAB_CTOR_CONSTRUCTOR) { - oi->ip_flags = 0; - oi->ip_open_count = 0; - spin_lock_init(&oi->ip_lock); - ocfs2_extent_map_init(&oi->vfs_inode); - INIT_LIST_HEAD(&oi->ip_io_markers); - oi->ip_created_trans = 0; - oi->ip_last_trans = 0; - oi->ip_dir_start_lookup = 0; + oi->ip_flags = 0; + oi->ip_open_count = 0; + spin_lock_init(&oi->ip_lock); + ocfs2_extent_map_init(&oi->vfs_inode); + INIT_LIST_HEAD(&oi->ip_io_markers); + oi->ip_created_trans = 0; + oi->ip_last_trans = 0; + oi->ip_dir_start_lookup = 0; - init_rwsem(&oi->ip_alloc_sem); - mutex_init(&oi->ip_io_mutex); + init_rwsem(&oi->ip_alloc_sem); + mutex_init(&oi->ip_io_mutex); - oi->ip_blkno = 0ULL; - oi->ip_clusters = 0; + oi->ip_blkno = 0ULL; + oi->ip_clusters = 0; - ocfs2_lock_res_init_once(&oi->ip_rw_lockres); - ocfs2_lock_res_init_once(&oi->ip_meta_lockres); - ocfs2_lock_res_init_once(&oi->ip_data_lockres); - ocfs2_lock_res_init_once(&oi->ip_open_lockres); + ocfs2_lock_res_init_once(&oi->ip_rw_lockres); + ocfs2_lock_res_init_once(&oi->ip_meta_lockres); + ocfs2_lock_res_init_once(&oi->ip_data_lockres); + ocfs2_lock_res_init_once(&oi->ip_open_lockres); - ocfs2_metadata_cache_init(&oi->vfs_inode); + ocfs2_metadata_cache_init(&oi->vfs_inode); - inode_init_once(&oi->vfs_inode); - } + inode_init_once(&oi->vfs_inode); } static int ocfs2_initialize_mem_caches(void) diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c index 731a90e9f0cd..e62397341c36 100644 --- a/fs/openpromfs/inode.c +++ b/fs/openpromfs/inode.c @@ -419,8 +419,7 @@ static void op_inode_init_once(void *data, struct kmem_cache * cachep, unsigned { struct op_inode_info *oi = (struct op_inode_info *) 
data; - if (flags & SLAB_CTOR_CONSTRUCTOR) - inode_init_once(&oi->vfs_inode); + inode_init_once(&oi->vfs_inode); } static int __init init_openprom_fs(void) diff --git a/fs/partitions/Kconfig b/fs/partitions/Kconfig index 7638a1c42a7d..a99acd8de353 100644 --- a/fs/partitions/Kconfig +++ b/fs/partitions/Kconfig @@ -166,8 +166,12 @@ config LDM_PARTITION depends on PARTITION_ADVANCED ---help--- Say Y here if you would like to use hard disks under Linux which - were partitioned using Windows 2000's or XP's Logical Disk Manager. - They are also known as "Dynamic Disks". + were partitioned using Windows 2000's/XP's or Vista's Logical Disk + Manager. They are also known as "Dynamic Disks". + + Note this driver only supports Dynamic Disks with a protective MBR + label, i.e. DOS partition table. It does not support GPT labelled + Dynamic Disks yet as can be created with Vista. Windows 2000 introduced the concept of Dynamic Disks to get around the limitations of the PC's partitioning scheme. The Logical Disk @@ -175,8 +179,8 @@ config LDM_PARTITION mirrored, striped or RAID volumes, all without the need for rebooting. - Normal partitions are now called Basic Disks under Windows 2000 and - XP. + Normal partitions are now called Basic Disks under Windows 2000, XP, + and Vista. For a fuller description read <file:Documentation/ldm.txt>. 
diff --git a/fs/partitions/ldm.c b/fs/partitions/ldm.c index 1a60926a4ccd..99873a2b4cbc 100644 --- a/fs/partitions/ldm.c +++ b/fs/partitions/ldm.c @@ -2,10 +2,10 @@ * ldm - Support for Windows Logical Disk Manager (Dynamic Disks) * * Copyright (C) 2001,2002 Richard Russon <ldm@flatcap.org> - * Copyright (c) 2001-2004 Anton Altaparmakov + * Copyright (c) 2001-2007 Anton Altaparmakov * Copyright (C) 2001,2002 Jakob Kemi <jakob.kemi@telia.com> * - * Documentation is available at http://linux-ntfs.sf.net/ldm + * Documentation is available at http://www.linux-ntfs.org/content/view/19/37/ * * This program is free software; you can redistribute it and/or modify it under * the terms of the GNU General Public License as published by the Free Software @@ -62,7 +62,6 @@ static void _ldm_printk (const char *level, const char *function, printk ("%s%s(): %s\n", level, function, buf); } - /** * ldm_parse_hexbyte - Convert a ASCII hex number to a byte * @src: Pointer to at least 2 characters to convert. @@ -118,7 +117,6 @@ static bool ldm_parse_guid (const u8 *src, u8 *dest) return true; } - /** * ldm_parse_privhead - Read the LDM Database PRIVHEAD structure * @data: Raw database PRIVHEAD structure loaded from the device @@ -130,46 +128,48 @@ static bool ldm_parse_guid (const u8 *src, u8 *dest) * Return: 'true' @ph contains the PRIVHEAD data * 'false' @ph contents are undefined */ -static bool ldm_parse_privhead (const u8 *data, struct privhead *ph) +static bool ldm_parse_privhead(const u8 *data, struct privhead *ph) { - BUG_ON (!data || !ph); + bool is_vista = false; - if (MAGIC_PRIVHEAD != BE64 (data)) { - ldm_error ("Cannot find PRIVHEAD structure. LDM database is" + BUG_ON(!data || !ph); + if (MAGIC_PRIVHEAD != BE64(data)) { + ldm_error("Cannot find PRIVHEAD structure. LDM database is" " corrupt. 
Aborting."); return false; } - - ph->ver_major = BE16 (data + 0x000C); - ph->ver_minor = BE16 (data + 0x000E); - ph->logical_disk_start = BE64 (data + 0x011B); - ph->logical_disk_size = BE64 (data + 0x0123); - ph->config_start = BE64 (data + 0x012B); - ph->config_size = BE64 (data + 0x0133); - - if ((ph->ver_major != 2) || (ph->ver_minor != 11)) { - ldm_error ("Expected PRIVHEAD version %d.%d, got %d.%d." - " Aborting.", 2, 11, ph->ver_major, ph->ver_minor); + ph->ver_major = BE16(data + 0x000C); + ph->ver_minor = BE16(data + 0x000E); + ph->logical_disk_start = BE64(data + 0x011B); + ph->logical_disk_size = BE64(data + 0x0123); + ph->config_start = BE64(data + 0x012B); + ph->config_size = BE64(data + 0x0133); + /* Version 2.11 is Win2k/XP and version 2.12 is Vista. */ + if (ph->ver_major == 2 && ph->ver_minor == 12) + is_vista = true; + if (!is_vista && (ph->ver_major != 2 || ph->ver_minor != 11)) { + ldm_error("Expected PRIVHEAD version 2.11 or 2.12, got %d.%d." + " Aborting.", ph->ver_major, ph->ver_minor); return false; } + ldm_debug("PRIVHEAD version %d.%d (Windows %s).", ph->ver_major, + ph->ver_minor, is_vista ? "Vista" : "2000/XP"); if (ph->config_size != LDM_DB_SIZE) { /* 1 MiB in sectors. */ - /* Warn the user and continue, carefully */ - ldm_info ("Database is normally %u bytes, it claims to " + /* Warn the user and continue, carefully. 
*/ + ldm_info("Database is normally %u bytes, it claims to " "be %llu bytes.", LDM_DB_SIZE, - (unsigned long long)ph->config_size ); + (unsigned long long)ph->config_size); } - if ((ph->logical_disk_size == 0) || - (ph->logical_disk_start + ph->logical_disk_size > ph->config_start)) { - ldm_error ("PRIVHEAD disk size doesn't match real disk size"); + if ((ph->logical_disk_size == 0) || (ph->logical_disk_start + + ph->logical_disk_size > ph->config_start)) { + ldm_error("PRIVHEAD disk size doesn't match real disk size"); return false; } - - if (!ldm_parse_guid (data + 0x0030, ph->disk_id)) { - ldm_error ("PRIVHEAD contains an invalid GUID."); + if (!ldm_parse_guid(data + 0x0030, ph->disk_id)) { + ldm_error("PRIVHEAD contains an invalid GUID."); return false; } - - ldm_debug ("Parsed PRIVHEAD successfully."); + ldm_debug("Parsed PRIVHEAD successfully."); return true; } @@ -409,7 +409,7 @@ out: * Return: 'true' @toc1 contains validated TOCBLOCK info * 'false' @toc1 contents are undefined */ -static bool ldm_validate_tocblocks (struct block_device *bdev, +static bool ldm_validate_tocblocks(struct block_device *bdev, unsigned long base, struct ldmdb *ldb) { static const int off[4] = { OFF_TOCB1, OFF_TOCB2, OFF_TOCB3, OFF_TOCB4}; @@ -417,54 +417,57 @@ static bool ldm_validate_tocblocks (struct block_device *bdev, struct privhead *ph; Sector sect; u8 *data; + int i, nr_tbs; bool result = false; - int i; - BUG_ON (!bdev || !ldb); - - ph = &ldb->ph; + BUG_ON(!bdev || !ldb); + ph = &ldb->ph; tb[0] = &ldb->toc; - tb[1] = kmalloc (sizeof (*tb[1]), GFP_KERNEL); - tb[2] = kmalloc (sizeof (*tb[2]), GFP_KERNEL); - tb[3] = kmalloc (sizeof (*tb[3]), GFP_KERNEL); - if (!tb[1] || !tb[2] || !tb[3]) { - ldm_crit ("Out of memory."); - goto out; + tb[1] = kmalloc(sizeof(*tb[1]) * 3, GFP_KERNEL); + if (!tb[1]) { + ldm_crit("Out of memory."); + goto err; } - - for (i = 0; i < 4; i++) /* Read and parse all four toc's. 
*/ - { - data = read_dev_sector (bdev, base + off[i], &sect); + tb[2] = (struct tocblock*)((u8*)tb[1] + sizeof(*tb[1])); + tb[3] = (struct tocblock*)((u8*)tb[2] + sizeof(*tb[2])); + /* + * Try to read and parse all four TOCBLOCKs. + * + * Windows Vista LDM v2.12 does not always have all four TOCBLOCKs so + * skip any that fail as long as we get at least one valid TOCBLOCK. + */ + for (nr_tbs = i = 0; i < 4; i++) { + data = read_dev_sector(bdev, base + off[i], &sect); if (!data) { - ldm_crit ("Disk read failed."); - goto out; + ldm_error("Disk read failed for TOCBLOCK %d.", i); + continue; } - result = ldm_parse_tocblock (data, tb[i]); - put_dev_sector (sect); - if (!result) - goto out; /* Already logged */ + if (ldm_parse_tocblock(data, tb[nr_tbs])) + nr_tbs++; + put_dev_sector(sect); } - - /* Range check the toc against a privhead. */ + if (!nr_tbs) { + ldm_crit("Failed to find a valid TOCBLOCK."); + goto err; + } + /* Range check the TOCBLOCK against a privhead. */ if (((tb[0]->bitmap1_start + tb[0]->bitmap1_size) > ph->config_size) || - ((tb[0]->bitmap2_start + tb[0]->bitmap2_size) > ph->config_size)) { - ldm_crit ("The bitmaps are out of range. Giving up."); - goto out; + ((tb[0]->bitmap2_start + tb[0]->bitmap2_size) > + ph->config_size)) { + ldm_crit("The bitmaps are out of range. Giving up."); + goto err; } - - if (!ldm_compare_tocblocks (tb[0], tb[1]) || /* Compare all tocs. */ - !ldm_compare_tocblocks (tb[0], tb[2]) || - !ldm_compare_tocblocks (tb[0], tb[3])) { - ldm_crit ("The TOCBLOCKs don't match."); - goto out; + /* Compare all loaded TOCBLOCKs. 
*/ + for (i = 1; i < nr_tbs; i++) { + if (!ldm_compare_tocblocks(tb[0], tb[i])) { + ldm_crit("TOCBLOCKs 0 and %d do not match.", i); + goto err; + } } - - ldm_debug ("Validated TOCBLOCKs successfully."); + ldm_debug("Validated %d TOCBLOCKs successfully.", nr_tbs); result = true; -out: - kfree (tb[1]); - kfree (tb[2]); - kfree (tb[3]); +err: + kfree(tb[1]); return result; } @@ -566,7 +569,7 @@ static bool ldm_validate_partition_table (struct block_device *bdev) p = (struct partition*)(data + 0x01BE); for (i = 0; i < 4; i++, p++) - if (SYS_IND (p) == WIN2K_DYNAMIC_PARTITION) { + if (SYS_IND (p) == LDM_PARTITION) { result = true; break; } @@ -975,44 +978,68 @@ static bool ldm_parse_dsk4 (const u8 *buffer, int buflen, struct vblk *vb) * Return: 'true' @vb contains a Partition VBLK * 'false' @vb contents are not defined */ -static bool ldm_parse_prt3 (const u8 *buffer, int buflen, struct vblk *vb) +static bool ldm_parse_prt3(const u8 *buffer, int buflen, struct vblk *vb) { int r_objid, r_name, r_size, r_parent, r_diskid, r_index, len; struct vblk_part *part; - BUG_ON (!buffer || !vb); - - r_objid = ldm_relative (buffer, buflen, 0x18, 0); - r_name = ldm_relative (buffer, buflen, 0x18, r_objid); - r_size = ldm_relative (buffer, buflen, 0x34, r_name); - r_parent = ldm_relative (buffer, buflen, 0x34, r_size); - r_diskid = ldm_relative (buffer, buflen, 0x34, r_parent); - + BUG_ON(!buffer || !vb); + r_objid = ldm_relative(buffer, buflen, 0x18, 0); + if (r_objid < 0) { + ldm_error("r_objid %d < 0", r_objid); + return false; + } + r_name = ldm_relative(buffer, buflen, 0x18, r_objid); + if (r_name < 0) { + ldm_error("r_name %d < 0", r_name); + return false; + } + r_size = ldm_relative(buffer, buflen, 0x34, r_name); + if (r_size < 0) { + ldm_error("r_size %d < 0", r_size); + return false; + } + r_parent = ldm_relative(buffer, buflen, 0x34, r_size); + if (r_parent < 0) { + ldm_error("r_parent %d < 0", r_parent); + return false; + } + r_diskid = ldm_relative(buffer, buflen, 0x34, 
r_parent); + if (r_diskid < 0) { + ldm_error("r_diskid %d < 0", r_diskid); + return false; + } if (buffer[0x12] & VBLK_FLAG_PART_INDEX) { - r_index = ldm_relative (buffer, buflen, 0x34, r_diskid); + r_index = ldm_relative(buffer, buflen, 0x34, r_diskid); + if (r_index < 0) { + ldm_error("r_index %d < 0", r_index); + return false; + } len = r_index; } else { r_index = 0; len = r_diskid; } - if (len < 0) + if (len < 0) { + ldm_error("len %d < 0", len); return false; - + } len += VBLK_SIZE_PRT3; - if (len != BE32 (buffer + 0x14)) + if (len > BE32(buffer + 0x14)) { + ldm_error("len %d > BE32(buffer + 0x14) %d", len, + BE32(buffer + 0x14)); return false; - + } part = &vb->vblk.part; - part->start = BE64 (buffer + 0x24 + r_name); - part->volume_offset = BE64 (buffer + 0x2C + r_name); - part->size = ldm_get_vnum (buffer + 0x34 + r_name); - part->parent_id = ldm_get_vnum (buffer + 0x34 + r_size); - part->disk_id = ldm_get_vnum (buffer + 0x34 + r_parent); + part->start = BE64(buffer + 0x24 + r_name); + part->volume_offset = BE64(buffer + 0x2C + r_name); + part->size = ldm_get_vnum(buffer + 0x34 + r_name); + part->parent_id = ldm_get_vnum(buffer + 0x34 + r_size); + part->disk_id = ldm_get_vnum(buffer + 0x34 + r_parent); if (vb->flags & VBLK_FLAG_PART_INDEX) part->partnum = buffer[0x35 + r_diskid]; else part->partnum = 0; - return true; } @@ -1475,4 +1502,3 @@ out: kfree (ldb); return result; } - diff --git a/fs/partitions/ldm.h b/fs/partitions/ldm.h index 6e8d7952b8b5..d2e6a3046939 100644 --- a/fs/partitions/ldm.h +++ b/fs/partitions/ldm.h @@ -2,10 +2,10 @@ * ldm - Part of the Linux-NTFS project. 
* * Copyright (C) 2001,2002 Richard Russon <ldm@flatcap.org> - * Copyright (C) 2001 Anton Altaparmakov <aia21@cantab.net> + * Copyright (c) 2001-2007 Anton Altaparmakov * Copyright (C) 2001,2002 Jakob Kemi <jakob.kemi@telia.com> * - * Documentation is available at http://linux-ntfs.sf.net/ldm + * Documentation is available at http://www.linux-ntfs.org/content/view/19/37/ * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the Free @@ -93,7 +93,7 @@ struct parsed_partitions; #define OFF_VMDB 17 /* List of partitions. */ -#define WIN2K_DYNAMIC_PARTITION 0x42 /* Formerly SFS (Landis). */ +#define LDM_PARTITION 0x42 /* Formerly SFS (Landis). */ #define TOC_BITMAP1 "config" /* Names of the two defined */ #define TOC_BITMAP2 "log" /* bitmaps in the TOCBLOCK. */ diff --git a/fs/proc/inode.c b/fs/proc/inode.c index b8171907c83b..d5ce65c68d7b 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -109,8 +109,7 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag { struct proc_inode *ei = (struct proc_inode *) foo; - if (flags & SLAB_CTOR_CONSTRUCTOR) - inode_init_once(&ei->vfs_inode); + inode_init_once(&ei->vfs_inode); } int __init proc_init_inodecache(void) diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c index 75fc8498f2e2..8d256eb11813 100644 --- a/fs/qnx4/inode.c +++ b/fs/qnx4/inode.c @@ -536,8 +536,7 @@ static void init_once(void *foo, struct kmem_cache * cachep, { struct qnx4_inode_info *ei = (struct qnx4_inode_info *) foo; - if (flags & SLAB_CTOR_CONSTRUCTOR) - inode_init_once(&ei->vfs_inode); + inode_init_once(&ei->vfs_inode); } static int init_inodecache(void) diff --git a/fs/quota.c b/fs/quota.c index e9d88fd0eca8..9f237d6182c9 100644 --- a/fs/quota.c +++ b/fs/quota.c @@ -157,7 +157,6 @@ static int check_quotactl_valid(struct super_block *sb, int type, int cmd, qid_t static void quota_sync_sb(struct super_block *sb, int type) { int cnt; - struct 
inode *discard[MAXQUOTAS]; sb->s_qcop->quota_sync(sb, type); /* This is not very clever (and fast) but currently I don't know about @@ -167,29 +166,21 @@ static void quota_sync_sb(struct super_block *sb, int type) sb->s_op->sync_fs(sb, 1); sync_blockdev(sb->s_bdev); - /* Now when everything is written we can discard the pagecache so - * that userspace sees the changes. We need i_mutex and so we could - * not do it inside dqonoff_mutex. Moreover we need to be carefull - * about races with quotaoff() (that is the reason why we have own - * reference to inode). */ + /* + * Now when everything is written we can discard the pagecache so + * that userspace sees the changes. + */ mutex_lock(&sb_dqopt(sb)->dqonoff_mutex); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { - discard[cnt] = NULL; if (type != -1 && cnt != type) continue; if (!sb_has_quota_enabled(sb, cnt)) continue; - discard[cnt] = igrab(sb_dqopt(sb)->files[cnt]); + mutex_lock_nested(&sb_dqopt(sb)->files[cnt]->i_mutex, I_MUTEX_QUOTA); + truncate_inode_pages(&sb_dqopt(sb)->files[cnt]->i_data, 0); + mutex_unlock(&sb_dqopt(sb)->files[cnt]->i_mutex); } mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex); - for (cnt = 0; cnt < MAXQUOTAS; cnt++) { - if (discard[cnt]) { - mutex_lock(&discard[cnt]->i_mutex); - truncate_inode_pages(&discard[cnt]->i_data, 0); - mutex_unlock(&discard[cnt]->i_mutex); - iput(discard[cnt]); - } - } } void sync_dquots(struct super_block *sb, int type) diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c index 4ace5d72eae1..d40d22b347b7 100644 --- a/fs/ramfs/inode.c +++ b/fs/ramfs/inode.c @@ -32,7 +32,7 @@ #include <linux/string.h> #include <linux/backing-dev.h> #include <linux/ramfs.h> - +#include <linux/sched.h> #include <asm/uaccess.h> #include "internal.h" diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index c7762140c425..b4ac9119200e 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -511,14 +511,12 @@ static void init_once(void *foo, struct kmem_cache * cachep, unsigned long flags { struct 
reiserfs_inode_info *ei = (struct reiserfs_inode_info *)foo; - if (flags & SLAB_CTOR_CONSTRUCTOR) { - INIT_LIST_HEAD(&ei->i_prealloc_list); - inode_init_once(&ei->vfs_inode); + INIT_LIST_HEAD(&ei->i_prealloc_list); + inode_init_once(&ei->vfs_inode); #ifdef CONFIG_REISERFS_FS_POSIX_ACL - ei->i_acl_access = NULL; - ei->i_acl_default = NULL; + ei->i_acl_access = NULL; + ei->i_acl_default = NULL; #endif - } } static int init_inodecache(void) diff --git a/fs/romfs/inode.c b/fs/romfs/inode.c index 804285190271..2284e03342c6 100644 --- a/fs/romfs/inode.c +++ b/fs/romfs/inode.c @@ -566,12 +566,11 @@ static void romfs_destroy_inode(struct inode *inode) kmem_cache_free(romfs_inode_cachep, ROMFS_I(inode)); } -static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags) +static void init_once(void *foo, struct kmem_cache *cachep, unsigned long flags) { - struct romfs_inode_info *ei = (struct romfs_inode_info *) foo; + struct romfs_inode_info *ei = foo; - if (flags & SLAB_CTOR_CONSTRUCTOR) - inode_init_once(&ei->vfs_inode); + inode_init_once(&ei->vfs_inode); } static int init_inodecache(void) diff --git a/fs/smbfs/dir.c b/fs/smbfs/dir.c index 50136b1a3eca..48da4fa6b7d4 100644 --- a/fs/smbfs/dir.c +++ b/fs/smbfs/dir.c @@ -13,6 +13,7 @@ #include <linux/smp_lock.h> #include <linux/ctype.h> #include <linux/net.h> +#include <linux/sched.h> #include <linux/smb_fs.h> #include <linux/smb_mount.h> diff --git a/fs/smbfs/file.c b/fs/smbfs/file.c index f161797160c4..aea3f8aa54c0 100644 --- a/fs/smbfs/file.c +++ b/fs/smbfs/file.c @@ -17,6 +17,7 @@ #include <linux/pagemap.h> #include <linux/smp_lock.h> #include <linux/net.h> +#include <linux/aio.h> #include <asm/uaccess.h> #include <asm/system.h> diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c index 424a3ddf86dd..6724a6cf01ff 100644 --- a/fs/smbfs/inode.c +++ b/fs/smbfs/inode.c @@ -25,6 +25,7 @@ #include <linux/net.h> #include <linux/vfs.h> #include <linux/highuid.h> +#include <linux/sched.h> #include <linux/smb_fs.h> 
#include <linux/smbno.h> #include <linux/smb_mount.h> @@ -70,8 +71,7 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag { struct smb_inode_info *ei = (struct smb_inode_info *) foo; - if (flags & SLAB_CTOR_CONSTRUCTOR) - inode_init_once(&ei->vfs_inode); + inode_init_once(&ei->vfs_inode); } static int init_inodecache(void) diff --git a/fs/smbfs/request.c b/fs/smbfs/request.c index c288fbe7953d..3f54a0f80fae 100644 --- a/fs/smbfs/request.c +++ b/fs/smbfs/request.c @@ -11,6 +11,7 @@ #include <linux/fs.h> #include <linux/slab.h> #include <linux/net.h> +#include <linux/sched.h> #include <linux/smb_fs.h> #include <linux/smbno.h> diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c index 4de5c6b89918..bdd30e74de6b 100644 --- a/fs/sysfs/inode.c +++ b/fs/sysfs/inode.c @@ -13,6 +13,7 @@ #include <linux/backing-dev.h> #include <linux/capability.h> #include <linux/errno.h> +#include <linux/sched.h> #include <asm/semaphore.h> #include "sysfs.h" diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c index 3152d7415606..564411693394 100644 --- a/fs/sysv/inode.c +++ b/fs/sysv/inode.c @@ -322,8 +322,7 @@ static void init_once(void *p, struct kmem_cache *cachep, unsigned long flags) { struct sysv_inode_info *si = (struct sysv_inode_info *)p; - if (flags & SLAB_CTOR_CONSTRUCTOR) - inode_init_once(&si->vfs_inode); + inode_init_once(&si->vfs_inode); } const struct super_operations sysv_sops = { diff --git a/fs/timerfd.c b/fs/timerfd.c index e329e37f15a8..af9eca5c0230 100644 --- a/fs/timerfd.c +++ b/fs/timerfd.c @@ -24,7 +24,6 @@ struct timerfd_ctx { struct hrtimer tmr; ktime_t tintv; - spinlock_t lock; wait_queue_head_t wqh; int expired; }; @@ -39,10 +38,10 @@ static enum hrtimer_restart timerfd_tmrproc(struct hrtimer *htmr) struct timerfd_ctx *ctx = container_of(htmr, struct timerfd_ctx, tmr); unsigned long flags; - spin_lock_irqsave(&ctx->lock, flags); + spin_lock_irqsave(&ctx->wqh.lock, flags); ctx->expired = 1; wake_up_locked(&ctx->wqh); - 
spin_unlock_irqrestore(&ctx->lock, flags); + spin_unlock_irqrestore(&ctx->wqh.lock, flags); return HRTIMER_NORESTART; } @@ -83,10 +82,10 @@ static unsigned int timerfd_poll(struct file *file, poll_table *wait) poll_wait(file, &ctx->wqh, wait); - spin_lock_irqsave(&ctx->lock, flags); + spin_lock_irqsave(&ctx->wqh.lock, flags); if (ctx->expired) events |= POLLIN; - spin_unlock_irqrestore(&ctx->lock, flags); + spin_unlock_irqrestore(&ctx->wqh.lock, flags); return events; } @@ -101,7 +100,7 @@ static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count, if (count < sizeof(ticks)) return -EINVAL; - spin_lock_irq(&ctx->lock); + spin_lock_irq(&ctx->wqh.lock); res = -EAGAIN; if (!ctx->expired && !(file->f_flags & O_NONBLOCK)) { __add_wait_queue(&ctx->wqh, &wait); @@ -115,9 +114,9 @@ static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count, res = -ERESTARTSYS; break; } - spin_unlock_irq(&ctx->lock); + spin_unlock_irq(&ctx->wqh.lock); schedule(); - spin_lock_irq(&ctx->lock); + spin_lock_irq(&ctx->wqh.lock); } __remove_wait_queue(&ctx->wqh, &wait); __set_current_state(TASK_RUNNING); @@ -139,7 +138,7 @@ static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count, } else ticks = 1; } - spin_unlock_irq(&ctx->lock); + spin_unlock_irq(&ctx->wqh.lock); if (ticks) res = put_user(ticks, buf) ? -EFAULT: sizeof(ticks); return res; @@ -176,7 +175,6 @@ asmlinkage long sys_timerfd(int ufd, int clockid, int flags, return -ENOMEM; init_waitqueue_head(&ctx->wqh); - spin_lock_init(&ctx->lock); timerfd_setup(ctx, clockid, flags, &ktmr); @@ -202,10 +200,10 @@ asmlinkage long sys_timerfd(int ufd, int clockid, int flags, * it to the new values. 
*/ for (;;) { - spin_lock_irq(&ctx->lock); + spin_lock_irq(&ctx->wqh.lock); if (hrtimer_try_to_cancel(&ctx->tmr) >= 0) break; - spin_unlock_irq(&ctx->lock); + spin_unlock_irq(&ctx->wqh.lock); cpu_relax(); } /* @@ -213,7 +211,7 @@ asmlinkage long sys_timerfd(int ufd, int clockid, int flags, */ timerfd_setup(ctx, clockid, flags, &ktmr); - spin_unlock_irq(&ctx->lock); + spin_unlock_irq(&ctx->wqh.lock); fput(file); } diff --git a/fs/udf/file.c b/fs/udf/file.c index 40d5047defea..51b5764685e7 100644 --- a/fs/udf/file.c +++ b/fs/udf/file.c @@ -36,6 +36,7 @@ #include <linux/smp_lock.h> #include <linux/pagemap.h> #include <linux/buffer_head.h> +#include <linux/aio.h> #include "udf_i.h" #include "udf_sb.h" diff --git a/fs/udf/namei.c b/fs/udf/namei.c index 91df4928651c..51fe307dc0ec 100644 --- a/fs/udf/namei.c +++ b/fs/udf/namei.c @@ -30,6 +30,7 @@ #include <linux/quotaops.h> #include <linux/smp_lock.h> #include <linux/buffer_head.h> +#include <linux/sched.h> static inline int udf_match(int len1, const char *name1, int len2, const char *name2) { diff --git a/fs/udf/super.c b/fs/udf/super.c index 9b8644a06e53..3a743d854c17 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -134,10 +134,8 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag { struct udf_inode_info *ei = (struct udf_inode_info *) foo; - if (flags & SLAB_CTOR_CONSTRUCTOR) { - ei->i_ext.i_data = NULL; - inode_init_once(&ei->vfs_inode); - } + ei->i_ext.i_data = NULL; + inode_init_once(&ei->vfs_inode); } static int init_inodecache(void) diff --git a/fs/ufs/super.c b/fs/ufs/super.c index be7c48c5f203..22ff6ed55ce9 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c @@ -1237,8 +1237,7 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag { struct ufs_inode_info *ei = (struct ufs_inode_info *) foo; - if (flags & SLAB_CTOR_CONSTRUCTOR) - inode_init_once(&ei->vfs_inode); + inode_init_once(&ei->vfs_inode); } static int init_inodecache(void) diff --git 
a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 14e2cbe5a8d5..bf9a9d5909be 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -360,8 +360,7 @@ xfs_fs_inode_init_once( kmem_zone_t *zonep, unsigned long flags) { - if (flags & SLAB_CTOR_CONSTRUCTOR) - inode_init_once(vn_to_inode((bhv_vnode_t *)vnode)); + inode_init_once(vn_to_inode((bhv_vnode_t *)vnode)); } STATIC int |