From 8a146a2b0d6e97941a5c2dc5d8a3ea1e6c3ab997 Mon Sep 17 00:00:00 2001 From: Michael Halcrow Date: Wed, 14 Nov 2007 16:58:27 -0800 Subject: eCryptfs: cast page->index to loff_t instead of off_t page->index should be cast to loff_t instead of off_t. Signed-off-by: Michael Halcrow Reported-by: Eric Sandeen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ecryptfs/read_write.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ecryptfs/read_write.c b/fs/ecryptfs/read_write.c index 2150edf9a58e..6b7474a4336a 100644 --- a/fs/ecryptfs/read_write.c +++ b/fs/ecryptfs/read_write.c @@ -87,7 +87,7 @@ int ecryptfs_write_lower_page_segment(struct inode *ecryptfs_inode, loff_t offset; int rc; - offset = ((((off_t)page_for_lower->index) << PAGE_CACHE_SHIFT) + offset = ((((loff_t)page_for_lower->index) << PAGE_CACHE_SHIFT) + offset_in_page); virt = kmap(page_for_lower); rc = ecryptfs_write_lower(ecryptfs_inode, virt, offset, size); -- cgit v1.2.3 From e47776a0a41a14a5634633c96e590827f552c4b5 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 14 Nov 2007 16:58:56 -0800 Subject: Forbid user to change file flags on quota files Forbid user from changing file flags on quota files. User has no bussiness in playing with these flags when quota is on. Furthermore there is a remote possibility of deadlock due to a lock inversion between quota file's i_mutex and transaction's start (i_mutex for quota file is locked only when trasaction is started in quota operations) in ext3 and ext4. Signed-off-by: Jan Kara Cc: LIOU Payphone Cc: Acked-by: Dave Kleikamp Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ext2/ioctl.c | 5 +++++ fs/ext3/ioctl.c | 5 +++++ fs/ext4/ioctl.c | 5 +++++ fs/jfs/ioctl.c | 3 +++ fs/reiserfs/ioctl.c | 3 +++ 5 files changed, 21 insertions(+) (limited to 'fs') diff --git a/fs/ext2/ioctl.c b/fs/ext2/ioctl.c index c2324d5fe4ac..320b2cb3d4d2 100644 --- a/fs/ext2/ioctl.c +++ b/fs/ext2/ioctl.c @@ -47,6 +47,11 @@ int ext2_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, flags &= ~EXT2_DIRSYNC_FL; mutex_lock(&inode->i_mutex); + /* Is it quota file? Do not allow user to mess with it */ + if (IS_NOQUOTA(inode)) { + mutex_unlock(&inode->i_mutex); + return -EPERM; + } oldflags = ei->i_flags; /* diff --git a/fs/ext3/ioctl.c b/fs/ext3/ioctl.c index 4a2a02c95bf9..023a070f55f1 100644 --- a/fs/ext3/ioctl.c +++ b/fs/ext3/ioctl.c @@ -51,6 +51,11 @@ int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, flags &= ~EXT3_DIRSYNC_FL; mutex_lock(&inode->i_mutex); + /* Is it quota file? Do not allow user to mess with it */ + if (IS_NOQUOTA(inode)) { + mutex_unlock(&inode->i_mutex); + return -EPERM; + } oldflags = ei->i_flags; /* The JOURNAL_DATA flag is modifiable only by root */ diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index c04c7ccba9e3..e7f894bdb420 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -51,6 +51,11 @@ int ext4_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, flags &= ~EXT4_DIRSYNC_FL; mutex_lock(&inode->i_mutex); + /* Is it quota file? Do not allow user to mess with it */ + if (IS_NOQUOTA(inode)) { + mutex_unlock(&inode->i_mutex); + return -EPERM; + } oldflags = ei->i_flags; /* The JOURNAL_DATA flag is modifiable only by root */ diff --git a/fs/jfs/ioctl.c b/fs/jfs/ioctl.c index 3c8663bea98c..dfda12a073e1 100644 --- a/fs/jfs/ioctl.c +++ b/fs/jfs/ioctl.c @@ -79,6 +79,9 @@ int jfs_ioctl(struct inode * inode, struct file * filp, unsigned int cmd, if (!S_ISDIR(inode->i_mode)) flags &= ~JFS_DIRSYNC_FL; + /* Is it quota file? Do not allow user to mess with it */ + if (IS_NOQUOTA(inode)) + return -EPERM; jfs_get_inode_flags(jfs_inode); oldflags = jfs_inode->mode2; diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c index c438a8f83f26..e0f0f098a523 100644 --- a/fs/reiserfs/ioctl.c +++ b/fs/reiserfs/ioctl.c @@ -57,6 +57,9 @@ int reiserfs_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, if (get_user(flags, (int __user *)arg)) return -EFAULT; + /* Is it quota file? Do not allow user to mess with it. */ + if (IS_NOQUOTA(inode)) + return -EPERM; if (((flags ^ REISERFS_I(inode)-> i_attrs) & (REISERFS_IMMUTABLE_FL | REISERFS_APPEND_FL)) -- cgit v1.2.3 From e1a1c997afe907e6ec4799e4be0f38cffd8b418c Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 14 Nov 2007 16:59:08 -0800 Subject: proc: fix proc_kill_inodes to kill dentries on all proc superblocks It appears we overlooked support for removing generic proc files when we added support for multiple proc super blocks. Handle that now. [akpm@linux-foundation.org: coding-style cleanups] Signed-off-by: Eric W. Biederman Acked-by: Pavel Emelyanov Cc: Alexey Dobriyan Acked-by: Sukadev Bhattiprolu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/generic.c | 39 ++++++++++++++++++++++----------------- fs/proc/internal.h | 2 ++ fs/proc/root.c | 2 +- 3 files changed, 25 insertions(+), 18 deletions(-) (limited to 'fs') diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 1bdb62435758..a9806bc21ec3 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -561,28 +561,33 @@ static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp static void proc_kill_inodes(struct proc_dir_entry *de) { struct list_head *p; - struct super_block *sb = proc_mnt->mnt_sb; + struct super_block *sb; /* * Actually it's a partial revoke(). */ - file_list_lock(); - list_for_each(p, &sb->s_files) { - struct file * filp = list_entry(p, struct file, f_u.fu_list); - struct dentry * dentry = filp->f_path.dentry; - struct inode * inode; - const struct file_operations *fops; - - if (dentry->d_op != &proc_dentry_operations) - continue; - inode = dentry->d_inode; - if (PDE(inode) != de) - continue; - fops = filp->f_op; - filp->f_op = NULL; - fops_put(fops); + spin_lock(&sb_lock); + list_for_each_entry(sb, &proc_fs_type.fs_supers, s_instances) { + file_list_lock(); + list_for_each(p, &sb->s_files) { + struct file *filp = list_entry(p, struct file, + f_u.fu_list); + struct dentry *dentry = filp->f_path.dentry; + struct inode *inode; + const struct file_operations *fops; + + if (dentry->d_op != &proc_dentry_operations) + continue; + inode = dentry->d_inode; + if (PDE(inode) != de) + continue; + fops = filp->f_op; + filp->f_op = NULL; + fops_put(fops); + } + file_list_unlock(); } - file_list_unlock(); + spin_unlock(&sb_lock); } static struct proc_dir_entry *proc_create(struct proc_dir_entry **parent, diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 1820eb2ef762..1b2b6c6bb475 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -78,3 +78,5 @@ static inline int proc_fd(struct inode *inode) { return PROC_I(inode)->fd; } + +extern struct file_system_type proc_fs_type; diff --git a/fs/proc/root.c b/fs/proc/root.c index ec9cb3b6c93b..1f86bb860e04 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -98,7 +98,7 @@ static void proc_kill_sb(struct super_block *sb) put_pid_ns(ns); } -static struct file_system_type proc_fs_type = { +struct file_system_type proc_fs_type = { .name = "proc", .get_sb = proc_get_sb, .kill_sb = proc_kill_sb, -- cgit v1.2.3 From c79fb75e5a514a5a35f22c229042aa29f4237e3a Mon Sep 17 00:00:00 2001 From: Adam Litke Date: Wed, 14 Nov 2007 16:59:38 -0800 Subject: hugetlb: fix quota management for private mappings The hugetlbfs quota management system was never taught to handle MAP_PRIVATE mappings when that support was added. Currently, quota is debited at page instantiation and credited at file truncation. This approach works correctly for shared pages but is incomplete for private pages. In addition to hugetlb_no_page(), private pages can be instantiated by hugetlb_cow(); but this function does not respect quotas. Private huge pages are treated very much like normal, anonymous pages. They are not "backed" by the hugetlbfs file and are not stored in the mapping's radix tree. This means that private pages are invisible to truncate_hugepages() so that function will not credit the quota. This patch (based on a prototype provided by Ken Chen) moves quota crediting for all pages into free_huge_page(). page->private is used to store a pointer to the mapping to which this page belongs. This is used to credit quota on the appropriate hugetlbfs instance. Signed-off-by: Adam Litke Cc: Ken Chen Cc: Ken Chen Cc: Andy Whitcroft Cc: Dave Hansen Cc: David Gibson Cc: William Lee Irwin III Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/hugetlbfs/inode.c | 1 - mm/hugetlb.c | 13 ++++++++++--- 2 files changed, 10 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 12aca8ed605f..6513f5655861 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -364,7 +364,6 @@ static void truncate_hugepages(struct inode *inode, loff_t lstart) ++next; truncate_huge_page(page); unlock_page(page); - hugetlb_put_quota(mapping); freed++; } huge_pagevec_release(&pvec); diff --git a/mm/hugetlb.c b/mm/hugetlb.c index f43b3dca12b5..3992bd5120e7 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -116,7 +116,9 @@ static void update_and_free_page(struct page *page) static void free_huge_page(struct page *page) { int nid = page_to_nid(page); + struct address_space *mapping; + mapping = (struct address_space *) page_private(page); BUG_ON(page_count(page)); INIT_LIST_HEAD(&page->lru); @@ -129,6 +131,9 @@ static void free_huge_page(struct page *page) enqueue_huge_page(page); } spin_unlock(&hugetlb_lock); + if (mapping) + hugetlb_put_quota(mapping); + set_page_private(page, 0); } /* @@ -388,8 +393,10 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma, page = alloc_huge_page_shared(vma, addr); else page = alloc_huge_page_private(vma, addr); - if (page) + if (page) { set_page_refcounted(page); + set_page_private(page, (unsigned long) vma->vm_file->f_mapping); + } return page; } @@ -730,6 +737,8 @@ static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma, set_huge_ptep_writable(vma, address, ptep); return 0; } + if (hugetlb_get_quota(vma->vm_file->f_mapping)) + return VM_FAULT_SIGBUS; page_cache_get(old_page); new_page = alloc_huge_page(vma, address); @@ -796,7 +805,6 @@ retry: err = add_to_page_cache(page, mapping, idx, GFP_KERNEL); if (err) { put_page(page); - hugetlb_put_quota(mapping); if (err == -EEXIST) goto retry; goto out; @@ -830,7 +838,6 @@ out: backout: spin_unlock(&mm->page_table_lock); - hugetlb_put_quota(mapping); unlock_page(page); put_page(page); goto out; -- cgit v1.2.3 From 9a119c056dc2a9970901954a6d561d50a95e528d Mon Sep 17 00:00:00 2001 From: Adam Litke Date: Wed, 14 Nov 2007 16:59:41 -0800 Subject: hugetlb: allow bulk updating in hugetlb_*_quota() Add a second parameter 'delta' to hugetlb_get_quota and hugetlb_put_quota to allow bulk updating of the sbinfo->free_blocks counter. This will be used by the next patch in the series. Signed-off-by: Adam Litke Cc: Ken Chen Cc: Andy Whitcroft Cc: Dave Hansen Cc: David Gibson Cc: William Lee Irwin III Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/hugetlbfs/inode.c | 10 +++++----- include/linux/hugetlb.h | 4 ++-- mm/hugetlb.c | 4 ++-- 3 files changed, 9 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 6513f5655861..09ee07f02663 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -858,15 +858,15 @@ out_free: return -ENOMEM; } -int hugetlb_get_quota(struct address_space *mapping) +int hugetlb_get_quota(struct address_space *mapping, long delta) { int ret = 0; struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(mapping->host->i_sb); if (sbinfo->free_blocks > -1) { spin_lock(&sbinfo->stat_lock); - if (sbinfo->free_blocks > 0) - sbinfo->free_blocks--; + if (sbinfo->free_blocks - delta >= 0) + sbinfo->free_blocks -= delta; else ret = -ENOMEM; spin_unlock(&sbinfo->stat_lock); @@ -875,13 +875,13 @@ int hugetlb_get_quota(struct address_space *mapping) return ret; } -void hugetlb_put_quota(struct address_space *mapping) +void hugetlb_put_quota(struct address_space *mapping, long delta) { struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(mapping->host->i_sb); if (sbinfo->free_blocks > -1) { spin_lock(&sbinfo->stat_lock); - sbinfo->free_blocks++; + sbinfo->free_blocks += delta; spin_unlock(&sbinfo->stat_lock); } } diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index e22368656b38..8104e5af75ab 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -165,8 +165,8 @@ static inline struct hugetlbfs_sb_info *HUGETLBFS_SB(struct super_block *sb) extern const struct file_operations hugetlbfs_file_operations; extern struct vm_operations_struct hugetlb_vm_ops; struct file *hugetlb_file_setup(const char *name, size_t); -int hugetlb_get_quota(struct address_space *mapping); -void hugetlb_put_quota(struct address_space *mapping); +int hugetlb_get_quota(struct address_space *mapping, long delta); +void hugetlb_put_quota(struct address_space *mapping, long delta); static inline int is_file_hugepages(struct file *file) { diff --git a/mm/hugetlb.c b/mm/hugetlb.c index bc12b0adfa87..1e317465ecd1 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -132,7 +132,7 @@ static void free_huge_page(struct page *page) } spin_unlock(&hugetlb_lock); if (mapping) - hugetlb_put_quota(mapping); + hugetlb_put_quota(mapping, 1); set_page_private(page, 0); } @@ -390,7 +390,7 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma, struct page *page; struct address_space *mapping = vma->vm_file->f_mapping; - if (hugetlb_get_quota(mapping)) + if (hugetlb_get_quota(mapping, 1)) return ERR_PTR(-VM_FAULT_SIGBUS); if (vma->vm_flags & VM_MAYSHARE) -- cgit v1.2.3 From f433dc56344cb72cc3de5ba0819021cec3aef807 Mon Sep 17 00:00:00 2001 From: Dmitri Vorobiev Date: Wed, 14 Nov 2007 16:59:47 -0800 Subject: Fixes to the BFS filesystem driver I found a few bugs in the BFS driver. Detailed description of the bugs as well as the steps to reproduce the errors are given in the kernel bugzilla. Please follow these links for more information: http://bugzilla.kernel.org/show_bug.cgi?id=9363 http://bugzilla.kernel.org/show_bug.cgi?id=9364 http://bugzilla.kernel.org/show_bug.cgi?id=9365 http://bugzilla.kernel.org/show_bug.cgi?id=9366 This patch fixes the bugs described above. Besides, the patch introduces coding style changes to make the BFS driver conform to the requirements specified for Linux kernel code. Finally, I made a few cosmetic changes such as removal of trivial debug output. Also, the patch removes the fields `si_lf_ioff' and `si_lf_sblk' of the in-core superblock structure. These fields are initialized but never actually used. If you are wondering why I need BFS, here is the answer: I am using this driver in the context of Linux kernel classes I am teaching in the Moscow State University and in the International Institute of Information Technology in Pune, India. Signed-off-by: Dmitri Vorobiev Cc: Tigran Aivazian Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/bfs/bfs.h | 4 +- fs/bfs/dir.c | 146 ++++++++++++++++++++++++++++++--------------------------- fs/bfs/file.c | 62 +++++++++++++++--------- fs/bfs/inode.c | 127 ++++++++++++++++++++++++------------------------- 4 files changed, 184 insertions(+), 155 deletions(-) (limited to 'fs') diff --git a/fs/bfs/bfs.h b/fs/bfs/bfs.h index 130f6c66c5ba..ac7a8b1d6c3a 100644 --- a/fs/bfs/bfs.h +++ b/fs/bfs/bfs.h @@ -14,8 +14,6 @@ struct bfs_sb_info { unsigned long si_blocks; unsigned long si_freeb; unsigned long si_freei; - unsigned long si_lf_ioff; - unsigned long si_lf_sblk; unsigned long si_lf_eblk; unsigned long si_lasti; unsigned long * si_imap; @@ -39,7 +37,7 @@ static inline struct bfs_sb_info *BFS_SB(struct super_block *sb) static inline struct bfs_inode_info *BFS_I(struct inode *inode) { - return list_entry(inode, struct bfs_inode_info, vfs_inode); + return container_of(inode, struct bfs_inode_info, vfs_inode); } diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c index 097f1497f743..1fd056d0fc3d 100644 --- a/fs/bfs/dir.c +++ b/fs/bfs/dir.c @@ -21,29 +21,32 @@ #define dprintf(x...) #endif -static int bfs_add_entry(struct inode * dir, const unsigned char * name, int namelen, int ino); -static struct buffer_head * bfs_find_entry(struct inode * dir, - const unsigned char * name, int namelen, struct bfs_dirent ** res_dir); +static int bfs_add_entry(struct inode *dir, const unsigned char *name, + int namelen, int ino); +static struct buffer_head *bfs_find_entry(struct inode *dir, + const unsigned char *name, int namelen, + struct bfs_dirent **res_dir); -static int bfs_readdir(struct file * f, void * dirent, filldir_t filldir) +static int bfs_readdir(struct file *f, void *dirent, filldir_t filldir) { - struct inode * dir = f->f_path.dentry->d_inode; - struct buffer_head * bh; - struct bfs_dirent * de; + struct inode *dir = f->f_path.dentry->d_inode; + struct buffer_head *bh; + struct bfs_dirent *de; unsigned int offset; int block; lock_kernel(); - if (f->f_pos & (BFS_DIRENT_SIZE-1)) { - printf("Bad f_pos=%08lx for %s:%08lx\n", (unsigned long)f->f_pos, - dir->i_sb->s_id, dir->i_ino); + if (f->f_pos & (BFS_DIRENT_SIZE - 1)) { + printf("Bad f_pos=%08lx for %s:%08lx\n", + (unsigned long)f->f_pos, + dir->i_sb->s_id, dir->i_ino); unlock_kernel(); return -EBADF; } while (f->f_pos < dir->i_size) { - offset = f->f_pos & (BFS_BSIZE-1); + offset = f->f_pos & (BFS_BSIZE - 1); block = BFS_I(dir)->i_sblock + (f->f_pos >> BFS_BSIZE_BITS); bh = sb_bread(dir->i_sb, block); if (!bh) { @@ -54,7 +57,9 @@ static int bfs_readdir(struct file * f, void * dirent, filldir_t filldir) de = (struct bfs_dirent *)(bh->b_data + offset); if (de->ino) { int size = strnlen(de->name, BFS_NAMELEN); - if (filldir(dirent, de->name, size, f->f_pos, le16_to_cpu(de->ino), DT_UNKNOWN) < 0) { + if (filldir(dirent, de->name, size, f->f_pos, + le16_to_cpu(de->ino), + DT_UNKNOWN) < 0) { brelse(bh); unlock_kernel(); return 0; @@ -62,7 +67,7 @@ static int bfs_readdir(struct file * f, void * dirent, filldir_t filldir) } offset += BFS_DIRENT_SIZE; f->f_pos += BFS_DIRENT_SIZE; - } while (offset < BFS_BSIZE && f->f_pos < dir->i_size); + } while ((offset < BFS_BSIZE) && (f->f_pos < dir->i_size)); brelse(bh); } @@ -78,13 +83,13 @@ const struct file_operations bfs_dir_operations = { extern void dump_imap(const char *, struct super_block *); -static int bfs_create(struct inode * dir, struct dentry * dentry, int mode, - struct nameidata *nd) +static int bfs_create(struct inode *dir, struct dentry *dentry, int mode, + struct nameidata *nd) { int err; - struct inode * inode; - struct super_block * s = dir->i_sb; - struct bfs_sb_info * info = BFS_SB(s); + struct inode *inode; + struct super_block *s = dir->i_sb; + struct bfs_sb_info *info = BFS_SB(s); unsigned long ino; inode = new_inode(s); @@ -97,7 +102,7 @@ static int bfs_create(struct inode * dir, struct dentry * dentry, int mode, iput(inode); return -ENOSPC; } - set_bit(ino, info->si_imap); + set_bit(ino, info->si_imap); info->si_freei--; inode->i_uid = current->fsuid; inode->i_gid = (dir->i_mode & S_ISGID) ? dir->i_gid : current->fsgid; @@ -113,9 +118,10 @@ static int bfs_create(struct inode * dir, struct dentry * dentry, int mode, BFS_I(inode)->i_eblock = 0; insert_inode_hash(inode); mark_inode_dirty(inode); - dump_imap("create",s); + dump_imap("create", s); - err = bfs_add_entry(dir, dentry->d_name.name, dentry->d_name.len, inode->i_ino); + err = bfs_add_entry(dir, dentry->d_name.name, dentry->d_name.len, + inode->i_ino); if (err) { inode_dec_link_count(inode); iput(inode); @@ -127,11 +133,12 @@ static int bfs_create(struct inode * dir, struct dentry * dentry, int mode, return 0; } -static struct dentry * bfs_lookup(struct inode * dir, struct dentry * dentry, struct nameidata *nd) +static struct dentry *bfs_lookup(struct inode *dir, struct dentry *dentry, + struct nameidata *nd) { - struct inode * inode = NULL; - struct buffer_head * bh; - struct bfs_dirent * de; + struct inode *inode = NULL; + struct buffer_head *bh; + struct bfs_dirent *de; if (dentry->d_name.len > BFS_NAMELEN) return ERR_PTR(-ENAMETOOLONG); @@ -152,13 +159,15 @@ static struct dentry * bfs_lookup(struct inode * dir, struct dentry * dentry, st return NULL; } -static int bfs_link(struct dentry * old, struct inode * dir, struct dentry * new) +static int bfs_link(struct dentry *old, struct inode *dir, + struct dentry *new) { - struct inode * inode = old->d_inode; + struct inode *inode = old->d_inode; int err; lock_kernel(); - err = bfs_add_entry(dir, new->d_name.name, new->d_name.len, inode->i_ino); + err = bfs_add_entry(dir, new->d_name.name, new->d_name.len, + inode->i_ino); if (err) { unlock_kernel(); return err; @@ -172,23 +181,23 @@ static int bfs_link(struct dentry * old, struct inode * dir, struct dentry * new return 0; } - -static int bfs_unlink(struct inode * dir, struct dentry * dentry) +static int bfs_unlink(struct inode *dir, struct dentry *dentry) { int error = -ENOENT; - struct inode * inode; - struct buffer_head * bh; - struct bfs_dirent * de; + struct inode *inode; + struct buffer_head *bh; + struct bfs_dirent *de; inode = dentry->d_inode; lock_kernel(); bh = bfs_find_entry(dir, dentry->d_name.name, dentry->d_name.len, &de); - if (!bh || le16_to_cpu(de->ino) != inode->i_ino) + if (!bh || (le16_to_cpu(de->ino) != inode->i_ino)) goto out_brelse; if (!inode->i_nlink) { - printf("unlinking non-existent file %s:%lu (nlink=%d)\n", inode->i_sb->s_id, - inode->i_ino, inode->i_nlink); + printf("unlinking non-existent file %s:%lu (nlink=%d)\n", + inode->i_sb->s_id, inode->i_ino, + inode->i_nlink); inode->i_nlink = 1; } de->ino = 0; @@ -205,12 +214,12 @@ out_brelse: return error; } -static int bfs_rename(struct inode * old_dir, struct dentry * old_dentry, - struct inode * new_dir, struct dentry * new_dentry) +static int bfs_rename(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry) { - struct inode * old_inode, * new_inode; - struct buffer_head * old_bh, * new_bh; - struct bfs_dirent * old_de, * new_de; + struct inode *old_inode, *new_inode; + struct buffer_head *old_bh, *new_bh; + struct bfs_dirent *old_de, *new_de; int error = -ENOENT; old_bh = new_bh = NULL; @@ -223,7 +232,7 @@ static int bfs_rename(struct inode * old_dir, struct dentry * old_dentry, old_dentry->d_name.name, old_dentry->d_name.len, &old_de); - if (!old_bh || le16_to_cpu(old_de->ino) != old_inode->i_ino) + if (!old_bh || (le16_to_cpu(old_de->ino) != old_inode->i_ino)) goto end_rename; error = -EPERM; @@ -239,7 +248,8 @@ static int bfs_rename(struct inode * old_dir, struct dentry * old_dentry, if (!new_bh) { error = bfs_add_entry(new_dir, new_dentry->d_name.name, - new_dentry->d_name.len, old_inode->i_ino); + new_dentry->d_name.len, + old_inode->i_ino); if (error) goto end_rename; } @@ -268,11 +278,12 @@ const struct inode_operations bfs_dir_inops = { .rename = bfs_rename, }; -static int bfs_add_entry(struct inode * dir, const unsigned char * name, int namelen, int ino) +static int bfs_add_entry(struct inode *dir, const unsigned char *name, + int namelen, int ino) { - struct buffer_head * bh; - struct bfs_dirent * de; - int block, sblock, eblock, off, eoff; + struct buffer_head *bh; + struct bfs_dirent *de; + int block, sblock, eblock, off, pos; int i; dprintf("name=%s, namelen=%d\n", name, namelen); @@ -284,27 +295,24 @@ static int bfs_add_entry(struct inode * dir, const unsigned char * name, int nam sblock = BFS_I(dir)->i_sblock; eblock = BFS_I(dir)->i_eblock; - eoff = dir->i_size % BFS_BSIZE; - for (block=sblock; block<=eblock; block++) { + for (block = sblock; block <= eblock; block++) { bh = sb_bread(dir->i_sb, block); - if(!bh) + if (!bh) return -ENOSPC; - for (off=0; offb_data + off); - if (block==eblock && off>=eoff) { - /* Do not read/interpret the garbage in the end of eblock. */ - de->ino = 0; - } if (!de->ino) { - if ((block-sblock)*BFS_BSIZE + off >= dir->i_size) { + pos = (block - sblock) * BFS_BSIZE + off; + if (pos >= dir->i_size) { dir->i_size += BFS_DIRENT_SIZE; dir->i_ctime = CURRENT_TIME_SEC; } dir->i_mtime = CURRENT_TIME_SEC; mark_inode_dirty(dir); de->ino = cpu_to_le16((u16)ino); - for (i=0; iname[i] = (i < namelen) ? name[i] : 0; + for (i = 0; i < BFS_NAMELEN; i++) + de->name[i] = + (i < namelen) ? name[i] : 0; mark_buffer_dirty(bh); brelse(bh); return 0; @@ -315,25 +323,26 @@ static int bfs_add_entry(struct inode * dir, const unsigned char * name, int nam return -ENOSPC; } -static inline int bfs_namecmp(int len, const unsigned char * name, const char * buffer) +static inline int bfs_namecmp(int len, const unsigned char *name, + const char *buffer) { - if (len < BFS_NAMELEN && buffer[len]) + if ((len < BFS_NAMELEN) && buffer[len]) return 0; return !memcmp(name, buffer, len); } -static struct buffer_head * bfs_find_entry(struct inode * dir, - const unsigned char * name, int namelen, struct bfs_dirent ** res_dir) +static struct buffer_head *bfs_find_entry(struct inode *dir, + const unsigned char *name, int namelen, + struct bfs_dirent **res_dir) { - unsigned long block, offset; - struct buffer_head * bh; - struct bfs_dirent * de; + unsigned long block = 0, offset = 0; + struct buffer_head *bh = NULL; + struct bfs_dirent *de; *res_dir = NULL; if (namelen > BFS_NAMELEN) return NULL; - bh = NULL; - block = offset = 0; + while (block * BFS_BSIZE + offset < dir->i_size) { if (!bh) { bh = sb_bread(dir->i_sb, BFS_I(dir)->i_sblock + block); @@ -344,7 +353,8 @@ static struct buffer_head * bfs_find_entry(struct inode * dir, } de = (struct bfs_dirent *)(bh->b_data + offset); offset += BFS_DIRENT_SIZE; - if (le16_to_cpu(de->ino) && bfs_namecmp(namelen, name, de->name)) { + if (le16_to_cpu(de->ino) && + bfs_namecmp(namelen, name, de->name)) { *res_dir = de; return bh; } diff --git a/fs/bfs/file.c b/fs/bfs/file.c index 911b4ccf470f..b11e63e8fbcd 100644 --- a/fs/bfs/file.c +++ b/fs/bfs/file.c @@ -2,6 +2,11 @@ * fs/bfs/file.c * BFS file operations. * Copyright (C) 1999,2000 Tigran Aivazian + * + * Make the file block allocation algorithm understand the size + * of the underlying block device. + * Copyright (C) 2007 Dmitri Vorobiev + * */ #include @@ -27,7 +32,8 @@ const struct file_operations bfs_file_operations = { .splice_read = generic_file_splice_read, }; -static int bfs_move_block(unsigned long from, unsigned long to, struct super_block *sb) +static int bfs_move_block(unsigned long from, unsigned long to, + struct super_block *sb) { struct buffer_head *bh, *new; @@ -43,21 +49,22 @@ static int bfs_move_block(unsigned long from, unsigned long to, struct super_blo } static int bfs_move_blocks(struct super_block *sb, unsigned long start, - unsigned long end, unsigned long where) + unsigned long end, unsigned long where) { unsigned long i; dprintf("%08lx-%08lx->%08lx\n", start, end, where); for (i = start; i <= end; i++) if(bfs_move_block(i, where + i, sb)) { - dprintf("failed to move block %08lx -> %08lx\n", i, where + i); + dprintf("failed to move block %08lx -> %08lx\n", i, + where + i); return -EIO; } return 0; } -static int bfs_get_block(struct inode * inode, sector_t block, - struct buffer_head * bh_result, int create) +static int bfs_get_block(struct inode *inode, sector_t block, + struct buffer_head *bh_result, int create) { unsigned long phys; int err; @@ -66,9 +73,6 @@ static int bfs_get_block(struct inode * inode, sector_t block, struct bfs_inode_info *bi = BFS_I(inode); struct buffer_head *sbh = info->si_sbh; - if (block > info->si_blocks) - return -EIO; - phys = bi->i_sblock + block; if (!create) { if (phys <= bi->i_eblock) { @@ -79,21 +83,29 @@ static int bfs_get_block(struct inode * inode, sector_t block, return 0; } - /* if the file is not empty and the requested block is within the range - of blocks allocated for this file, we can grant it */ - if (inode->i_size && phys <= bi->i_eblock) { + /* + * If the file is not empty and the requested block is within the + * range of blocks allocated for this file, we can grant it. + */ + if (bi->i_sblock && (phys <= bi->i_eblock)) { dprintf("c=%d, b=%08lx, phys=%08lx (interim block granted)\n", create, (unsigned long)block, phys); map_bh(bh_result, sb, phys); return 0; } - /* the rest has to be protected against itself */ + /* The file will be extended, so let's see if there is enough space. */ + if (phys >= info->si_blocks) + return -ENOSPC; + + /* The rest has to be protected against itself. */ lock_kernel(); - /* if the last data block for this file is the last allocated - block, we can extend the file trivially, without moving it - anywhere */ + /* + * If the last data block for this file is the last allocated + * block, we can extend the file trivially, without moving it + * anywhere. + */ if (bi->i_eblock == info->si_lf_eblk) { dprintf("c=%d, b=%08lx, phys=%08lx (simple extension)\n", create, (unsigned long)block, phys); @@ -106,13 +118,19 @@ static int bfs_get_block(struct inode * inode, sector_t block, goto out; } - /* Ok, we have to move this entire file to the next free block */ + /* Ok, we have to move this entire file to the next free block. */ phys = info->si_lf_eblk + 1; - if (bi->i_sblock) { /* if data starts on block 0 then there is no data */ + if (phys + block >= info->si_blocks) { + err = -ENOSPC; + goto out; + } + + if (bi->i_sblock) { err = bfs_move_blocks(inode->i_sb, bi->i_sblock, - bi->i_eblock, phys); + bi->i_eblock, phys); if (err) { - dprintf("failed to move ino=%08lx -> fs corruption\n", inode->i_ino); + dprintf("failed to move ino=%08lx -> fs corruption\n", + inode->i_ino); goto out; } } else @@ -124,8 +142,10 @@ static int bfs_get_block(struct inode * inode, sector_t block, phys += block; info->si_lf_eblk = bi->i_eblock = phys; - /* this assumes nothing can write the inode back while we are here - * and thus update inode->i_blocks! (XXX)*/ + /* + * This assumes nothing can write the inode back while we are here + * and thus update inode->i_blocks! (XXX) + */ info->si_freeb -= bi->i_eblock - bi->i_sblock + 1 - inode->i_blocks; mark_inode_dirty(inode); mark_buffer_dirty(sbh); diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c index 7bd9c2bbe6ee..294c41baef6e 100644 --- a/fs/bfs/inode.c +++ b/fs/bfs/inode.c @@ -30,25 +30,26 @@ MODULE_LICENSE("GPL"); #define dprintf(x...) #endif -void dump_imap(const char *prefix, struct super_block * s); +void dump_imap(const char *prefix, struct super_block *s); -static void bfs_read_inode(struct inode * inode) +static void bfs_read_inode(struct inode *inode) { unsigned long ino = inode->i_ino; - struct bfs_inode * di; - struct buffer_head * bh; + struct bfs_inode *di; + struct buffer_head *bh; int block, off; - if (ino < BFS_ROOT_INO || ino > BFS_SB(inode->i_sb)->si_lasti) { + if ((ino < BFS_ROOT_INO) || (ino > BFS_SB(inode->i_sb)->si_lasti)) { printf("Bad inode number %s:%08lx\n", inode->i_sb->s_id, ino); make_bad_inode(inode); return; } - block = (ino - BFS_ROOT_INO)/BFS_INODES_PER_BLOCK + 1; + block = (ino - BFS_ROOT_INO) / BFS_INODES_PER_BLOCK + 1; bh = sb_bread(inode->i_sb, block); if (!bh) { - printf("Unable to read inode %s:%08lx\n", inode->i_sb->s_id, ino); + printf("Unable to read inode %s:%08lx\n", inode->i_sb->s_id, + ino); make_bad_inode(inode); return; } @@ -56,7 +57,7 @@ static void bfs_read_inode(struct inode * inode) off = (ino - BFS_ROOT_INO) % BFS_INODES_PER_BLOCK; di = (struct bfs_inode *)bh->b_data + off; - inode->i_mode = 0x0000FFFF & le32_to_cpu(di->i_mode); + inode->i_mode = 0x0000FFFF & le32_to_cpu(di->i_mode); if (le32_to_cpu(di->i_vtype) == BFS_VDIR) { inode->i_mode |= S_IFDIR; inode->i_op = &bfs_dir_inops; @@ -70,48 +71,48 @@ static void bfs_read_inode(struct inode * inode) BFS_I(inode)->i_sblock = le32_to_cpu(di->i_sblock); BFS_I(inode)->i_eblock = le32_to_cpu(di->i_eblock); + BFS_I(inode)->i_dsk_ino = le16_to_cpu(di->i_ino); inode->i_uid = le32_to_cpu(di->i_uid); inode->i_gid = le32_to_cpu(di->i_gid); inode->i_nlink = le32_to_cpu(di->i_nlink); inode->i_size = BFS_FILESIZE(di); inode->i_blocks = BFS_FILEBLOCKS(di); - if (inode->i_size || inode->i_blocks) dprintf("Registered inode with %lld size, %ld blocks\n", inode->i_size, inode->i_blocks); inode->i_atime.tv_sec = le32_to_cpu(di->i_atime); inode->i_mtime.tv_sec = le32_to_cpu(di->i_mtime); inode->i_ctime.tv_sec = le32_to_cpu(di->i_ctime); inode->i_atime.tv_nsec = 0; inode->i_mtime.tv_nsec = 0; inode->i_ctime.tv_nsec = 0; - BFS_I(inode)->i_dsk_ino = le16_to_cpu(di->i_ino); /* can be 0 so we store a copy */ brelse(bh); } -static int bfs_write_inode(struct inode * inode, int unused) +static int bfs_write_inode(struct inode *inode, int unused) { unsigned int ino = (u16)inode->i_ino; unsigned long i_sblock; - struct bfs_inode * di; - struct buffer_head * bh; + struct bfs_inode *di; + struct buffer_head *bh; int block, off; dprintf("ino=%08x\n", ino); - if (ino < BFS_ROOT_INO || ino > BFS_SB(inode->i_sb)->si_lasti) { + if ((ino < BFS_ROOT_INO) || (ino > BFS_SB(inode->i_sb)->si_lasti)) { printf("Bad inode number %s:%08x\n", inode->i_sb->s_id, ino); return -EIO; } lock_kernel(); - block = (ino - BFS_ROOT_INO)/BFS_INODES_PER_BLOCK + 1; + block = (ino - BFS_ROOT_INO) / BFS_INODES_PER_BLOCK + 1; bh = sb_bread(inode->i_sb, block); if (!bh) { - printf("Unable to read inode %s:%08x\n", inode->i_sb->s_id, ino); + printf("Unable to read inode %s:%08x\n", + inode->i_sb->s_id, ino); unlock_kernel(); return -EIO; } - off = (ino - BFS_ROOT_INO)%BFS_INODES_PER_BLOCK; + off = (ino - BFS_ROOT_INO) % BFS_INODES_PER_BLOCK; di = (struct bfs_inode *)bh->b_data + off; if (ino == BFS_ROOT_INO) @@ -133,27 +134,26 @@ static int bfs_write_inode(struct inode * inode, int unused) di->i_eoffset = cpu_to_le32(i_sblock * BFS_BSIZE + inode->i_size - 1); mark_buffer_dirty(bh); - dprintf("Written ino=%d into %d:%d\n",le16_to_cpu(di->i_ino),block,off); brelse(bh); unlock_kernel(); return 0; } -static void bfs_delete_inode(struct inode * inode) +static void bfs_delete_inode(struct inode *inode) { unsigned long ino = inode->i_ino; - struct bfs_inode * di; - struct buffer_head * bh; + struct bfs_inode *di; + struct buffer_head *bh; int block, off; - struct super_block * s = inode->i_sb; - struct bfs_sb_info * info = BFS_SB(s); - struct bfs_inode_info * bi = BFS_I(inode); + struct super_block *s = inode->i_sb; + struct bfs_sb_info *info = BFS_SB(s); + struct bfs_inode_info *bi = BFS_I(inode); dprintf("ino=%08lx\n", ino); truncate_inode_pages(&inode->i_data, 0); - if (ino < BFS_ROOT_INO || ino > info->si_lasti) { + if ((ino < BFS_ROOT_INO) || (ino > info->si_lasti)) { printf("invalid ino=%08lx\n", ino); return; } @@ -162,31 +162,35 @@ static void bfs_delete_inode(struct inode * inode) inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC; lock_kernel(); mark_inode_dirty(inode); - block = (ino - BFS_ROOT_INO)/BFS_INODES_PER_BLOCK + 1; + + block = (ino - BFS_ROOT_INO) / BFS_INODES_PER_BLOCK + 1; bh = sb_bread(s, block); if (!bh) { - printf("Unable to read inode %s:%08lx\n", inode->i_sb->s_id, ino); + printf("Unable to read inode %s:%08lx\n", + inode->i_sb->s_id, ino); unlock_kernel(); return; } - off = (ino - BFS_ROOT_INO)%BFS_INODES_PER_BLOCK; - di = (struct bfs_inode *) bh->b_data + off; + off = (ino - BFS_ROOT_INO) % BFS_INODES_PER_BLOCK; + di = (struct bfs_inode *)bh->b_data + off; + memset((void *)di, 0, sizeof(struct bfs_inode)); + mark_buffer_dirty(bh); + brelse(bh); + if (bi->i_dsk_ino) { - info->si_freeb += 1 + bi->i_eblock - bi->i_sblock; + info->si_freeb += BFS_FILEBLOCKS(bi); info->si_freei++; clear_bit(ino, info->si_imap); dump_imap("delete_inode", s); } - di->i_ino = 0; - di->i_sblock = 0; - mark_buffer_dirty(bh); - brelse(bh); - /* if this was the last file, make the previous - block "last files last block" even if there is no real file there, - saves us 1 gap */ - if (info->si_lf_eblk == BFS_I(inode)->i_eblock) { - info->si_lf_eblk = BFS_I(inode)->i_sblock - 1; + /* + * If this was the last file, make the previous block + * "last block of the last file" even if there is no + * real file there, saves us 1 gap. + */ + if (info->si_lf_eblk == bi->i_eblock) { + info->si_lf_eblk = bi->i_sblock - 1; mark_buffer_dirty(info->si_sbh); } unlock_kernel(); @@ -228,7 +232,7 @@ static void bfs_write_super(struct super_block *s) unlock_kernel(); } -static struct kmem_cache * bfs_inode_cachep; +static struct kmem_cache *bfs_inode_cachep; static struct inode *bfs_alloc_inode(struct super_block *sb) { @@ -279,7 +283,7 @@ static const struct super_operations bfs_sops = { .statfs = bfs_statfs, }; -void dump_imap(const char *prefix, struct super_block * s) +void dump_imap(const char *prefix, struct super_block *s) { #ifdef DEBUG int i; @@ -287,25 +291,26 @@ void dump_imap(const char *prefix, struct super_block * s) if (!tmpbuf) return; - for (i=BFS_SB(s)->si_lasti; i>=0; i--) { - if (i > PAGE_SIZE-100) break; + for (i = BFS_SB(s)->si_lasti; i >= 0; i--) { + if (i > PAGE_SIZE - 100) break; if (test_bit(i, BFS_SB(s)->si_imap)) strcat(tmpbuf, "1"); else strcat(tmpbuf, "0"); } - printk(KERN_ERR "BFS-fs: %s: lasti=%08lx <%s>\n", prefix, BFS_SB(s)->si_lasti, tmpbuf); + printf("BFS-fs: %s: lasti=%08lx <%s>\n", + prefix, BFS_SB(s)->si_lasti, tmpbuf); free_page((unsigned long)tmpbuf); #endif } static int bfs_fill_super(struct super_block *s, void *data, int silent) { - struct buffer_head * bh; - struct bfs_super_block * bfs_sb; - struct inode * inode; + struct buffer_head *bh; + struct bfs_super_block *bfs_sb; + struct inode *inode; unsigned i, imap_len; - struct bfs_sb_info * info; + struct bfs_sb_info *info; info = kzalloc(sizeof(*info), GFP_KERNEL); if (!info) @@ -329,14 +334,14 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent) s->s_magic = BFS_MAGIC; info->si_sbh = bh; - info->si_lasti = (le32_to_cpu(bfs_sb->s_start) - BFS_BSIZE)/sizeof(struct bfs_inode) - + BFS_ROOT_INO - 1; - - imap_len = info->si_lasti/8 + 1; + info->si_lasti = (le32_to_cpu(bfs_sb->s_start) - BFS_BSIZE) / + sizeof(struct bfs_inode) + + BFS_ROOT_INO - 1; + imap_len = (info->si_lasti / 8) + 1; info->si_imap = kzalloc(imap_len, GFP_KERNEL); if (!info->si_imap) goto out; - for (i=0; isi_imap); s->s_op = &bfs_sops; @@ -352,16 +357,15 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent) goto out; } - info->si_blocks = (le32_to_cpu(bfs_sb->s_end) + 1)>>BFS_BSIZE_BITS; /* for statfs(2) */ - info->si_freeb = (le32_to_cpu(bfs_sb->s_end) + 1 - le32_to_cpu(bfs_sb->s_start))>>BFS_BSIZE_BITS; + info->si_blocks = (le32_to_cpu(bfs_sb->s_end) + 1) >> BFS_BSIZE_BITS; + info->si_freeb = (le32_to_cpu(bfs_sb->s_end) + 1 + - le32_to_cpu(bfs_sb->s_start)) >> BFS_BSIZE_BITS; info->si_freei = 0; info->si_lf_eblk = 0; - info->si_lf_sblk = 0; - info->si_lf_ioff = 0; bh = NULL; - for (i=BFS_ROOT_INO; i<=info->si_lasti; i++) { + for (i = BFS_ROOT_INO; i <= info->si_lasti; i++) { struct bfs_inode *di; - int block = (i - BFS_ROOT_INO)/BFS_INODES_PER_BLOCK + 1; + int block = (i - BFS_ROOT_INO) / BFS_INODES_PER_BLOCK + 1; int off = (i - BFS_ROOT_INO) % BFS_INODES_PER_BLOCK; unsigned long sblock, eblock; @@ -384,11 +388,8 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent) sblock = le32_to_cpu(di->i_sblock); eblock = le32_to_cpu(di->i_eblock); - if (eblock > info->si_lf_eblk) { + if (eblock > info->si_lf_eblk) info->si_lf_eblk = eblock; - info->si_lf_sblk = sblock; - info->si_lf_ioff = BFS_INO2OFF(i); - } } brelse(bh); if (!(s->s_flags & MS_RDONLY)) { -- cgit v1.2.3 From c06a018fa5362fa9ed0768bd747c0fab26bc8849 Mon Sep 17 00:00:00 2001 From: Fengguang Wu Date: Wed, 14 Nov 2007 16:59:54 -0800 Subject: reiserfs: don't drop PG_dirty when releasing sub-page-sized dirty file This is not a new problem in 2.6.23-git17. 2.6.22/2.6.23 is buggy in the same way. Reiserfs could accumulate dirty sub-page-size files until umount time. They cannot be synced to disk by pdflush routines or explicit `sync' commands. Only `umount' can do the trick. The direct cause is: the dirty page's PG_dirty is wrongly _cleared_. Call trace: [] cancel_dirty_page+0xd0/0xf0 [] :reiserfs:reiserfs_cut_from_item+0x660/0x710 [] :reiserfs:reiserfs_do_truncate+0x271/0x530 [] :reiserfs:reiserfs_truncate_file+0xfd/0x3b0 [] :reiserfs:reiserfs_file_release+0x1e0/0x340 [] __fput+0xcc/0x1b0 [] fput+0x16/0x20 [] filp_close+0x56/0x90 [] sys_close+0xad/0x110 [] system_call+0x7e/0x83 Fix the bug by removing the cancel_dirty_page() call. Tests show that it causes no bad behaviors on various write sizes. === for the patient === Here are more detailed demonstrations of the problem. 1) the page has both PG_dirty(D)/PAGECACHE_TAG_DIRTY(d) after being written to; and then only PAGECACHE_TAG_DIRTY(d) remains after the file is closed. ------------------------------ screen 0 ------------------------------ [T0] root /home/wfg# cat > /test/tiny [T1] hi [T2] root /home/wfg# ------------------------------ screen 1 ------------------------------ [T1] root /home/wfg# echo /test/tiny > /proc/filecache [T1] root /home/wfg# cat /proc/filecache # file /test/tiny # flags R:referenced A:active M:mmap U:uptodate D:dirty W:writeback O:owner B:buffer d:dirty w:writeback # idx len state refcnt 0 1 ___UD__Bd_ 2 [T2] root /home/wfg# cat /proc/filecache # file /test/tiny # flags R:referenced A:active M:mmap U:uptodate D:dirty W:writeback O:owner B:buffer d:dirty w:writeback # idx len state refcnt 0 1 ___U___Bd_ 2 2) note the non-zero 'cancelled_write_bytes' after /tmp/hi is copied. ------------------------------ screen 0 ------------------------------ [T0] root /home/wfg# echo hi > /tmp/hi [T1] root /home/wfg# cp /tmp/hi /dev/stdin /test [T2] hi [T3] root /home/wfg# ------------------------------ screen 1 ------------------------------ [T1] root /proc/4397# cd /proc/`pidof cp` [T1] root /proc/4713# cat io rchar: 8396 wchar: 3 syscr: 20 syscw: 1 read_bytes: 0 write_bytes: 20480 cancelled_write_bytes: 4096 [T2] root /proc/4713# cat io rchar: 8399 wchar: 6 syscr: 21 syscw: 2 read_bytes: 0 write_bytes: 24576 cancelled_write_bytes: 4096 //Question: the 'write_bytes' is a bit more than expected ;-) Tested-by: Maxim Levitsky Cc: Peter Zijlstra Cc: Jeff Mahoney Signed-off-by: Fengguang Wu Reviewed-by: Chris Mason Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/reiserfs/stree.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'fs') diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c index ca41567d7890..d2db2417b2bd 100644 --- a/fs/reiserfs/stree.c +++ b/fs/reiserfs/stree.c @@ -1458,9 +1458,6 @@ static void unmap_buffers(struct page *page, loff_t pos) } bh = next; } while (bh != head); - if (PAGE_SIZE == bh->b_size) { - cancel_dirty_page(page, PAGE_CACHE_SIZE); - } } } } -- cgit v1.2.3 From 8744969a819de4ee5158f4cdb30104601cc015d4 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Wed, 14 Nov 2007 17:00:02 -0800 Subject: fuse_file_alloc(): fix NULL dereferences Fix obvious NULL dereferences spotted by the Coverity checker. Signed-off-by: Adrian Bunk Acked-by: Miklos Szeredi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fuse/file.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 0fcdba9d47c0..535b37399009 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -55,9 +55,10 @@ struct fuse_file *fuse_file_alloc(void) if (!ff->reserved_req) { kfree(ff); ff = NULL; + } else { + INIT_LIST_HEAD(&ff->write_entry); + atomic_set(&ff->count, 0); } - INIT_LIST_HEAD(&ff->write_entry); - atomic_set(&ff->count, 0); } return ff; } -- cgit v1.2.3 From 9fcc2d15b14894aa53e5e8b7fd5d6e3ca558e5df Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 14 Nov 2007 17:00:07 -0800 Subject: proc: simplify and correct proc_flush_task Currently we special case when we have only the initial pid namespace. Unfortunately in doing so the copied case for the other namespaces was broken so we don't properly flush the thread directories :( So this patch removes the unnecessary special case (removing a usage of proc_mnt) and corrects the flushing of the thread directories. Signed-off-by: Eric W. Biederman Cc: Al Viro Cc: Pavel Emelyanov Cc: Sukadev Bhattiprolu Cc: Kirill Korotaev Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/base.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/proc/base.c b/fs/proc/base.c index aeaf0d0f2f51..a17c26859074 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -2328,21 +2328,18 @@ out: void proc_flush_task(struct task_struct *task) { - int i, leader; - struct pid *pid, *tgid; + int i; + struct pid *pid, *tgid = NULL; struct upid *upid; - leader = thread_group_leader(task); - proc_flush_task_mnt(proc_mnt, task->pid, leader ? task->tgid : 0); pid = task_pid(task); - if (pid->level == 0) - return; + if (thread_group_leader(task)) + tgid = task_tgid(task); - tgid = task_tgid(task); - for (i = 1; i <= pid->level; i++) { + for (i = 0; i <= pid->level; i++) { upid = &pid->numbers[i]; proc_flush_task_mnt(upid->ns->proc_mnt, upid->nr, - leader ? 0 : tgid->numbers[i].nr); + tgid ? tgid->numbers[i].nr : 0); } upid = &pid->numbers[pid->level]; -- cgit v1.2.3 From cb51f973bce7aef46452b0c6faea8f791885f5b8 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Wed, 14 Nov 2007 17:00:10 -0800 Subject: mark sys_open/sys_read exports unused sys_open / sys_read were used in the early 1.2 days to load firmware from disk inside drivers. Since 2.0 or so this was deprecated behavior, but several drivers still were using this. Since a few years we have a request_firmware() API that implements this in a nice, consistent way. Only some old ISA sound drivers (pre-ALSA) still straggled along for some time.... however with commit c2b1239a9f22f19c53543b460b24507d0e21ea0c the last user is now gone. This is a good thing, since using sys_open / sys_read etc for firmware is a very buggy to dangerous thing to do; these operations put an fd in the process file descriptor table.... which then can be tampered with from other threads for example. For those who don't want the firmware loader, filp_open()/vfs_read are the better APIs to use, without this security issue. The patch below marks sys_open and sys_read unused now that they're really not used anymore, and for deletion in the 2.6.25 timeframe. Signed-off-by: Arjan van de Ven Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/open.c | 2 +- fs/read_write.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/open.c b/fs/open.c index 3b69c53e1837..4932b4d1da05 100644 --- a/fs/open.c +++ b/fs/open.c @@ -1061,7 +1061,7 @@ asmlinkage long sys_open(const char __user *filename, int flags, int mode) prevent_tail_call(ret); return ret; } -EXPORT_SYMBOL_GPL(sys_open); +EXPORT_UNUSED_SYMBOL_GPL(sys_open); /* To be deleted for 2.6.25 */ asmlinkage long sys_openat(int dfd, const char __user *filename, int flags, int mode) diff --git a/fs/read_write.c b/fs/read_write.c index 124693e8d3fa..ea1f94cc722e 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -370,7 +370,7 @@ asmlinkage ssize_t sys_read(unsigned int fd, char __user * buf, size_t count) return ret; } -EXPORT_SYMBOL_GPL(sys_read); +EXPORT_UNUSED_SYMBOL_GPL(sys_read); /* to be deleted for 2.6.25 */ asmlinkage ssize_t sys_write(unsigned int fd, const char __user * buf, size_t count) { -- cgit v1.2.3 From dbaf4c024a657175f43b5091c4fab8b9f0e17078 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 14 Nov 2007 17:00:18 -0800 Subject: smbfs: fix debug builds Fix some warnings with SMBFS_DEBUG_* builds. This patch makes it so that builds with -Werror don't fail. Signed-off-by: Jeff Layton Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/smbfs/file.c | 7 ++++--- fs/smbfs/inode.c | 2 +- fs/smbfs/proc.c | 2 +- fs/smbfs/smbiod.c | 2 +- 4 files changed, 7 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/smbfs/file.c b/fs/smbfs/file.c index f5d14cebc75a..efbe29af3d7a 100644 --- a/fs/smbfs/file.c +++ b/fs/smbfs/file.c @@ -234,7 +234,7 @@ smb_file_aio_read(struct kiocb *iocb, const struct iovec *iov, VERBOSE("before read, size=%ld, flags=%x, atime=%ld\n", (long)dentry->d_inode->i_size, - dentry->d_inode->i_flags, dentry->d_inode->i_atime); + dentry->d_inode->i_flags, dentry->d_inode->i_atime.tv_sec); status = generic_file_aio_read(iocb, iov, nr_segs, pos); out: @@ -269,7 +269,7 @@ smb_file_splice_read(struct file *file, loff_t *ppos, struct dentry *dentry = file->f_path.dentry; ssize_t status; - VERBOSE("file %s/%s, pos=%Ld, count=%d\n", + VERBOSE("file %s/%s, pos=%Ld, count=%lu\n", DENTRY_PATH(dentry), *ppos, count); status = smb_revalidate_inode(dentry); @@ -363,7 +363,8 @@ smb_file_aio_write(struct kiocb *iocb, const struct iovec *iov, result = generic_file_aio_write(iocb, iov, nr_segs, pos); VERBOSE("pos=%ld, size=%ld, mtime=%ld, atime=%ld\n", (long) file->f_pos, (long) dentry->d_inode->i_size, - dentry->d_inode->i_mtime, dentry->d_inode->i_atime); + dentry->d_inode->i_mtime.tv_sec, + dentry->d_inode->i_atime.tv_sec); } out: return result; diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c index ab517755ece0..9416ead0c7aa 100644 --- a/fs/smbfs/inode.c +++ b/fs/smbfs/inode.c @@ -536,7 +536,7 @@ static int smb_fill_super(struct super_block *sb, void *raw_data, int silent) /* Allocate the global temp buffer and some superblock helper structs */ /* FIXME: move these to the smb_sb_info struct */ - VERBOSE("alloc chunk = %d\n", sizeof(struct smb_ops) + + VERBOSE("alloc chunk = %lu\n", sizeof(struct smb_ops) + sizeof(struct smb_mount_data_kernel)); mem = kmalloc(sizeof(struct smb_ops) + sizeof(struct smb_mount_data_kernel), GFP_KERNEL); diff --git a/fs/smbfs/proc.c b/fs/smbfs/proc.c index feac46050619..d517a27b7f4b 100644 --- a/fs/smbfs/proc.c +++ b/fs/smbfs/proc.c @@ -2593,7 +2593,7 @@ smb_proc_getattr_ff(struct smb_sb_info *server, struct dentry *dentry, fattr->f_mtime.tv_sec = date_dos2unix(server, date, time); fattr->f_mtime.tv_nsec = 0; VERBOSE("name=%s, date=%x, time=%x, mtime=%ld\n", - mask, date, time, fattr->f_mtime); + mask, date, time, fattr->f_mtime.tv_sec); fattr->f_size = DVAL(req->rq_data, 12); /* ULONG allocation size */ fattr->attr = WVAL(req->rq_data, 20); diff --git a/fs/smbfs/smbiod.c b/fs/smbfs/smbiod.c index 283c5720c9de..fae8e85af0ed 100644 --- a/fs/smbfs/smbiod.c +++ b/fs/smbfs/smbiod.c @@ -227,7 +227,7 @@ int smbiod_retry(struct smb_sb_info *server) printk(KERN_ERR "smb_retry: signal failed [%d]\n", result); goto out; } - VERBOSE("signalled pid %d\n", pid); + VERBOSE("signalled pid %d\n", pid_nr(pid)); /* FIXME: The retried requests should perhaps get a "time boost". */ -- cgit v1.2.3 From 7c06a8dc64a2d1884bd19b4c6353d9267ae4e3e1 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 14 Nov 2007 17:00:19 -0800 Subject: Fix 64KB blocksize in ext3 directories With 64KB blocksize, a directory entry can have size 64KB which does not fit into 16 bits we have for entry lenght. So we store 0xffff instead and convert value when read from / written to disk. The patch also converts some places to use ext3_next_entry() when we are changing them anyway. [akpm@linux-foundation.org: coding-style cleanups] Signed-off-by: Jan Kara Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ext3/dir.c | 10 +++--- fs/ext3/namei.c | 92 ++++++++++++++++++++++++------------------------- include/linux/ext3_fs.h | 20 +++++++++++ 3 files changed, 70 insertions(+), 52 deletions(-) (limited to 'fs') diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c index c8e4ee3af1d0..8ca3bfd72427 100644 --- a/fs/ext3/dir.c +++ b/fs/ext3/dir.c @@ -67,7 +67,7 @@ int ext3_check_dir_entry (const char * function, struct inode * dir, unsigned long offset) { const char * error_msg = NULL; - const int rlen = le16_to_cpu(de->rec_len); + const int rlen = ext3_rec_len_from_disk(de->rec_len); if (rlen < EXT3_DIR_REC_LEN(1)) error_msg = "rec_len is smaller than minimal"; @@ -173,10 +173,10 @@ revalidate: * least that it is non-zero. A * failure will be detected in the * dirent test below. */ - if (le16_to_cpu(de->rec_len) < + if (ext3_rec_len_from_disk(de->rec_len) < EXT3_DIR_REC_LEN(1)) break; - i += le16_to_cpu(de->rec_len); + i += ext3_rec_len_from_disk(de->rec_len); } offset = i; filp->f_pos = (filp->f_pos & ~(sb->s_blocksize - 1)) @@ -197,7 +197,7 @@ revalidate: ret = stored; goto out; } - offset += le16_to_cpu(de->rec_len); + offset += ext3_rec_len_from_disk(de->rec_len); if (le32_to_cpu(de->inode)) { /* We might block in the next section * if the data destination is @@ -219,7 +219,7 @@ revalidate: goto revalidate; stored ++; } - filp->f_pos += le16_to_cpu(de->rec_len); + filp->f_pos += ext3_rec_len_from_disk(de->rec_len); } offset = 0; brelse (bh); diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index ec8170adac53..4ab6f76e63d0 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c @@ -176,6 +176,16 @@ static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry, static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry, struct inode *inode); +/* + * p is at least 6 bytes before the end of page + */ +static inline struct ext3_dir_entry_2 * +ext3_next_entry(struct ext3_dir_entry_2 *p) +{ + return (struct ext3_dir_entry_2 *)((char *)p + + ext3_rec_len_from_disk(p->rec_len)); +} + /* * Future: use high four bits of block for coalesce-on-delete flags * Mask them off for now. @@ -280,7 +290,7 @@ static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct ext3_dir_ent space += EXT3_DIR_REC_LEN(de->name_len); names++; } - de = (struct ext3_dir_entry_2 *) ((char *) de + le16_to_cpu(de->rec_len)); + de = ext3_next_entry(de); } printk("(%i)\n", names); return (struct stats) { names, space, 1 }; @@ -546,14 +556,6 @@ static int ext3_htree_next_block(struct inode *dir, __u32 hash, } -/* - * p is at least 6 bytes before the end of page - */ -static inline struct ext3_dir_entry_2 *ext3_next_entry(struct ext3_dir_entry_2 *p) -{ - return (struct ext3_dir_entry_2 *)((char*)p + le16_to_cpu(p->rec_len)); -} - /* * This function fills a red-black tree with information from a * directory block. It returns the number directory entries loaded @@ -720,7 +722,7 @@ static int dx_make_map (struct ext3_dir_entry_2 *de, int size, cond_resched(); } /* XXX: do we need to check rec_len == 0 case? -Chris */ - de = (struct ext3_dir_entry_2 *) ((char *) de + le16_to_cpu(de->rec_len)); + de = ext3_next_entry(de); } return count; } @@ -822,7 +824,7 @@ static inline int search_dirblock(struct buffer_head * bh, return 1; } /* prevent looping on a bad block */ - de_len = le16_to_cpu(de->rec_len); + de_len = ext3_rec_len_from_disk(de->rec_len); if (de_len <= 0) return -1; offset += de_len; @@ -1130,7 +1132,7 @@ dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count) rec_len = EXT3_DIR_REC_LEN(de->name_len); memcpy (to, de, rec_len); ((struct ext3_dir_entry_2 *) to)->rec_len = - cpu_to_le16(rec_len); + ext3_rec_len_to_disk(rec_len); de->inode = 0; map++; to += rec_len; @@ -1149,13 +1151,12 @@ static struct ext3_dir_entry_2* dx_pack_dirents(char *base, int size) prev = to = de; while ((char*)de < base + size) { - next = (struct ext3_dir_entry_2 *) ((char *) de + - le16_to_cpu(de->rec_len)); + next = ext3_next_entry(de); if (de->inode && de->name_len) { rec_len = EXT3_DIR_REC_LEN(de->name_len); if (de > to) memmove(to, de, rec_len); - to->rec_len = cpu_to_le16(rec_len); + to->rec_len = ext3_rec_len_to_disk(rec_len); prev = to; to = (struct ext3_dir_entry_2 *) (((char *) to) + rec_len); } @@ -1229,8 +1230,8 @@ static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, /* Fancy dance to stay within two buffers */ de2 = dx_move_dirents(data1, data2, map + split, count - split); de = dx_pack_dirents(data1,blocksize); - de->rec_len = cpu_to_le16(data1 + blocksize - (char *) de); - de2->rec_len = cpu_to_le16(data2 + blocksize - (char *) de2); + de->rec_len = ext3_rec_len_to_disk(data1 + blocksize - (char *) de); + de2->rec_len = ext3_rec_len_to_disk(data2 + blocksize - (char *) de2); dxtrace(dx_show_leaf (hinfo, (struct ext3_dir_entry_2 *) data1, blocksize, 1)); dxtrace(dx_show_leaf (hinfo, (struct ext3_dir_entry_2 *) data2, blocksize, 1)); @@ -1300,7 +1301,7 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry, return -EEXIST; } nlen = EXT3_DIR_REC_LEN(de->name_len); - rlen = le16_to_cpu(de->rec_len); + rlen = ext3_rec_len_from_disk(de->rec_len); if ((de->inode? rlen - nlen: rlen) >= reclen) break; de = (struct ext3_dir_entry_2 *)((char *)de + rlen); @@ -1319,11 +1320,11 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry, /* By now the buffer is marked for journaling */ nlen = EXT3_DIR_REC_LEN(de->name_len); - rlen = le16_to_cpu(de->rec_len); + rlen = ext3_rec_len_from_disk(de->rec_len); if (de->inode) { struct ext3_dir_entry_2 *de1 = (struct ext3_dir_entry_2 *)((char *)de + nlen); - de1->rec_len = cpu_to_le16(rlen - nlen); - de->rec_len = cpu_to_le16(nlen); + de1->rec_len = ext3_rec_len_to_disk(rlen - nlen); + de->rec_len = ext3_rec_len_to_disk(nlen); de = de1; } de->file_type = EXT3_FT_UNKNOWN; @@ -1400,17 +1401,18 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, /* The 0th block becomes the root, move the dirents out */ fde = &root->dotdot; - de = (struct ext3_dir_entry_2 *)((char *)fde + le16_to_cpu(fde->rec_len)); + de = (struct ext3_dir_entry_2 *)((char *)fde + + ext3_rec_len_from_disk(fde->rec_len)); len = ((char *) root) + blocksize - (char *) de; memcpy (data1, de, len); de = (struct ext3_dir_entry_2 *) data1; top = data1 + len; - while ((char *)(de2=(void*)de+le16_to_cpu(de->rec_len)) < top) + while ((char *)(de2 = ext3_next_entry(de)) < top) de = de2; - de->rec_len = cpu_to_le16(data1 + blocksize - (char *) de); + de->rec_len = ext3_rec_len_to_disk(data1 + blocksize - (char *) de); /* Initialize the root; the dot dirents already exist */ de = (struct ext3_dir_entry_2 *) (&root->dotdot); - de->rec_len = cpu_to_le16(blocksize - EXT3_DIR_REC_LEN(2)); + de->rec_len = ext3_rec_len_to_disk(blocksize - EXT3_DIR_REC_LEN(2)); memset (&root->info, 0, sizeof(root->info)); root->info.info_length = sizeof(root->info); root->info.hash_version = EXT3_SB(dir->i_sb)->s_def_hash_version; @@ -1490,7 +1492,7 @@ static int ext3_add_entry (handle_t *handle, struct dentry *dentry, return retval; de = (struct ext3_dir_entry_2 *) bh->b_data; de->inode = 0; - de->rec_len = cpu_to_le16(blocksize); + de->rec_len = ext3_rec_len_to_disk(blocksize); return add_dirent_to_buf(handle, dentry, inode, de, bh); } @@ -1553,7 +1555,7 @@ static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry, goto cleanup; node2 = (struct dx_node *)(bh2->b_data); entries2 = node2->entries; - node2->fake.rec_len = cpu_to_le16(sb->s_blocksize); + node2->fake.rec_len = ext3_rec_len_to_disk(sb->s_blocksize); node2->fake.inode = 0; BUFFER_TRACE(frame->bh, "get_write_access"); err = ext3_journal_get_write_access(handle, frame->bh); @@ -1651,9 +1653,9 @@ static int ext3_delete_entry (handle_t *handle, BUFFER_TRACE(bh, "get_write_access"); ext3_journal_get_write_access(handle, bh); if (pde) - pde->rec_len = - cpu_to_le16(le16_to_cpu(pde->rec_len) + - le16_to_cpu(de->rec_len)); + pde->rec_len = ext3_rec_len_to_disk( + ext3_rec_len_from_disk(pde->rec_len) + + ext3_rec_len_from_disk(de->rec_len)); else de->inode = 0; dir->i_version++; @@ -1661,10 +1663,9 @@ static int ext3_delete_entry (handle_t *handle, ext3_journal_dirty_metadata(handle, bh); return 0; } - i += le16_to_cpu(de->rec_len); + i += ext3_rec_len_from_disk(de->rec_len); pde = de; - de = (struct ext3_dir_entry_2 *) - ((char *) de + le16_to_cpu(de->rec_len)); + de = ext3_next_entry(de); } return -ENOENT; } @@ -1798,13 +1799,13 @@ retry: de = (struct ext3_dir_entry_2 *) dir_block->b_data; de->inode = cpu_to_le32(inode->i_ino); de->name_len = 1; - de->rec_len = cpu_to_le16(EXT3_DIR_REC_LEN(de->name_len)); + de->rec_len = ext3_rec_len_to_disk(EXT3_DIR_REC_LEN(de->name_len)); strcpy (de->name, "."); ext3_set_de_type(dir->i_sb, de, S_IFDIR); - de = (struct ext3_dir_entry_2 *) - ((char *) de + le16_to_cpu(de->rec_len)); + de = ext3_next_entry(de); de->inode = cpu_to_le32(dir->i_ino); - de->rec_len = cpu_to_le16(inode->i_sb->s_blocksize-EXT3_DIR_REC_LEN(1)); + de->rec_len = ext3_rec_len_to_disk(inode->i_sb->s_blocksize - + EXT3_DIR_REC_LEN(1)); de->name_len = 2; strcpy (de->name, ".."); ext3_set_de_type(dir->i_sb, de, S_IFDIR); @@ -1856,8 +1857,7 @@ static int empty_dir (struct inode * inode) return 1; } de = (struct ext3_dir_entry_2 *) bh->b_data; - de1 = (struct ext3_dir_entry_2 *) - ((char *) de + le16_to_cpu(de->rec_len)); + de1 = ext3_next_entry(de); if (le32_to_cpu(de->inode) != inode->i_ino || !le32_to_cpu(de1->inode) || strcmp (".", de->name) || @@ -1868,9 +1868,9 @@ static int empty_dir (struct inode * inode) brelse (bh); return 1; } - offset = le16_to_cpu(de->rec_len) + le16_to_cpu(de1->rec_len); - de = (struct ext3_dir_entry_2 *) - ((char *) de1 + le16_to_cpu(de1->rec_len)); + offset = ext3_rec_len_from_disk(de->rec_len) + + ext3_rec_len_from_disk(de1->rec_len); + de = ext3_next_entry(de1); while (offset < inode->i_size ) { if (!bh || (void *) de >= (void *) (bh->b_data+sb->s_blocksize)) { @@ -1899,9 +1899,8 @@ static int empty_dir (struct inode * inode) brelse (bh); return 0; } - offset += le16_to_cpu(de->rec_len); - de = (struct ext3_dir_entry_2 *) - ((char *) de + le16_to_cpu(de->rec_len)); + offset += ext3_rec_len_from_disk(de->rec_len); + de = ext3_next_entry(de); } brelse (bh); return 1; @@ -2255,8 +2254,7 @@ retry: } #define PARENT_INO(buffer) \ - ((struct ext3_dir_entry_2 *) ((char *) buffer + \ - le16_to_cpu(((struct ext3_dir_entry_2 *) buffer)->rec_len)))->inode + (ext3_next_entry((struct ext3_dir_entry_2 *)(buffer))->inode) /* * Anybody can rename anything with this: the permission checks are left to the diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h index 64134456ed8c..241c01cb92b2 100644 --- a/include/linux/ext3_fs.h +++ b/include/linux/ext3_fs.h @@ -656,6 +656,26 @@ struct ext3_dir_entry_2 { #define EXT3_DIR_ROUND (EXT3_DIR_PAD - 1) #define EXT3_DIR_REC_LEN(name_len) (((name_len) + 8 + EXT3_DIR_ROUND) & \ ~EXT3_DIR_ROUND) +#define EXT3_MAX_REC_LEN ((1<<16)-1) + +static inline unsigned ext3_rec_len_from_disk(__le16 dlen) +{ + unsigned len = le16_to_cpu(dlen); + + if (len == EXT3_MAX_REC_LEN) + return 1 << 16; + return len; +} + +static inline __le16 ext3_rec_len_to_disk(unsigned len) +{ + if (len == (1 << 16)) + return cpu_to_le16(EXT3_MAX_REC_LEN); + else if (len > (1 << 16)) + BUG(); + return cpu_to_le16(len); +} + /* * Hash Tree Directory indexing * (c) Daniel Phillips, 2001 -- cgit v1.2.3