diff options
author | David Woodhouse <David.Woodhouse@intel.com> | 2010-05-10 14:32:46 +0100 |
---|---|---|
committer | David Woodhouse <David.Woodhouse@intel.com> | 2010-05-10 14:32:46 +0100 |
commit | 0ae28a35bcb7984838acbf28bfba9c030f8b74f0 (patch) | |
tree | 4f449d929b5df9e126e839f388ff0fd2b52028a0 /fs | |
parent | 6f1f3d0ab5c3eeea9f04486481c25e9afdfa26c5 (diff) | |
parent | b57f95a38233a2e73b679bea4a5453a1cc2a1cc9 (diff) | |
download | linux-0ae28a35bcb7984838acbf28bfba9c030f8b74f0.tar.bz2 |
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6
Conflicts:
drivers/mtd/mtdcore.c
Pull in the bdi fixes and ARM platform changes that other outstanding
patches depend on.
Diffstat (limited to 'fs')
429 files changed, 2431 insertions, 1407 deletions
diff --git a/fs/9p/cache.c b/fs/9p/cache.c index e777961939f3..0dbe0d139ac2 100644 --- a/fs/9p/cache.c +++ b/fs/9p/cache.c @@ -22,6 +22,7 @@ #include <linux/jiffies.h> #include <linux/file.h> +#include <linux/slab.h> #include <linux/stat.h> #include <linux/sched.h> #include <linux/fs.h> diff --git a/fs/9p/fid.c b/fs/9p/fid.c index 08b2eb157048..7317b39b2815 100644 --- a/fs/9p/fid.c +++ b/fs/9p/fid.c @@ -24,6 +24,7 @@ #include <linux/module.h> #include <linux/errno.h> #include <linux/fs.h> +#include <linux/slab.h> #include <linux/sched.h> #include <linux/idr.h> #include <net/9p/9p.h> @@ -110,7 +111,7 @@ struct p9_fid *v9fs_fid_lookup(struct dentry *dentry) { int i, n, l, clone, any, access; u32 uid; - struct p9_fid *fid; + struct p9_fid *fid, *old_fid = NULL; struct dentry *d, *ds; struct v9fs_session_info *v9ses; char **wnames, *uname; @@ -183,10 +184,18 @@ struct p9_fid *v9fs_fid_lookup(struct dentry *dentry) l = min(n - i, P9_MAXWELEM); fid = p9_client_walk(fid, l, &wnames[i], clone); if (IS_ERR(fid)) { + if (old_fid) { + /* + * If we fail, clunk fid which are mapping + * to path component and not the last component + * of the path. 
+ */ + p9_client_clunk(old_fid); + } kfree(wnames); return fid; } - + old_fid = fid; i += l; clone = 0; } diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c index 6c7f6a251115..f8b86e92cd66 100644 --- a/fs/9p/v9fs.c +++ b/fs/9p/v9fs.c @@ -29,6 +29,7 @@ #include <linux/sched.h> #include <linux/parser.h> #include <linux/idr.h> +#include <linux/slab.h> #include <net/9p/9p.h> #include <net/9p/client.h> #include <net/9p/transport.h> @@ -237,11 +238,18 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses, return ERR_PTR(-ENOMEM); } + rc = bdi_setup_and_register(&v9ses->bdi, "9p", BDI_CAP_MAP_COPY); + if (rc) { + __putname(v9ses->aname); + __putname(v9ses->uname); + return ERR_PTR(rc); + } + spin_lock(&v9fs_sessionlist_lock); list_add(&v9ses->slist, &v9fs_sessionlist); spin_unlock(&v9fs_sessionlist_lock); - v9ses->flags = V9FS_PROTO_2000U | V9FS_ACCESS_USER; + v9ses->flags = V9FS_ACCESS_USER; strcpy(v9ses->uname, V9FS_DEFUSER); strcpy(v9ses->aname, V9FS_DEFANAME); v9ses->uid = ~0; @@ -262,8 +270,10 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses, goto error; } - if (!p9_is_proto_dotu(v9ses->clnt)) - v9ses->flags &= ~V9FS_PROTO_2000U; + if (p9_is_proto_dotl(v9ses->clnt)) + v9ses->flags |= V9FS_PROTO_2000L; + else if (p9_is_proto_dotu(v9ses->clnt)) + v9ses->flags |= V9FS_PROTO_2000U; v9ses->maxdata = v9ses->clnt->msize - P9_IOHDRSZ; @@ -298,6 +308,7 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses, return fid; error: + bdi_destroy(&v9ses->bdi); return ERR_PTR(retval); } @@ -323,6 +334,8 @@ void v9fs_session_close(struct v9fs_session_info *v9ses) __putname(v9ses->uname); __putname(v9ses->aname); + bdi_destroy(&v9ses->bdi); + spin_lock(&v9fs_sessionlist_lock); list_del(&v9ses->slist); spin_unlock(&v9fs_sessionlist_lock); @@ -340,6 +353,19 @@ void v9fs_session_cancel(struct v9fs_session_info *v9ses) { p9_client_disconnect(v9ses->clnt); } +/** + * v9fs_session_begin_cancel - Begin terminate of a session + * @v9ses: session to 
terminate + * + * After this call we don't allow any request other than clunk. + */ + +void v9fs_session_begin_cancel(struct v9fs_session_info *v9ses) +{ + P9_DPRINTK(P9_DEBUG_ERROR, "begin cancel session %p\n", v9ses); + p9_client_begin_disconnect(v9ses->clnt); +} + extern int v9fs_error_init(void); static struct kobject *v9fs_kobj; diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h index 6b801d1ddf4b..bec4d0bcb458 100644 --- a/fs/9p/v9fs.h +++ b/fs/9p/v9fs.h @@ -20,6 +20,7 @@ * Boston, MA 02111-1301 USA * */ +#include <linux/backing-dev.h> /** * enum p9_session_flags - option flags for each 9P session @@ -102,12 +103,14 @@ struct v9fs_session_info { u32 uid; /* if ACCESS_SINGLE, the uid that has access */ struct p9_client *clnt; /* 9p client */ struct list_head slist; /* list of sessions registered with v9fs */ + struct backing_dev_info bdi; }; struct p9_fid *v9fs_session_init(struct v9fs_session_info *, const char *, char *); void v9fs_session_close(struct v9fs_session_info *v9ses); void v9fs_session_cancel(struct v9fs_session_info *v9ses); +void v9fs_session_begin_cancel(struct v9fs_session_info *v9ses); #define V9FS_MAGIC 0x01021997 diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c index d74325295b1e..cbf4e50f3933 100644 --- a/fs/9p/vfs_dentry.c +++ b/fs/9p/vfs_dentry.c @@ -34,6 +34,7 @@ #include <linux/namei.h> #include <linux/idr.h> #include <linux/sched.h> +#include <linux/slab.h> #include <net/9p/9p.h> #include <net/9p/client.h> diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c index d8a3afe4ff72..0adfd64dfcee 100644 --- a/fs/9p/vfs_dir.c +++ b/fs/9p/vfs_dir.c @@ -32,6 +32,7 @@ #include <linux/sched.h> #include <linux/inet.h> #include <linux/idr.h> +#include <linux/slab.h> #include <net/9p/9p.h> #include <net/9p/client.h> @@ -130,6 +131,8 @@ static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir) rdir = (struct p9_rdir *) fid->rdir; err = mutex_lock_interruptible(&rdir->mutex); + if (err) + return err; while (err == 0) { if (rdir->tail == 
rdir->head) { err = v9fs_file_readn(filp, rdir->buf, NULL, diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 5fe45d692c9f..f2434fc9d2c4 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -34,6 +34,7 @@ #include <linux/namei.h> #include <linux/idr.h> #include <linux/sched.h> +#include <linux/slab.h> #include <net/9p/9p.h> #include <net/9p/client.h> @@ -431,6 +432,7 @@ error: static int v9fs_remove(struct inode *dir, struct dentry *file, int rmdir) { + int retval; struct inode *file_inode; struct v9fs_session_info *v9ses; struct p9_fid *v9fid; @@ -444,7 +446,10 @@ static int v9fs_remove(struct inode *dir, struct dentry *file, int rmdir) if (IS_ERR(v9fid)) return PTR_ERR(v9fid); - return p9_client_remove(v9fid); + retval = p9_client_remove(v9fid); + if (!retval) + drop_nlink(file_inode); + return retval; } static int @@ -656,6 +661,9 @@ static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry, P9_DPRINTK(P9_DEBUG_VFS, "dir: %p dentry: (%s) %p nameidata: %p\n", dir, dentry->d_name.name, dentry, nameidata); + if (dentry->d_name.len > NAME_MAX) + return ERR_PTR(-ENAMETOOLONG); + sb = dir->i_sb; v9ses = v9fs_inode2v9ses(dir); dfid = v9fs_fid_lookup(dentry->d_parent); diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c index 69357c0d9899..806da5d3b3a0 100644 --- a/fs/9p/vfs_super.c +++ b/fs/9p/vfs_super.c @@ -37,6 +37,7 @@ #include <linux/mount.h> #include <linux/idr.h> #include <linux/sched.h> +#include <linux/slab.h> #include <net/9p/9p.h> #include <net/9p/client.h> @@ -76,6 +77,7 @@ v9fs_fill_super(struct super_block *sb, struct v9fs_session_info *v9ses, sb->s_blocksize = 1 << sb->s_blocksize_bits; sb->s_magic = V9FS_MAGIC; sb->s_op = &v9fs_super_ops; + sb->s_bdi = &v9ses->bdi; sb->s_flags = flags | MS_ACTIVE | MS_SYNCHRONOUS | MS_DIRSYNC | MS_NOATIME; @@ -193,6 +195,7 @@ static void v9fs_kill_super(struct super_block *s) kill_anon_super(s); + v9fs_session_cancel(v9ses); v9fs_session_close(v9ses); kfree(v9ses); s->s_fs_info = NULL; @@ 
-205,7 +208,7 @@ v9fs_umount_begin(struct super_block *sb) struct v9fs_session_info *v9ses; v9ses = sb->s_fs_info; - v9fs_session_cancel(v9ses); + v9fs_session_begin_cancel(v9ses); } static const struct super_operations v9fs_super_ops = { diff --git a/fs/adfs/super.c b/fs/adfs/super.c index 6910a98bd73c..4a3af7075c1d 100644 --- a/fs/adfs/super.c +++ b/fs/adfs/super.c @@ -13,6 +13,7 @@ #include <linux/parser.h> #include <linux/mount.h> #include <linux/seq_file.h> +#include <linux/slab.h> #include <linux/smp_lock.h> #include <linux/statfs.h> #include "adfs.h" diff --git a/fs/affs/bitmap.c b/fs/affs/bitmap.c index 8306d53307ed..3e262711ae06 100644 --- a/fs/affs/bitmap.c +++ b/fs/affs/bitmap.c @@ -7,6 +7,7 @@ * block allocation, deallocation, calculation of free space. */ +#include <linux/slab.h> #include "affs.h" /* This is, of course, shamelessly stolen from fs/minix */ diff --git a/fs/affs/inode.c b/fs/affs/inode.c index c9744d771d98..f4b2a4ee4f91 100644 --- a/fs/affs/inode.c +++ b/fs/affs/inode.c @@ -10,6 +10,7 @@ * (C) 1991 Linus Torvalds - minix filesystem */ #include <linux/sched.h> +#include <linux/gfp.h> #include "affs.h" extern const struct inode_operations affs_symlink_inode_operations; diff --git a/fs/affs/super.c b/fs/affs/super.c index d41e9673cd97..16a3e4765f68 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c @@ -17,6 +17,7 @@ #include <linux/magic.h> #include <linux/sched.h> #include <linux/smp_lock.h> +#include <linux/slab.h> #include "affs.h" extern struct timezone sys_tz; diff --git a/fs/afs/cache.c b/fs/afs/cache.c index e2b1d3f16519..0fb315dd4d2a 100644 --- a/fs/afs/cache.c +++ b/fs/afs/cache.c @@ -9,7 +9,6 @@ * 2 of the License, or (at your option) any later version. 
*/ -#include <linux/slab.h> #include <linux/sched.h> #include "internal.h" diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c index eb765489164f..a3bcec75c54a 100644 --- a/fs/afs/cmservice.c +++ b/fs/afs/cmservice.c @@ -11,6 +11,7 @@ #include <linux/module.h> #include <linux/init.h> +#include <linux/slab.h> #include <linux/sched.h> #include <linux/ip.h> #include "internal.h" diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 88067f36e5e7..adc1cb771b57 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -12,7 +12,6 @@ #include <linux/kernel.h> #include <linux/module.h> #include <linux/init.h> -#include <linux/slab.h> #include <linux/fs.h> #include <linux/pagemap.h> #include <linux/ctype.h> diff --git a/fs/afs/file.c b/fs/afs/file.c index 39b301662f22..0df9bc2b724d 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c @@ -12,10 +12,10 @@ #include <linux/kernel.h> #include <linux/module.h> #include <linux/init.h> -#include <linux/slab.h> #include <linux/fs.h> #include <linux/pagemap.h> #include <linux/writeback.h> +#include <linux/gfp.h> #include "internal.h" static int afs_readpage(struct file *file, struct page *page); diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index 023b95b0d9d7..4bd0218473a9 100644 --- a/fs/afs/fsclient.c +++ b/fs/afs/fsclient.c @@ -10,6 +10,7 @@ */ #include <linux/init.h> +#include <linux/slab.h> #include <linux/sched.h> #include <linux/circ_buf.h> #include "internal.h" diff --git a/fs/afs/inode.c b/fs/afs/inode.c index c048f0658751..d00b312e3110 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -16,7 +16,6 @@ #include <linux/kernel.h> #include <linux/module.h> #include <linux/init.h> -#include <linux/slab.h> #include <linux/fs.h> #include <linux/pagemap.h> #include <linux/sched.h> diff --git a/fs/afs/internal.h b/fs/afs/internal.h index c54dad4e6063..a10f2582844f 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -19,6 +19,7 @@ #include <linux/workqueue.h> #include <linux/sched.h> #include <linux/fscache.h> +#include <linux/backing-dev.h> 
#include "afs.h" #include "afs_vl.h" @@ -313,6 +314,7 @@ struct afs_volume { unsigned short rjservers; /* number of servers discarded due to -ENOMEDIUM */ struct afs_server *servers[8]; /* servers on which volume resides (ordered) */ struct rw_semaphore server_sem; /* lock for accessing current server */ + struct backing_dev_info bdi; }; /* diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c index 5ffb570cd3a8..b3feddc4f7d6 100644 --- a/fs/afs/mntpt.c +++ b/fs/afs/mntpt.c @@ -12,11 +12,11 @@ #include <linux/kernel.h> #include <linux/module.h> #include <linux/init.h> -#include <linux/slab.h> #include <linux/fs.h> #include <linux/pagemap.h> #include <linux/mount.h> #include <linux/namei.h> +#include <linux/gfp.h> #include "internal.h" @@ -138,9 +138,9 @@ static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt) { struct afs_super_info *super; struct vfsmount *mnt; - struct page *page = NULL; + struct page *page; size_t size; - char *buf, *devname = NULL, *options = NULL; + char *buf, *devname, *options; int ret; _enter("{%s}", mntpt->d_name.name); @@ -150,22 +150,22 @@ static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt) ret = -EINVAL; size = mntpt->d_inode->i_size; if (size > PAGE_SIZE - 1) - goto error; + goto error_no_devname; ret = -ENOMEM; devname = (char *) get_zeroed_page(GFP_KERNEL); if (!devname) - goto error; + goto error_no_devname; options = (char *) get_zeroed_page(GFP_KERNEL); if (!options) - goto error; + goto error_no_options; /* read the contents of the AFS special symlink */ page = read_mapping_page(mntpt->d_inode->i_mapping, 0, NULL); if (IS_ERR(page)) { ret = PTR_ERR(page); - goto error; + goto error_no_page; } ret = -EIO; @@ -196,12 +196,12 @@ static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt) return mnt; error: - if (page) - page_cache_release(page); - if (devname) - free_page((unsigned long) devname); - if (options) - free_page((unsigned long) options); + page_cache_release(page); +error_no_page: + 
free_page((unsigned long) options); +error_no_options: + free_page((unsigned long) devname); +error_no_devname: _leave(" = %d", ret); return ERR_PTR(ret); } diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index bde3f19c0995..67cf810e0fd6 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -9,6 +9,7 @@ * 2 of the License, or (at your option) any later version. */ +#include <linux/slab.h> #include <net/sock.h> #include <net/af_rxrpc.h> #include <rxrpc/packet.h> diff --git a/fs/afs/security.c b/fs/afs/security.c index 3ef504370034..bb4ed144d0e4 100644 --- a/fs/afs/security.c +++ b/fs/afs/security.c @@ -189,8 +189,9 @@ void afs_cache_permit(struct afs_vnode *vnode, struct key *key, long acl_order) if (!permits) goto out_unlock; - memcpy(permits->permits, xpermits->permits, - count * sizeof(struct afs_permit)); + if (xpermits) + memcpy(permits->permits, xpermits->permits, + count * sizeof(struct afs_permit)); _debug("key %x access %x", key_serial(key), vnode->status.caller_access); diff --git a/fs/afs/super.c b/fs/afs/super.c index 14f6431598ad..e932e5a3a0c1 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -311,6 +311,7 @@ static int afs_fill_super(struct super_block *sb, void *data) sb->s_magic = AFS_FS_MAGIC; sb->s_op = &afs_super_ops; sb->s_fs_info = as; + sb->s_bdi = &as->volume->bdi; /* allocate the root inode and dentry */ fid.vid = as->volume->vid; diff --git a/fs/afs/vlclient.c b/fs/afs/vlclient.c index 36c1306e09e0..340afd0cd182 100644 --- a/fs/afs/vlclient.c +++ b/fs/afs/vlclient.c @@ -9,6 +9,7 @@ * 2 of the License, or (at your option) any later version. 
*/ +#include <linux/gfp.h> #include <linux/init.h> #include <linux/sched.h> #include "internal.h" diff --git a/fs/afs/vlocation.c b/fs/afs/vlocation.c index 6e689208def2..9ac260d1361d 100644 --- a/fs/afs/vlocation.c +++ b/fs/afs/vlocation.c @@ -11,6 +11,7 @@ #include <linux/kernel.h> #include <linux/module.h> +#include <linux/slab.h> #include <linux/init.h> #include <linux/sched.h> #include "internal.h" diff --git a/fs/afs/vnode.c b/fs/afs/vnode.c index 2f05c4fc2a70..25cf4c3f4ff7 100644 --- a/fs/afs/vnode.c +++ b/fs/afs/vnode.c @@ -12,7 +12,6 @@ #include <linux/kernel.h> #include <linux/module.h> #include <linux/init.h> -#include <linux/slab.h> #include <linux/fs.h> #include <linux/sched.h> #include "internal.h" diff --git a/fs/afs/volume.c b/fs/afs/volume.c index a353e69e2391..401eeb21869f 100644 --- a/fs/afs/volume.c +++ b/fs/afs/volume.c @@ -106,6 +106,10 @@ struct afs_volume *afs_volume_lookup(struct afs_mount_params *params) volume->cell = params->cell; volume->vid = vlocation->vldb.vid[params->type]; + ret = bdi_setup_and_register(&volume->bdi, "afs", BDI_CAP_MAP_COPY); + if (ret) + goto error_bdi; + init_rwsem(&volume->server_sem); /* look up all the applicable server records */ @@ -151,6 +155,8 @@ error: return ERR_PTR(ret); error_discard: + bdi_destroy(&volume->bdi); +error_bdi: up_write(&params->cell->vl_sem); for (loop = volume->nservers - 1; loop >= 0; loop--) @@ -200,6 +206,7 @@ void afs_put_volume(struct afs_volume *volume) for (loop = volume->nservers - 1; loop >= 0; loop--) afs_put_server(volume->servers[loop]); + bdi_destroy(&volume->bdi); kfree(volume); _leave(" [destroyed]"); diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c index 2de009565d8e..e4b75d6eda83 100644 --- a/fs/anon_inodes.c +++ b/fs/anon_inodes.c @@ -12,7 +12,6 @@ #include <linux/file.h> #include <linux/poll.h> #include <linux/sched.h> -#include <linux/slab.h> #include <linux/init.h> #include <linux/fs.h> #include <linux/mount.h> diff --git a/fs/autofs/root.c b/fs/autofs/root.c index 
4a1401cea0a1..8713c7cfbc79 100644 --- a/fs/autofs/root.c +++ b/fs/autofs/root.c @@ -13,6 +13,7 @@ #include <linux/capability.h> #include <linux/errno.h> #include <linux/stat.h> +#include <linux/slab.h> #include <linux/param.h> #include <linux/time.h> #include <linux/smp_lock.h> diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c index c8a80dffb455..d29b7f6df862 100644 --- a/fs/autofs4/dev-ioctl.c +++ b/fs/autofs4/dev-ioctl.c @@ -22,6 +22,7 @@ #include <linux/magic.h> #include <linux/dcache.h> #include <linux/uaccess.h> +#include <linux/slab.h> #include "autofs_i.h" diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index a015b49891df..109a6c606d92 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -15,6 +15,7 @@ #include <linux/capability.h> #include <linux/errno.h> #include <linux/stat.h> +#include <linux/slab.h> #include <linux/param.h> #include <linux/time.h> #include "autofs_i.h" diff --git a/fs/befs/datastream.c b/fs/befs/datastream.c index e3287d0d1a58..59096b5e0fc7 100644 --- a/fs/befs/datastream.c +++ b/fs/befs/datastream.c @@ -11,7 +11,6 @@ */ #include <linux/kernel.h> -#include <linux/slab.h> #include <linux/buffer_head.h> #include <linux/string.h> diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c index 15d80bb35d6f..f96eff04e11a 100644 --- a/fs/binfmt_aout.c +++ b/fs/binfmt_aout.c @@ -20,11 +20,11 @@ #include <linux/fcntl.h> #include <linux/ptrace.h> #include <linux/user.h> -#include <linux/slab.h> #include <linux/binfmts.h> #include <linux/personality.h> #include <linux/init.h> #include <linux/coredump.h> +#include <linux/slab.h> #include <asm/system.h> #include <asm/uaccess.h> @@ -75,14 +75,16 @@ static int aout_core_dump(struct coredump_params *cprm) struct file *file = cprm->file; mm_segment_t fs; int has_dumped = 0; - unsigned long dump_start, dump_size; + void __user *dump_start; + int dump_size; struct user dump; #ifdef __alpha__ -# define START_DATA(u) (u.start_data) +# define START_DATA(u) ((void __user *)u.start_data) #else -# 
define START_DATA(u) ((u.u_tsize << PAGE_SHIFT) + u.start_code) +# define START_DATA(u) ((void __user *)((u.u_tsize << PAGE_SHIFT) + \ + u.start_code)) #endif -# define START_STACK(u) (u.start_stack) +# define START_STACK(u) ((void __user *)u.start_stack) fs = get_fs(); set_fs(KERNEL_DS); @@ -104,9 +106,9 @@ static int aout_core_dump(struct coredump_params *cprm) /* make sure we actually have a data and stack area to dump */ set_fs(USER_DS); - if (!access_ok(VERIFY_READ, (void __user *)START_DATA(dump), dump.u_dsize << PAGE_SHIFT)) + if (!access_ok(VERIFY_READ, START_DATA(dump), dump.u_dsize << PAGE_SHIFT)) dump.u_dsize = 0; - if (!access_ok(VERIFY_READ, (void __user *)START_STACK(dump), dump.u_ssize << PAGE_SHIFT)) + if (!access_ok(VERIFY_READ, START_STACK(dump), dump.u_ssize << PAGE_SHIFT)) dump.u_ssize = 0; set_fs(KERNEL_DS); diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 2c32d00a6690..2c5f9a0e5d72 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -1005,15 +1005,8 @@ static int elf_fdpic_map_file_constdisp_on_uclinux( } } else if (!mm->start_data) { mm->start_data = seg->addr; -#ifndef CONFIG_MMU mm->end_data = seg->addr + phdr->p_memsz; -#endif } - -#ifdef CONFIG_MMU - if (seg->addr + phdr->p_memsz > mm->end_data) - mm->end_data = seg->addr + phdr->p_memsz; -#endif } seg++; @@ -1590,7 +1583,7 @@ static size_t elf_core_vma_data_size(unsigned long mm_flags) struct vm_area_struct *vma; size_t size = 0; - for (vma = current->mm->mmap; vma; vma->vm_next) + for (vma = current->mm->mmap; vma; vma = vma->vm_next) if (maydump(vma, mm_flags)) size += vma->vm_end - vma->vm_start; return size; diff --git a/fs/binfmt_em86.c b/fs/binfmt_em86.c index 32fb00b52cd0..b8e8b0acf9bd 100644 --- a/fs/binfmt_em86.c +++ b/fs/binfmt_em86.c @@ -11,7 +11,6 @@ #include <linux/module.h> #include <linux/string.h> #include <linux/stat.h> -#include <linux/slab.h> #include <linux/binfmts.h> #include <linux/elf.h> #include <linux/init.h> diff --git 
a/fs/binfmt_flat.c b/fs/binfmt_flat.c index e0e769bdca59..49566c1687d8 100644 --- a/fs/binfmt_flat.c +++ b/fs/binfmt_flat.c @@ -355,7 +355,7 @@ calc_reloc(unsigned long r, struct lib_info *p, int curid, int internalp) if (!flat_reloc_valid(r, start_brk - start_data + text_len)) { printk("BINFMT_FLAT: reloc outside program 0x%x (0 - 0x%x/0x%x)", - (int) r,(int)(start_brk-start_code),(int)text_len); + (int) r,(int)(start_brk-start_data+text_len),(int)text_len); goto failed; } diff --git a/fs/binfmt_script.c b/fs/binfmt_script.c index 08343505e184..aca9d55afb22 100644 --- a/fs/binfmt_script.c +++ b/fs/binfmt_script.c @@ -8,7 +8,6 @@ #include <linux/module.h> #include <linux/string.h> #include <linux/stat.h> -#include <linux/slab.h> #include <linux/binfmts.h> #include <linux/init.h> #include <linux/file.h> diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c index a16f29e888cd..612a5c38d3c1 100644 --- a/fs/bio-integrity.c +++ b/fs/bio-integrity.c @@ -24,6 +24,7 @@ #include <linux/mempool.h> #include <linux/bio.h> #include <linux/workqueue.h> +#include <linux/slab.h> struct integrity_slab { struct kmem_cache *slab; @@ -554,7 +554,7 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page .bi_rw = bio->bi_rw, }; - if (q->merge_bvec_fn(q, &bvm, prev) < len) { + if (q->merge_bvec_fn(q, &bvm, prev) < prev->bv_len) { prev->bv_len -= len; return 0; } @@ -607,7 +607,7 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page * merge_bvec_fn() returns number of bytes it can accept * at this offset */ - if (q->merge_bvec_fn(q, &bvm, bvec) < len) { + if (q->merge_bvec_fn(q, &bvm, bvec) < bvec->bv_len) { bvec->bv_page = NULL; bvec->bv_len = 0; bvec->bv_offset = 0; diff --git a/fs/block_dev.c b/fs/block_dev.c index d11d0289f3d2..6dcee88c2e5d 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -404,20 +404,28 @@ static loff_t block_llseek(struct file *file, loff_t offset, int origin) * NULL first argument is nfsd_sync_dir() and 
that's not a directory. */ -static int block_fsync(struct file *filp, struct dentry *dentry, int datasync) +int blkdev_fsync(struct file *filp, struct dentry *dentry, int datasync) { - struct block_device *bdev = I_BDEV(filp->f_mapping->host); + struct inode *bd_inode = filp->f_mapping->host; + struct block_device *bdev = I_BDEV(bd_inode); int error; - error = sync_blockdev(bdev); - if (error) - return error; - + /* + * There is no need to serialise calls to blkdev_issue_flush with + * i_mutex and doing so causes performance issues with concurrent + * O_SYNC writers to a block device. + */ + mutex_unlock(&bd_inode->i_mutex); + error = blkdev_issue_flush(bdev, NULL); if (error == -EOPNOTSUPP) error = 0; + + mutex_lock(&bd_inode->i_mutex); + return error; } +EXPORT_SYMBOL(blkdev_fsync); /* * pseudo-fs @@ -1481,7 +1489,7 @@ const struct file_operations def_blk_fops = { .aio_read = generic_file_aio_read, .aio_write = blkdev_aio_write, .mmap = generic_file_mmap, - .fsync = block_fsync, + .fsync = blkdev_fsync, .unlocked_ioctl = block_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = compat_blkdev_ioctl, diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c index 6df6d6ed74fd..6ef7b26724ec 100644 --- a/fs/btrfs/acl.c +++ b/fs/btrfs/acl.c @@ -22,6 +22,7 @@ #include <linux/posix_acl_xattr.h> #include <linux/posix_acl.h> #include <linux/sched.h> +#include <linux/slab.h> #include "ctree.h" #include "btrfs_inode.h" diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c index c0861e781cdb..462859a30141 100644 --- a/fs/btrfs/async-thread.c +++ b/fs/btrfs/async-thread.c @@ -17,6 +17,7 @@ */ #include <linux/kthread.h> +#include <linux/slab.h> #include <linux/list.h> #include <linux/spinlock.h> #include <linux/freezer.h> diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 28b92a7218ab..396039b3a8a2 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -31,7 +31,7 @@ #include <linux/swap.h> #include <linux/writeback.h> #include <linux/bit_spinlock.h> -#include 
<linux/pagevec.h> +#include <linux/slab.h> #include "compat.h" #include "ctree.h" #include "disk-io.h" @@ -445,7 +445,6 @@ static noinline int add_ra_bio_pages(struct inode *inode, unsigned long nr_pages = 0; struct extent_map *em; struct address_space *mapping = inode->i_mapping; - struct pagevec pvec; struct extent_map_tree *em_tree; struct extent_io_tree *tree; u64 end; @@ -461,7 +460,6 @@ static noinline int add_ra_bio_pages(struct inode *inode, end_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT; - pagevec_init(&pvec, 0); while (last_offset < compressed_end) { page_index = last_offset >> PAGE_CACHE_SHIFT; @@ -478,26 +476,17 @@ static noinline int add_ra_bio_pages(struct inode *inode, goto next; } - page = alloc_page(mapping_gfp_mask(mapping) & ~__GFP_FS); + page = __page_cache_alloc(mapping_gfp_mask(mapping) & + ~__GFP_FS); if (!page) break; - page->index = page_index; - /* - * what we want to do here is call add_to_page_cache_lru, - * but that isn't exported, so we reproduce it here - */ - if (add_to_page_cache(page, mapping, - page->index, GFP_NOFS)) { + if (add_to_page_cache_lru(page, mapping, page_index, + GFP_NOFS)) { page_cache_release(page); goto next; } - /* open coding of lru_cache_add, also not exported */ - page_cache_get(page); - if (!pagevec_add(&pvec, page)) - __pagevec_lru_add_file(&pvec); - end = last_offset + PAGE_CACHE_SIZE - 1; /* * at this point, we have a locked page in the page cache @@ -551,8 +540,6 @@ static noinline int add_ra_bio_pages(struct inode *inode, next: last_offset += PAGE_CACHE_SIZE; } - if (pagevec_count(&pvec)) - __pagevec_lru_add_file(&pvec); return 0; } diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index c4bc570a396e..6795a713b205 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -17,6 +17,7 @@ */ #include <linux/sched.h> +#include <linux/slab.h> #include "ctree.h" #include "disk-io.h" #include "transaction.h" @@ -3040,6 +3041,10 @@ static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans, 
if (ret > 0 || item_size != btrfs_item_size_nr(leaf, path->slots[0])) goto err; + /* the leaf has changed, it now has room. return now */ + if (btrfs_leaf_free_space(root, path->nodes[0]) >= ins_len) + goto err; + if (key.type == BTRFS_EXTENT_DATA_KEY) { fi = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item); diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 0af2e3868573..746a7248678e 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -26,6 +26,7 @@ #include <linux/completion.h> #include <linux/backing-dev.h> #include <linux/wait.h> +#include <linux/slab.h> #include <asm/kmap_types.h> #include "extent_io.h" #include "extent_map.h" @@ -834,7 +835,6 @@ struct btrfs_fs_info { u64 last_trans_log_full_commit; u64 open_ioctl_trans; unsigned long mount_opt; - u64 max_extent; u64 max_inline; u64 alloc_start; struct btrfs_transaction *running_transaction; diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index 84e6781413b1..902ce507c4e3 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c @@ -17,6 +17,7 @@ */ #include <linux/sched.h> +#include <linux/slab.h> #include <linux/sort.h> #include "ctree.h" #include "delayed-ref.h" diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 11d0ad30e203..feca04197d02 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -27,6 +27,7 @@ #include <linux/kthread.h> #include <linux/freezer.h> #include <linux/crc32c.h> +#include <linux/slab.h> #include "compat.h" #include "ctree.h" #include "disk-io.h" @@ -43,8 +44,6 @@ static struct extent_io_ops btree_extent_io_ops; static void end_workqueue_fn(struct btrfs_work *work); static void free_fs_root(struct btrfs_root *root); -static atomic_t btrfs_bdi_num = ATOMIC_INIT(0); - /* * end_io_wq structs are used to do processing in task context when an IO is * complete. 
This is used during reads to verify checksums, and it is used @@ -1374,19 +1373,11 @@ static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi) { int err; - bdi->name = "btrfs"; bdi->capabilities = BDI_CAP_MAP_COPY; - err = bdi_init(bdi); + err = bdi_setup_and_register(bdi, "btrfs", BDI_CAP_MAP_COPY); if (err) return err; - err = bdi_register(bdi, NULL, "btrfs-%d", - atomic_inc_return(&btrfs_bdi_num)); - if (err) { - bdi_destroy(bdi); - return err; - } - bdi->ra_pages = default_backing_dev_info.ra_pages; bdi->unplug_io_fn = btrfs_unplug_io_fn; bdi->unplug_io_data = info; @@ -1634,7 +1625,6 @@ struct btrfs_root *open_ctree(struct super_block *sb, atomic_set(&fs_info->async_submit_draining, 0); atomic_set(&fs_info->nr_async_bios, 0); fs_info->sb = sb; - fs_info->max_extent = (u64)-1; fs_info->max_inline = 8192 * 1024; fs_info->metadata_ratio = 0; @@ -1922,7 +1912,11 @@ struct btrfs_root *open_ctree(struct super_block *sb, csum_root->track_dirty = 1; - btrfs_read_block_groups(extent_root); + ret = btrfs_read_block_groups(extent_root); + if (ret) { + printk(KERN_ERR "Failed to read block groups: %d\n", ret); + goto fail_block_groups; + } fs_info->generation = generation; fs_info->last_trans_committed = generation; @@ -1932,7 +1926,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root, "btrfs-cleaner"); if (IS_ERR(fs_info->cleaner_kthread)) - goto fail_csum_root; + goto fail_block_groups; fs_info->transaction_kthread = kthread_run(transaction_kthread, tree_root, @@ -2020,7 +2014,8 @@ fail_cleaner: filemap_write_and_wait(fs_info->btree_inode->i_mapping); invalidate_inode_pages2(fs_info->btree_inode->i_mapping); -fail_csum_root: +fail_block_groups: + btrfs_free_block_groups(fs_info); free_extent_buffer(csum_root->node); free_extent_buffer(csum_root->commit_root); fail_dev_root: diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 1727b26fb194..b34d32fdaaec 100644 --- 
a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -22,6 +22,7 @@ #include <linux/sort.h> #include <linux/rcupdate.h> #include <linux/kthread.h> +#include <linux/slab.h> #include "compat.h" #include "hash.h" #include "ctree.h" @@ -2676,6 +2677,8 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, INIT_LIST_HEAD(&found->block_groups); init_rwsem(&found->groups_sem); + init_waitqueue_head(&found->flush_wait); + init_waitqueue_head(&found->allocate_wait); spin_lock_init(&found->lock); found->flags = flags; found->total_bytes = total_bytes; @@ -2846,7 +2849,7 @@ int btrfs_unreserve_metadata_for_delalloc(struct btrfs_root *root, } spin_unlock(&BTRFS_I(inode)->accounting_lock); - BTRFS_I(inode)->reserved_extents--; + BTRFS_I(inode)->reserved_extents -= num_items; BUG_ON(BTRFS_I(inode)->reserved_extents < 0); if (meta_sinfo->bytes_delalloc < num_bytes) { @@ -2944,12 +2947,10 @@ static void flush_delalloc(struct btrfs_root *root, spin_lock(&info->lock); - if (!info->flushing) { + if (!info->flushing) info->flushing = 1; - init_waitqueue_head(&info->flush_wait); - } else { + else wait = true; - } spin_unlock(&info->lock); @@ -3011,7 +3012,6 @@ static int maybe_allocate_chunk(struct btrfs_root *root, if (!info->allocating_chunk) { info->force_alloc = 1; info->allocating_chunk = 1; - init_waitqueue_head(&info->allocate_wait); } else { wait = true; } @@ -3111,7 +3111,7 @@ again: return -ENOSPC; } - BTRFS_I(inode)->reserved_extents++; + BTRFS_I(inode)->reserved_extents += num_items; check_force_delalloc(meta_sinfo); spin_unlock(&meta_sinfo->lock); @@ -3235,7 +3235,8 @@ int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode, u64 bytes) { struct btrfs_space_info *data_sinfo; - int ret = 0, committed = 0; + u64 used; + int ret = 0, committed = 0, flushed = 0; /* make sure bytes are sectorsize aligned */ bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); @@ -3247,12 +3248,21 @@ int btrfs_check_data_free_space(struct 
btrfs_root *root, struct inode *inode, again: /* make sure we have enough space to handle the data first */ spin_lock(&data_sinfo->lock); - if (data_sinfo->total_bytes - data_sinfo->bytes_used - - data_sinfo->bytes_delalloc - data_sinfo->bytes_reserved - - data_sinfo->bytes_pinned - data_sinfo->bytes_readonly - - data_sinfo->bytes_may_use - data_sinfo->bytes_super < bytes) { + used = data_sinfo->bytes_used + data_sinfo->bytes_delalloc + + data_sinfo->bytes_reserved + data_sinfo->bytes_pinned + + data_sinfo->bytes_readonly + data_sinfo->bytes_may_use + + data_sinfo->bytes_super; + + if (used + bytes > data_sinfo->total_bytes) { struct btrfs_trans_handle *trans; + if (!flushed) { + spin_unlock(&data_sinfo->lock); + flush_delalloc(root, data_sinfo); + flushed = 1; + goto again; + } + /* * if we don't have enough free bytes in this space then we need * to alloc a new chunk. @@ -4170,6 +4180,10 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, ins->offset = 0; space_info = __find_space_info(root->fs_info, data); + if (!space_info) { + printk(KERN_ERR "No space info for %d\n", data); + return -ENOSPC; + } if (orig_root->ref_cows || empty_size) allowed_chunk_alloc = 1; @@ -5205,6 +5219,8 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans, next = btrfs_find_tree_block(root, bytenr, blocksize); if (!next) { next = btrfs_find_create_tree_block(root, bytenr, blocksize); + if (!next) + return -ENOMEM; reada = 1; } btrfs_tree_lock(next); @@ -5417,7 +5433,8 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans, if (ret > 0) { path->slots[level]++; continue; - } + } else if (ret < 0) + return ret; level = wc->level; } return 0; @@ -7369,7 +7386,6 @@ static int find_first_block_group(struct btrfs_root *root, } path->slots[0]++; } - ret = -ENOENT; out: return ret; } diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index c99121ac5d6b..d2d03684fab2 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2,7 
+2,6 @@ #include <linux/slab.h> #include <linux/bio.h> #include <linux/mm.h> -#include <linux/gfp.h> #include <linux/pagemap.h> #include <linux/page-flags.h> #include <linux/module.h> @@ -2679,33 +2678,20 @@ int extent_readpages(struct extent_io_tree *tree, { struct bio *bio = NULL; unsigned page_idx; - struct pagevec pvec; unsigned long bio_flags = 0; - pagevec_init(&pvec, 0); for (page_idx = 0; page_idx < nr_pages; page_idx++) { struct page *page = list_entry(pages->prev, struct page, lru); prefetchw(&page->flags); list_del(&page->lru); - /* - * what we want to do here is call add_to_page_cache_lru, - * but that isn't exported, so we reproduce it here - */ - if (!add_to_page_cache(page, mapping, + if (!add_to_page_cache_lru(page, mapping, page->index, GFP_KERNEL)) { - - /* open coding of lru_cache_add, also not exported */ - page_cache_get(page); - if (!pagevec_add(&pvec, page)) - __pagevec_lru_add_file(&pvec); __extent_read_full_page(tree, page, get_extent, &bio, 0, &bio_flags); } page_cache_release(page); } - if (pagevec_count(&pvec)) - __pagevec_lru_add_file(&pvec); BUG_ON(!list_empty(pages)); if (bio) submit_one_bio(READ, bio, 0, bio_flags); diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 28d87ba60ce8..454ca52d6451 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -1,5 +1,4 @@ #include <linux/err.h> -#include <linux/gfp.h> #include <linux/slab.h> #include <linux/module.h> #include <linux/spinlock.h> diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 9b99886562d0..54a255065aa3 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -17,6 +17,7 @@ */ #include <linux/bio.h> +#include <linux/slab.h> #include <linux/pagemap.h> #include <linux/highmem.h> #include "ctree.h" diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index ee3323c7fc1c..29ff749ff4ca 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -28,6 +28,7 @@ #include <linux/writeback.h> #include <linux/statfs.h> #include <linux/compat.h> +#include 
<linux/slab.h> #include "ctree.h" #include "disk-io.h" #include "transaction.h" diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index dd831ed31eea..f488fac04d99 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -18,6 +18,7 @@ #include <linux/pagemap.h> #include <linux/sched.h> +#include <linux/slab.h> #include <linux/math64.h> #include "ctree.h" #include "free-space-cache.h" diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 02bb099845fd..2bfdc641d4e3 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -36,6 +36,7 @@ #include <linux/xattr.h> #include <linux/posix_acl.h> #include <linux/falloc.h> +#include <linux/slab.h> #include "compat.h" #include "ctree.h" #include "disk-io.h" @@ -796,7 +797,7 @@ static noinline int cow_file_range(struct inode *inode, while (disk_num_bytes > 0) { unsigned long op; - cur_alloc_size = min(disk_num_bytes, root->fs_info->max_extent); + cur_alloc_size = disk_num_bytes; ret = btrfs_reserve_extent(trans, root, cur_alloc_size, root->sectorsize, 0, alloc_hint, (u64)-1, &ins, 1); @@ -1227,30 +1228,9 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page, static int btrfs_split_extent_hook(struct inode *inode, struct extent_state *orig, u64 split) { - struct btrfs_root *root = BTRFS_I(inode)->root; - u64 size; - if (!(orig->state & EXTENT_DELALLOC)) return 0; - size = orig->end - orig->start + 1; - if (size > root->fs_info->max_extent) { - u64 num_extents; - u64 new_size; - - new_size = orig->end - split + 1; - num_extents = div64_u64(size + root->fs_info->max_extent - 1, - root->fs_info->max_extent); - - /* - * if we break a large extent up then leave oustanding_extents - * be, since we've already accounted for the large extent. 
- */ - if (div64_u64(new_size + root->fs_info->max_extent - 1, - root->fs_info->max_extent) < num_extents) - return 0; - } - spin_lock(&BTRFS_I(inode)->accounting_lock); BTRFS_I(inode)->outstanding_extents++; spin_unlock(&BTRFS_I(inode)->accounting_lock); @@ -1268,38 +1248,10 @@ static int btrfs_merge_extent_hook(struct inode *inode, struct extent_state *new, struct extent_state *other) { - struct btrfs_root *root = BTRFS_I(inode)->root; - u64 new_size, old_size; - u64 num_extents; - /* not delalloc, ignore it */ if (!(other->state & EXTENT_DELALLOC)) return 0; - old_size = other->end - other->start + 1; - if (new->start < other->start) - new_size = other->end - new->start + 1; - else - new_size = new->end - other->start + 1; - - /* we're not bigger than the max, unreserve the space and go */ - if (new_size <= root->fs_info->max_extent) { - spin_lock(&BTRFS_I(inode)->accounting_lock); - BTRFS_I(inode)->outstanding_extents--; - spin_unlock(&BTRFS_I(inode)->accounting_lock); - return 0; - } - - /* - * If we grew by another max_extent, just return, we want to keep that - * reserved amount. 
- */ - num_extents = div64_u64(old_size + root->fs_info->max_extent - 1, - root->fs_info->max_extent); - if (div64_u64(new_size + root->fs_info->max_extent - 1, - root->fs_info->max_extent) > num_extents) - return 0; - spin_lock(&BTRFS_I(inode)->accounting_lock); BTRFS_I(inode)->outstanding_extents--; spin_unlock(&BTRFS_I(inode)->accounting_lock); @@ -1328,6 +1280,7 @@ static int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end, BTRFS_I(inode)->outstanding_extents++; spin_unlock(&BTRFS_I(inode)->accounting_lock); btrfs_delalloc_reserve_space(root, inode, end - start + 1); + spin_lock(&root->fs_info->delalloc_lock); BTRFS_I(inode)->delalloc_bytes += end - start + 1; root->fs_info->delalloc_bytes += end - start + 1; @@ -1356,6 +1309,7 @@ static int btrfs_clear_bit_hook(struct inode *inode, if (bits & EXTENT_DO_ACCOUNTING) { spin_lock(&BTRFS_I(inode)->accounting_lock); + WARN_ON(!BTRFS_I(inode)->outstanding_extents); BTRFS_I(inode)->outstanding_extents--; spin_unlock(&BTRFS_I(inode)->accounting_lock); btrfs_unreserve_metadata_for_delalloc(root, inode, 1); @@ -5384,7 +5338,6 @@ free: void btrfs_drop_inode(struct inode *inode) { struct btrfs_root *root = BTRFS_I(inode)->root; - if (inode->i_nlink > 0 && btrfs_root_refs(&root->root_item) == 0) generic_delete_inode(inode); else @@ -5788,18 +5741,15 @@ static int prealloc_file_range(struct inode *inode, u64 start, u64 end, struct btrfs_trans_handle *trans; struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_key ins; - u64 alloc_size; u64 cur_offset = start; u64 num_bytes = end - start; int ret = 0; u64 i_size; while (num_bytes > 0) { - alloc_size = min(num_bytes, root->fs_info->max_extent); - trans = btrfs_start_transaction(root, 1); - ret = btrfs_reserve_extent(trans, root, alloc_size, + ret = btrfs_reserve_extent(trans, root, num_bytes, root->sectorsize, 0, alloc_hint, (u64)-1, &ins, 1); if (ret) { diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 2845c6ceecd2..e84ef60ffe35 100644 --- 
a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -39,6 +39,7 @@ #include <linux/security.h> #include <linux/xattr.h> #include <linux/vmalloc.h> +#include <linux/slab.h> #include "compat.h" #include "ctree.h" #include "disk-io.h" @@ -48,7 +49,6 @@ #include "print-tree.h" #include "volumes.h" #include "locking.h" -#include "ctree.h" /* Mask out flags that are inappropriate for the given type of inode. */ static inline __u32 btrfs_mask_flags(umode_t mode, __u32 flags) @@ -511,7 +511,7 @@ static int should_defrag_range(struct inode *inode, u64 start, u64 len, em = btrfs_get_extent(inode, NULL, 0, start, len, 0); unlock_extent(io_tree, start, start + len - 1, GFP_NOFS); - if (!em) + if (IS_ERR(em)) return 0; } @@ -1212,6 +1212,9 @@ static noinline int btrfs_ioctl_ino_lookup(struct file *file, return -EPERM; args = kmalloc(sizeof(*args), GFP_KERNEL); + if (!args) + return -ENOMEM; + if (copy_from_user(args, argp, sizeof(*args))) { kfree(args); return -EFAULT; @@ -1375,6 +1378,7 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp) sizeof(*range))) { ret = -EFAULT; kfree(range); + goto out; } /* compression requires us to start the IO */ if ((range->flags & BTRFS_DEFRAG_RANGE_COMPRESS)) { diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c index 1c36e5cd8f55..6151f2ea38bb 100644 --- a/fs/btrfs/locking.c +++ b/fs/btrfs/locking.c @@ -16,7 +16,6 @@ * Boston, MA 021110-1307, USA. */ #include <linux/sched.h> -#include <linux/gfp.h> #include <linux/pagemap.h> #include <linux/spinlock.h> #include <linux/page-flags.h> diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index a8ffecd0b491..a127c0ebb2dc 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -16,7 +16,6 @@ * Boston, MA 021110-1307, USA. 
*/ -#include <linux/gfp.h> #include <linux/slab.h> #include <linux/blkdev.h> #include <linux/writeback.h> @@ -303,6 +302,7 @@ static int __btrfs_remove_ordered_extent(struct inode *inode, struct btrfs_ordered_extent *entry) { struct btrfs_ordered_inode_tree *tree; + struct btrfs_root *root = BTRFS_I(inode)->root; struct rb_node *node; tree = &BTRFS_I(inode)->ordered_tree; @@ -312,12 +312,13 @@ static int __btrfs_remove_ordered_extent(struct inode *inode, set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags); spin_lock(&BTRFS_I(inode)->accounting_lock); + WARN_ON(!BTRFS_I(inode)->outstanding_extents); BTRFS_I(inode)->outstanding_extents--; spin_unlock(&BTRFS_I(inode)->accounting_lock); btrfs_unreserve_metadata_for_delalloc(BTRFS_I(inode)->root, inode, 1); - spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); + spin_lock(&root->fs_info->ordered_extent_lock); list_del_init(&entry->root_extent_list); /* @@ -329,7 +330,7 @@ static int __btrfs_remove_ordered_extent(struct inode *inode, !mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY)) { list_del_init(&BTRFS_I(inode)->ordered_operations); } - spin_unlock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); + spin_unlock(&root->fs_info->ordered_extent_lock); return 0; } diff --git a/fs/btrfs/ref-cache.c b/fs/btrfs/ref-cache.c index d0cc62bccb94..a97314cf6bd6 100644 --- a/fs/btrfs/ref-cache.c +++ b/fs/btrfs/ref-cache.c @@ -17,6 +17,7 @@ */ #include <linux/sched.h> +#include <linux/slab.h> #include <linux/sort.h> #include "ctree.h" #include "ref-cache.h" diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 0b23942cbc0d..e558dd941ded 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -21,6 +21,7 @@ #include <linux/writeback.h> #include <linux/blkdev.h> #include <linux/rbtree.h> +#include <linux/slab.h> #include "ctree.h" #include "disk-io.h" #include "transaction.h" diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 9ac612e6ca60..1866dff0538e 100644 --- a/fs/btrfs/super.c +++ 
b/fs/btrfs/super.c @@ -38,6 +38,7 @@ #include <linux/namei.h> #include <linux/miscdevice.h> #include <linux/magic.h> +#include <linux/slab.h> #include "compat.h" #include "ctree.h" #include "disk-io.h" @@ -64,10 +65,9 @@ static void btrfs_put_super(struct super_block *sb) enum { Opt_degraded, Opt_subvol, Opt_subvolid, Opt_device, Opt_nodatasum, - Opt_nodatacow, Opt_max_extent, Opt_max_inline, Opt_alloc_start, - Opt_nobarrier, Opt_ssd, Opt_nossd, Opt_ssd_spread, Opt_thread_pool, - Opt_noacl, Opt_compress, Opt_compress_force, Opt_notreelog, Opt_ratio, - Opt_flushoncommit, + Opt_nodatacow, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, Opt_ssd, + Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, Opt_compress, + Opt_compress_force, Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard, Opt_err, }; @@ -79,7 +79,6 @@ static match_table_t tokens = { {Opt_nodatasum, "nodatasum"}, {Opt_nodatacow, "nodatacow"}, {Opt_nobarrier, "nobarrier"}, - {Opt_max_extent, "max_extent=%s"}, {Opt_max_inline, "max_inline=%s"}, {Opt_alloc_start, "alloc_start=%s"}, {Opt_thread_pool, "thread_pool=%d"}, @@ -188,18 +187,6 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) info->thread_pool_size); } break; - case Opt_max_extent: - num = match_strdup(&args[0]); - if (num) { - info->max_extent = memparse(num, NULL); - kfree(num); - - info->max_extent = max_t(u64, - info->max_extent, root->sectorsize); - printk(KERN_INFO "btrfs: max_extent at %llu\n", - (unsigned long long)info->max_extent); - } - break; case Opt_max_inline: num = match_strdup(&args[0]); if (num) { @@ -529,9 +516,6 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs) seq_puts(seq, ",nodatacow"); if (btrfs_test_opt(root, NOBARRIER)) seq_puts(seq, ",nobarrier"); - if (info->max_extent != (u64)-1) - seq_printf(seq, ",max_extent=%llu", - (unsigned long long)info->max_extent); if (info->max_inline != 8192 * 1024) seq_printf(seq, ",max_inline=%llu", (unsigned long long)info->max_inline); 
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 2d654c1c794d..2cb116099b90 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -17,6 +17,7 @@ */ #include <linux/fs.h> +#include <linux/slab.h> #include <linux/sched.h> #include <linux/writeback.h> #include <linux/pagemap.h> @@ -147,18 +148,13 @@ static void wait_current_trans(struct btrfs_root *root) while (1) { prepare_to_wait(&root->fs_info->transaction_wait, &wait, TASK_UNINTERRUPTIBLE); - if (cur_trans->blocked) { - mutex_unlock(&root->fs_info->trans_mutex); - schedule(); - mutex_lock(&root->fs_info->trans_mutex); - finish_wait(&root->fs_info->transaction_wait, - &wait); - } else { - finish_wait(&root->fs_info->transaction_wait, - &wait); + if (!cur_trans->blocked) break; - } + mutex_unlock(&root->fs_info->trans_mutex); + schedule(); + mutex_lock(&root->fs_info->trans_mutex); } + finish_wait(&root->fs_info->transaction_wait, &wait); put_transaction(cur_trans); } } @@ -760,10 +756,17 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root_item *new_root_item; struct btrfs_root *tree_root = fs_info->tree_root; struct btrfs_root *root = pending->root; + struct btrfs_root *parent_root; + struct inode *parent_inode; struct extent_buffer *tmp; struct extent_buffer *old; int ret; u64 objectid; + int namelen; + u64 index = 0; + + parent_inode = pending->dentry->d_parent->d_inode; + parent_root = BTRFS_I(parent_inode)->root; new_root_item = kmalloc(sizeof(*new_root_item), GFP_NOFS); if (!new_root_item) { @@ -774,79 +777,59 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, if (ret) goto fail; - record_root_in_trans(trans, root); - btrfs_set_root_last_snapshot(&root->root_item, trans->transid); - memcpy(new_root_item, &root->root_item, sizeof(*new_root_item)); - key.objectid = objectid; /* record when the snapshot was created in key.offset */ key.offset = trans->transid; btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); 
- old = btrfs_lock_root_node(root); - btrfs_cow_block(trans, root, old, NULL, 0, &old); - btrfs_set_lock_blocking(old); - - btrfs_copy_root(trans, root, old, &tmp, objectid); - btrfs_tree_unlock(old); - free_extent_buffer(old); - - btrfs_set_root_node(new_root_item, tmp); - ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key, - new_root_item); - btrfs_tree_unlock(tmp); - free_extent_buffer(tmp); - if (ret) - goto fail; - - key.offset = (u64)-1; memcpy(&pending->root_key, &key, sizeof(key)); -fail: - kfree(new_root_item); - return ret; -} - -static noinline int finish_pending_snapshot(struct btrfs_fs_info *fs_info, - struct btrfs_pending_snapshot *pending) -{ - int ret; - int namelen; - u64 index = 0; - struct btrfs_trans_handle *trans; - struct inode *parent_inode; - struct btrfs_root *parent_root; - - parent_inode = pending->dentry->d_parent->d_inode; - parent_root = BTRFS_I(parent_inode)->root; - trans = btrfs_join_transaction(parent_root, 1); + pending->root_key.offset = (u64)-1; + record_root_in_trans(trans, parent_root); /* * insert the directory item */ namelen = strlen(pending->name); ret = btrfs_set_inode_index(parent_inode, &index); + BUG_ON(ret); ret = btrfs_insert_dir_item(trans, parent_root, pending->name, namelen, parent_inode->i_ino, &pending->root_key, BTRFS_FT_DIR, index); - - if (ret) - goto fail; + BUG_ON(ret); btrfs_i_size_write(parent_inode, parent_inode->i_size + namelen * 2); ret = btrfs_update_inode(trans, parent_root, parent_inode); BUG_ON(ret); + record_root_in_trans(trans, root); + btrfs_set_root_last_snapshot(&root->root_item, trans->transid); + memcpy(new_root_item, &root->root_item, sizeof(*new_root_item)); + + old = btrfs_lock_root_node(root); + btrfs_cow_block(trans, root, old, NULL, 0, &old); + btrfs_set_lock_blocking(old); + + btrfs_copy_root(trans, root, old, &tmp, objectid); + btrfs_tree_unlock(old); + free_extent_buffer(old); + + btrfs_set_root_node(new_root_item, tmp); + ret = btrfs_insert_root(trans, 
root->fs_info->tree_root, &key, + new_root_item); + BUG_ON(ret); + btrfs_tree_unlock(tmp); + free_extent_buffer(tmp); + ret = btrfs_add_root_ref(trans, parent_root->fs_info->tree_root, pending->root_key.objectid, parent_root->root_key.objectid, parent_inode->i_ino, index, pending->name, namelen); - BUG_ON(ret); fail: - btrfs_end_transaction(trans, fs_info->fs_root); + kfree(new_root_item); return ret; } @@ -867,25 +850,6 @@ static noinline int create_pending_snapshots(struct btrfs_trans_handle *trans, return 0; } -static noinline int finish_pending_snapshots(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info) -{ - struct btrfs_pending_snapshot *pending; - struct list_head *head = &trans->transaction->pending_snapshots; - int ret; - - while (!list_empty(head)) { - pending = list_entry(head->next, - struct btrfs_pending_snapshot, list); - ret = finish_pending_snapshot(fs_info, pending); - BUG_ON(ret); - list_del(&pending->list); - kfree(pending->name); - kfree(pending); - } - return 0; -} - static void update_super_roots(struct btrfs_root *root) { struct btrfs_root_item *root_item; @@ -1097,9 +1061,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, btrfs_finish_extent_commit(trans, root); - /* do the directory inserts of any pending snapshot creations */ - finish_pending_snapshots(trans, root->fs_info); - mutex_lock(&root->fs_info->trans_mutex); cur_trans->commit_done = 1; diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 1255fcc8ade5..af57dd2b43d4 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -17,6 +17,7 @@ */ #include <linux/sched.h> +#include <linux/slab.h> #include "ctree.h" #include "transaction.h" #include "disk-io.h" diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 9df8e3f1ccab..8db7b14bbae8 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -17,6 +17,7 @@ */ #include <linux/sched.h> #include <linux/bio.h> +#include <linux/slab.h> #include <linux/buffer_head.h> #include 
<linux/blkdev.h> #include <linux/random.h> @@ -2198,9 +2199,9 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, min_stripes = 2; } if (type & (BTRFS_BLOCK_GROUP_RAID1)) { - num_stripes = min_t(u64, 2, fs_devices->rw_devices); - if (num_stripes < 2) + if (fs_devices->rw_devices < 2) return -ENOSPC; + num_stripes = 2; min_stripes = 2; } if (type & (BTRFS_BLOCK_GROUP_RAID10)) { @@ -2244,8 +2245,16 @@ again: do_div(calc_size, stripe_len); calc_size *= stripe_len; } + /* we don't want tiny stripes */ - calc_size = max_t(u64, min_stripe_size, calc_size); + if (!looped) + calc_size = max_t(u64, min_stripe_size, calc_size); + + /* + * we're about to do_div by the stripe_len so lets make sure + * we end up with something bigger than a stripe + */ + calc_size = max_t(u64, calc_size, stripe_len * 4); do_div(calc_size, stripe_len); calc_size *= stripe_len; @@ -3389,6 +3398,8 @@ int btrfs_read_chunk_tree(struct btrfs_root *root) key.type = 0; again: ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret < 0) + goto error; while (1) { leaf = path->nodes[0]; slot = path->slots[0]; diff --git a/fs/cachefiles/interface.c b/fs/cachefiles/interface.c index 27089311fbea..37fe101a4e0d 100644 --- a/fs/cachefiles/interface.c +++ b/fs/cachefiles/interface.c @@ -9,6 +9,7 @@ * 2 of the Licence, or (at your option) any later version. 
*/ +#include <linux/slab.h> #include <linux/mount.h> #include <linux/buffer_head.h> #include "internal.h" diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c index eeb4986ea7db..d5db84a1ee0d 100644 --- a/fs/cachefiles/namei.c +++ b/fs/cachefiles/namei.c @@ -19,6 +19,7 @@ #include <linux/mount.h> #include <linux/namei.h> #include <linux/security.h> +#include <linux/slab.h> #include "internal.h" #define CACHEFILES_KEYBUF_SIZE 512 diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c index 1d8332563863..0f0d41fbb03f 100644 --- a/fs/cachefiles/rdwr.c +++ b/fs/cachefiles/rdwr.c @@ -10,6 +10,7 @@ */ #include <linux/mount.h> +#include <linux/slab.h> #include <linux/file.h> #include "internal.h" diff --git a/fs/cachefiles/xattr.c b/fs/cachefiles/xattr.c index f3e7a0bf068b..e18b183b47e1 100644 --- a/fs/cachefiles/xattr.c +++ b/fs/cachefiles/xattr.c @@ -16,6 +16,7 @@ #include <linux/fsnotify.h> #include <linux/quotaops.h> #include <linux/xattr.h> +#include <linux/slab.h> #include "internal.h" static const char cachefiles_xattr_cache[] = diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 23bb0ceabe31..4b42c2bb603f 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -5,6 +5,7 @@ #include <linux/mm.h> #include <linux/pagemap.h> #include <linux/writeback.h> /* generic_writepages */ +#include <linux/slab.h> #include <linux/pagevec.h> #include <linux/task_io_accounting_ops.h> @@ -336,16 +337,15 @@ out: /* * Get ref for the oldest snapc for an inode with dirty data... that is, the * only snap context we are allowed to write back. - * - * Caller holds i_lock. 
*/ -static struct ceph_snap_context *__get_oldest_context(struct inode *inode, - u64 *snap_size) +static struct ceph_snap_context *get_oldest_context(struct inode *inode, + u64 *snap_size) { struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_snap_context *snapc = NULL; struct ceph_cap_snap *capsnap = NULL; + spin_lock(&inode->i_lock); list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) { dout(" cap_snap %p snapc %p has %d dirty pages\n", capsnap, capsnap->context, capsnap->dirty_pages); @@ -356,21 +356,11 @@ static struct ceph_snap_context *__get_oldest_context(struct inode *inode, break; } } - if (!snapc && ci->i_snap_realm) { - snapc = ceph_get_snap_context(ci->i_snap_realm->cached_context); + if (!snapc && ci->i_head_snapc) { + snapc = ceph_get_snap_context(ci->i_head_snapc); dout(" head snapc %p has %d dirty pages\n", snapc, ci->i_wrbuffer_ref_head); } - return snapc; -} - -static struct ceph_snap_context *get_oldest_context(struct inode *inode, - u64 *snap_size) -{ - struct ceph_snap_context *snapc = NULL; - - spin_lock(&inode->i_lock); - snapc = __get_oldest_context(inode, snap_size); spin_unlock(&inode->i_lock); return snapc; } @@ -391,7 +381,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) int len = PAGE_CACHE_SIZE; loff_t i_size; int err = 0; - struct ceph_snap_context *snapc; + struct ceph_snap_context *snapc, *oldest; u64 snap_size = 0; long writeback_stat; @@ -412,13 +402,16 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) dout("writepage %p page %p not dirty?\n", inode, page); goto out; } - if (snapc != get_oldest_context(inode, &snap_size)) { + oldest = get_oldest_context(inode, &snap_size); + if (snapc->seq > oldest->seq) { dout("writepage %p page %p snapc %p not writeable - noop\n", inode, page, (void *)page->private); /* we should only noop if called by kswapd */ WARN_ON((current->flags & PF_MEMALLOC) == 0); + ceph_put_snap_context(oldest); goto out; } + 
ceph_put_snap_context(oldest); /* is this a partial page at end of file? */ if (snap_size) @@ -457,7 +450,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) ClearPagePrivate(page); end_page_writeback(page); ceph_put_wrbuffer_cap_refs(ci, 1, snapc); - ceph_put_snap_context(snapc); + ceph_put_snap_context(snapc); /* page's reference */ out: return err; } @@ -516,7 +509,7 @@ static void writepages_finish(struct ceph_osd_request *req, u64 bytes = 0; struct ceph_client *client = ceph_inode_to_client(inode); long writeback_stat; - unsigned issued = __ceph_caps_issued(ci, NULL); + unsigned issued = ceph_caps_issued(ci); /* parse reply */ replyhead = msg->front.iov_base; @@ -557,9 +550,9 @@ static void writepages_finish(struct ceph_osd_request *req, dout("inode %p skipping page %p\n", inode, page); wbc->pages_skipped++; } + ceph_put_snap_context((void *)page->private); page->private = 0; ClearPagePrivate(page); - ceph_put_snap_context(snapc); dout("unlocking %d %p\n", i, page); end_page_writeback(page); @@ -617,7 +610,7 @@ static int ceph_writepages_start(struct address_space *mapping, int range_whole = 0; int should_loop = 1; pgoff_t max_pages = 0, max_pages_ever = 0; - struct ceph_snap_context *snapc = NULL, *last_snapc = NULL; + struct ceph_snap_context *snapc = NULL, *last_snapc = NULL, *pgsnapc; struct pagevec pvec; int done = 0; int rc = 0; @@ -769,9 +762,10 @@ get_more_pages: } /* only if matching snap context */ - if (snapc != (void *)page->private) { - dout("page snapc %p != oldest %p\n", - (void *)page->private, snapc); + pgsnapc = (void *)page->private; + if (pgsnapc->seq > snapc->seq) { + dout("page snapc %p %lld > oldest %p %lld\n", + pgsnapc, pgsnapc->seq, snapc, snapc->seq); unlock_page(page); if (!locked_pages) continue; /* keep looking for snap */ @@ -913,12 +907,19 @@ static int context_is_writeable_or_written(struct inode *inode, struct ceph_snap_context *snapc) { struct ceph_snap_context *oldest = 
get_oldest_context(inode, NULL); - return !oldest || snapc->seq <= oldest->seq; + int ret = !oldest || snapc->seq <= oldest->seq; + + ceph_put_snap_context(oldest); + return ret; } /* * We are only allowed to write into/dirty the page if the page is * clean, or already dirty within the same snap context. + * + * called with page locked. + * return success with page locked, + * or any failure (incl -EAGAIN) with page unlocked. */ static int ceph_update_writeable_page(struct file *file, loff_t pos, unsigned len, @@ -931,8 +932,8 @@ static int ceph_update_writeable_page(struct file *file, int pos_in_page = pos & ~PAGE_CACHE_MASK; int end_in_page = pos_in_page + len; loff_t i_size; - struct ceph_snap_context *snapc; int r; + struct ceph_snap_context *snapc, *oldest; retry_locked: /* writepages currently holds page lock, but if we change that later, */ @@ -942,30 +943,34 @@ retry_locked: BUG_ON(!ci->i_snap_realm); down_read(&mdsc->snap_rwsem); BUG_ON(!ci->i_snap_realm->cached_context); - if (page->private && - (void *)page->private != ci->i_snap_realm->cached_context) { + snapc = (void *)page->private; + if (snapc && snapc != ci->i_head_snapc) { /* * this page is already dirty in another (older) snap * context! is it writeable now? 
*/ - snapc = get_oldest_context(inode, NULL); + oldest = get_oldest_context(inode, NULL); up_read(&mdsc->snap_rwsem); - if (snapc != (void *)page->private) { + if (snapc->seq > oldest->seq) { + ceph_put_snap_context(oldest); dout(" page %p snapc %p not current or oldest\n", - page, (void *)page->private); + page, snapc); /* * queue for writeback, and wait for snapc to * be writeable or written */ - snapc = ceph_get_snap_context((void *)page->private); + snapc = ceph_get_snap_context(snapc); unlock_page(page); ceph_queue_writeback(inode); - wait_event_interruptible(ci->i_cap_wq, + r = wait_event_interruptible(ci->i_cap_wq, context_is_writeable_or_written(inode, snapc)); ceph_put_snap_context(snapc); + if (r == -ERESTARTSYS) + return r; return -EAGAIN; } + ceph_put_snap_context(oldest); /* yay, writeable, do it now (without dropping page lock) */ dout(" page %p snapc %p not current, but oldest\n", @@ -1035,7 +1040,7 @@ static int ceph_write_begin(struct file *file, struct address_space *mapping, int r; do { - /* get a page*/ + /* get a page */ page = grab_cache_page_write_begin(mapping, index, 0); if (!page) return -ENOMEM; diff --git a/fs/ceph/auth.c b/fs/ceph/auth.c index abb204fea6c7..818afe72e6c7 100644 --- a/fs/ceph/auth.c +++ b/fs/ceph/auth.c @@ -1,7 +1,9 @@ #include "ceph_debug.h" #include <linux/module.h> +#include <linux/slab.h> #include <linux/err.h> +#include <linux/slab.h> #include "types.h" #include "auth_none.h" diff --git a/fs/ceph/auth_none.c b/fs/ceph/auth_none.c index b4ef6f0a6c85..8cd9e3af07f7 100644 --- a/fs/ceph/auth_none.c +++ b/fs/ceph/auth_none.c @@ -4,6 +4,7 @@ #include <linux/err.h> #include <linux/module.h> #include <linux/random.h> +#include <linux/slab.h> #include "auth_none.h" #include "auth.h" diff --git a/fs/ceph/auth_none.h b/fs/ceph/auth_none.h index 56c05533a31c..8164df1a08be 100644 --- a/fs/ceph/auth_none.h +++ b/fs/ceph/auth_none.h @@ -1,6 +1,8 @@ #ifndef _FS_CEPH_AUTH_NONE_H #define _FS_CEPH_AUTH_NONE_H +#include <linux/slab.h> + 
#include "auth.h" /* diff --git a/fs/ceph/auth_x.c b/fs/ceph/auth_x.c index f0318427b6da..fee5a08da881 100644 --- a/fs/ceph/auth_x.c +++ b/fs/ceph/auth_x.c @@ -4,6 +4,7 @@ #include <linux/err.h> #include <linux/module.h> #include <linux/random.h> +#include <linux/slab.h> #include "auth_x.h" #include "auth_x_protocol.h" @@ -11,8 +12,6 @@ #include "auth.h" #include "decode.h" -struct kmem_cache *ceph_x_ticketbuf_cachep; - #define TEMP_TICKET_BUF_LEN 256 static void ceph_x_validate_tickets(struct ceph_auth_client *ac, int *pneed); @@ -28,6 +27,12 @@ static int ceph_x_is_authenticated(struct ceph_auth_client *ac) return (ac->want_keys & xi->have_keys) == ac->want_keys; } +static int ceph_x_encrypt_buflen(int ilen) +{ + return sizeof(struct ceph_x_encrypt_header) + ilen + 16 + + sizeof(u32); +} + static int ceph_x_encrypt(struct ceph_crypto_key *secret, void *ibuf, int ilen, void *obuf, size_t olen) { @@ -124,13 +129,12 @@ static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac, char *ticket_buf; u8 struct_v; - dbuf = kmem_cache_alloc(ceph_x_ticketbuf_cachep, GFP_NOFS | GFP_ATOMIC); + dbuf = kmalloc(TEMP_TICKET_BUF_LEN, GFP_NOFS); if (!dbuf) return -ENOMEM; ret = -ENOMEM; - ticket_buf = kmem_cache_alloc(ceph_x_ticketbuf_cachep, - GFP_NOFS | GFP_ATOMIC); + ticket_buf = kmalloc(TEMP_TICKET_BUF_LEN, GFP_NOFS); if (!ticket_buf) goto out_dbuf; @@ -150,6 +154,11 @@ static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac, struct timespec validity; struct ceph_crypto_key old_key; void *tp, *tpend; + struct ceph_timespec new_validity; + struct ceph_crypto_key new_session_key; + struct ceph_buffer *new_ticket_blob; + unsigned long new_expires, new_renew_after; + u64 new_secret_id; ceph_decode_need(&p, end, sizeof(u32) + 1, bad); @@ -182,16 +191,16 @@ static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac, goto bad; memcpy(&old_key, &th->session_key, sizeof(old_key)); - ret = ceph_crypto_key_decode(&th->session_key, &dp, dend); + ret = 
ceph_crypto_key_decode(&new_session_key, &dp, dend); if (ret) goto out; - ceph_decode_copy(&dp, &th->validity, sizeof(th->validity)); - ceph_decode_timespec(&validity, &th->validity); - th->expires = get_seconds() + validity.tv_sec; - th->renew_after = th->expires - (validity.tv_sec / 4); - dout(" expires=%lu renew_after=%lu\n", th->expires, - th->renew_after); + ceph_decode_copy(&dp, &new_validity, sizeof(new_validity)); + ceph_decode_timespec(&validity, &new_validity); + new_expires = get_seconds() + validity.tv_sec; + new_renew_after = new_expires - (validity.tv_sec / 4); + dout(" expires=%lu renew_after=%lu\n", new_expires, + new_renew_after); /* ticket blob for service */ ceph_decode_8_safe(&p, end, is_enc, bad); @@ -216,10 +225,21 @@ static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac, dout(" ticket blob is %d bytes\n", dlen); ceph_decode_need(&tp, tpend, 1 + sizeof(u64), bad); struct_v = ceph_decode_8(&tp); - th->secret_id = ceph_decode_64(&tp); - ret = ceph_decode_buffer(&th->ticket_blob, &tp, tpend); + new_secret_id = ceph_decode_64(&tp); + ret = ceph_decode_buffer(&new_ticket_blob, &tp, tpend); if (ret) goto out; + + /* all is well, update our ticket */ + ceph_crypto_key_destroy(&th->session_key); + if (th->ticket_blob) + ceph_buffer_put(th->ticket_blob); + th->session_key = new_session_key; + th->ticket_blob = new_ticket_blob; + th->validity = new_validity; + th->secret_id = new_secret_id; + th->expires = new_expires; + th->renew_after = new_renew_after; dout(" got ticket service %d (%s) secret_id %lld len %d\n", type, ceph_entity_type_name(type), th->secret_id, (int)th->ticket_blob->vec.iov_len); @@ -228,9 +248,9 @@ static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac, ret = 0; out: - kmem_cache_free(ceph_x_ticketbuf_cachep, ticket_buf); + kfree(ticket_buf); out_dbuf: - kmem_cache_free(ceph_x_ticketbuf_cachep, dbuf); + kfree(dbuf); return ret; bad: @@ -242,7 +262,7 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac, 
struct ceph_x_ticket_handler *th, struct ceph_x_authorizer *au) { - int len; + int maxlen; struct ceph_x_authorize_a *msg_a; struct ceph_x_authorize_b msg_b; void *p, *end; @@ -253,15 +273,15 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac, dout("build_authorizer for %s %p\n", ceph_entity_type_name(th->service), au); - len = sizeof(*msg_a) + sizeof(msg_b) + sizeof(u32) + - ticket_blob_len + 16; - dout(" need len %d\n", len); - if (au->buf && au->buf->alloc_len < len) { + maxlen = sizeof(*msg_a) + sizeof(msg_b) + + ceph_x_encrypt_buflen(ticket_blob_len); + dout(" need len %d\n", maxlen); + if (au->buf && au->buf->alloc_len < maxlen) { ceph_buffer_put(au->buf); au->buf = NULL; } if (!au->buf) { - au->buf = ceph_buffer_new(len, GFP_NOFS); + au->buf = ceph_buffer_new(maxlen, GFP_NOFS); if (!au->buf) return -ENOMEM; } @@ -296,6 +316,7 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac, au->buf->vec.iov_len = p - au->buf->vec.iov_base; dout(" built authorizer nonce %llx len %d\n", au->nonce, (int)au->buf->vec.iov_len); + BUG_ON(au->buf->vec.iov_len > maxlen); return 0; out_buf: @@ -581,8 +602,6 @@ static void ceph_x_destroy(struct ceph_auth_client *ac) remove_ticket_handler(ac, th); } - kmem_cache_destroy(ceph_x_ticketbuf_cachep); - kfree(ac->private); ac->private = NULL; } @@ -617,26 +636,20 @@ int ceph_x_init(struct ceph_auth_client *ac) int ret; dout("ceph_x_init %p\n", ac); + ret = -ENOMEM; xi = kzalloc(sizeof(*xi), GFP_NOFS); if (!xi) - return -ENOMEM; + goto out; - ret = -ENOMEM; - ceph_x_ticketbuf_cachep = kmem_cache_create("ceph_x_ticketbuf", - TEMP_TICKET_BUF_LEN, 8, - (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD), - NULL); - if (!ceph_x_ticketbuf_cachep) - goto done_nomem; ret = -EINVAL; if (!ac->secret) { pr_err("no secret set (for auth_x protocol)\n"); - goto done_nomem; + goto out_nomem; } ret = ceph_crypto_key_unarmor(&xi->secret, ac->secret); if (ret) - goto done_nomem; + goto out_nomem; xi->starting = true; xi->ticket_handlers = 
RB_ROOT; @@ -646,10 +659,9 @@ int ceph_x_init(struct ceph_auth_client *ac) ac->ops = &ceph_x_ops; return 0; -done_nomem: +out_nomem: kfree(xi); - if (ceph_x_ticketbuf_cachep) - kmem_cache_destroy(ceph_x_ticketbuf_cachep); +out: return ret; } diff --git a/fs/ceph/buffer.c b/fs/ceph/buffer.c index b98086c7aeba..c67535d70aa6 100644 --- a/fs/ceph/buffer.c +++ b/fs/ceph/buffer.c @@ -1,5 +1,8 @@ #include "ceph_debug.h" + +#include <linux/slab.h> + #include "buffer.h" #include "decode.h" diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index db122bb357b8..0c1681806867 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -3,6 +3,7 @@ #include <linux/fs.h> #include <linux/kernel.h> #include <linux/sched.h> +#include <linux/slab.h> #include <linux/vmalloc.h> #include <linux/wait.h> #include <linux/writeback.h> @@ -1204,6 +1205,12 @@ retry: if (capsnap->dirty_pages || capsnap->writing) continue; + /* + * if cap writeback already occurred, we should have dropped + * the capsnap in ceph_put_wrbuffer_cap_refs. + */ + BUG_ON(capsnap->dirty == 0); + /* pick mds, take s_mutex */ mds = __ceph_get_cap_mds(ci, &mseq); if (session && session->s_mds != mds) { @@ -1407,6 +1414,7 @@ static int try_nonblocking_invalidate(struct inode *inode) */ void ceph_check_caps(struct ceph_inode_info *ci, int flags, struct ceph_mds_session *session) + __releases(session->s_mutex) { struct ceph_client *client = ceph_inode_to_client(&ci->vfs_inode); struct ceph_mds_client *mdsc = &client->mdsc; @@ -1414,7 +1422,6 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags, struct ceph_cap *cap; int file_wanted, used; int took_snap_rwsem = 0; /* true if mdsc->snap_rwsem held */ - int drop_session_lock = session ? 
0 : 1; int issued, implemented, want, retain, revoking, flushing = 0; int mds = -1; /* keep track of how far we've gone through i_caps list to avoid an infinite loop on retry */ @@ -1639,7 +1646,7 @@ ack: if (queue_invalidate) ceph_queue_invalidate(inode); - if (session && drop_session_lock) + if (session) mutex_unlock(&session->s_mutex); if (took_snap_rwsem) up_read(&mdsc->snap_rwsem); @@ -1854,8 +1861,8 @@ static void kick_flushing_capsnaps(struct ceph_mds_client *mdsc, } else { pr_err("%p auth cap %p not mds%d ???\n", inode, cap, session->s_mds); - spin_unlock(&inode->i_lock); } + spin_unlock(&inode->i_lock); } } @@ -2117,8 +2124,8 @@ void ceph_put_cap_refs(struct ceph_inode_info *ci, int had) } spin_unlock(&inode->i_lock); - dout("put_cap_refs %p had %s %s\n", inode, ceph_cap_string(had), - last ? "last" : ""); + dout("put_cap_refs %p had %s%s%s\n", inode, ceph_cap_string(had), + last ? " last" : "", put ? " put" : ""); if (last && !flushsnaps) ceph_check_caps(ci, 0, NULL); @@ -2142,7 +2149,8 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr, { struct inode *inode = &ci->vfs_inode; int last = 0; - int last_snap = 0; + int complete_capsnap = 0; + int drop_capsnap = 0; int found = 0; struct ceph_cap_snap *capsnap = NULL; @@ -2165,19 +2173,32 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr, list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) { if (capsnap->context == snapc) { found = 1; - capsnap->dirty_pages -= nr; - last_snap = !capsnap->dirty_pages; break; } } BUG_ON(!found); + capsnap->dirty_pages -= nr; + if (capsnap->dirty_pages == 0) { + complete_capsnap = 1; + if (capsnap->dirty == 0) + /* cap writeback completed before we created + * the cap_snap; no FLUSHSNAP is needed */ + drop_capsnap = 1; + } dout("put_wrbuffer_cap_refs on %p cap_snap %p " - " snap %lld %d/%d -> %d/%d %s%s\n", + " snap %lld %d/%d -> %d/%d %s%s%s\n", inode, capsnap, capsnap->context->seq, ci->i_wrbuffer_ref+nr, capsnap->dirty_pages + nr, 
ci->i_wrbuffer_ref, capsnap->dirty_pages, last ? " (wrbuffer last)" : "", - last_snap ? " (capsnap last)" : ""); + complete_capsnap ? " (complete capsnap)" : "", + drop_capsnap ? " (drop capsnap)" : ""); + if (drop_capsnap) { + ceph_put_snap_context(capsnap->context); + list_del(&capsnap->ci_item); + list_del(&capsnap->flushing_item); + ceph_put_cap_snap(capsnap); + } } spin_unlock(&inode->i_lock); @@ -2185,28 +2206,31 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr, if (last) { ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL); iput(inode); - } else if (last_snap) { + } else if (complete_capsnap) { ceph_flush_snaps(ci); wake_up(&ci->i_cap_wq); } + if (drop_capsnap) + iput(inode); } /* * Handle a cap GRANT message from the MDS. (Note that a GRANT may * actually be a revocation if it specifies a smaller cap set.) * - * caller holds s_mutex. + * caller holds s_mutex and i_lock, we drop both. + * * return value: * 0 - ok * 1 - check_caps on auth cap only (writeback) * 2 - check_caps (ack revoke) */ -static int handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, - struct ceph_mds_session *session, - struct ceph_cap *cap, - struct ceph_buffer *xattr_buf) +static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, + struct ceph_mds_session *session, + struct ceph_cap *cap, + struct ceph_buffer *xattr_buf) __releases(inode->i_lock) - + __releases(session->s_mutex) { struct ceph_inode_info *ci = ceph_inode(inode); int mds = session->s_mds; @@ -2216,7 +2240,7 @@ static int handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, u64 size = le64_to_cpu(grant->size); u64 max_size = le64_to_cpu(grant->max_size); struct timespec mtime, atime, ctime; - int reply = 0; + int check_caps = 0; int wake = 0; int writeback = 0; int revoked_rdcache = 0; @@ -2329,11 +2353,12 @@ static int handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, if ((used & ~newcaps) & CEPH_CAP_FILE_BUFFER) writeback = 1; /* will 
delay ack */ else if (dirty & ~newcaps) - reply = 1; /* initiate writeback in check_caps */ + check_caps = 1; /* initiate writeback in check_caps */ else if (((used & ~newcaps) & CEPH_CAP_FILE_CACHE) == 0 || revoked_rdcache) - reply = 2; /* send revoke ack in check_caps */ + check_caps = 2; /* send revoke ack in check_caps */ cap->issued = newcaps; + cap->implemented |= newcaps; } else if (cap->issued == newcaps) { dout("caps unchanged: %s -> %s\n", ceph_cap_string(cap->issued), ceph_cap_string(newcaps)); @@ -2346,6 +2371,7 @@ static int handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, * pending revocation */ wake = 1; } + BUG_ON(cap->issued & ~cap->implemented); spin_unlock(&inode->i_lock); if (writeback) @@ -2359,7 +2385,14 @@ static int handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, ceph_queue_invalidate(inode); if (wake) wake_up(&ci->i_cap_wq); - return reply; + + if (check_caps == 1) + ceph_check_caps(ci, CHECK_CAPS_NODELAY|CHECK_CAPS_AUTHONLY, + session); + else if (check_caps == 2) + ceph_check_caps(ci, CHECK_CAPS_NODELAY, session); + else + mutex_unlock(&session->s_mutex); } /* @@ -2454,8 +2487,8 @@ static void handle_cap_flushsnap_ack(struct inode *inode, u64 flush_tid, break; } WARN_ON(capsnap->dirty_pages || capsnap->writing); - dout(" removing cap_snap %p follows %lld\n", - capsnap, follows); + dout(" removing %p cap_snap %p follows %lld\n", + inode, capsnap, follows); ceph_put_snap_context(capsnap->context); list_del(&capsnap->ci_item); list_del(&capsnap->flushing_item); @@ -2548,9 +2581,8 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex, ci->i_cap_exporting_issued = cap->issued; } __ceph_remove_cap(cap); - } else { - WARN_ON(!cap); } + /* else, we already released it */ spin_unlock(&inode->i_lock); } @@ -2621,9 +2653,7 @@ void ceph_handle_caps(struct ceph_mds_session *session, u64 cap_id; u64 size, max_size; u64 tid; - int check_caps = 0; void *snaptrace; - int r; dout("handle_caps 
from mds%d\n", mds); @@ -2668,8 +2698,9 @@ void ceph_handle_caps(struct ceph_mds_session *session, case CEPH_CAP_OP_IMPORT: handle_cap_import(mdsc, inode, h, session, snaptrace, le32_to_cpu(h->snap_trace_len)); - check_caps = 1; /* we may have sent a RELEASE to the old auth */ - goto done; + ceph_check_caps(ceph_inode(inode), CHECK_CAPS_NODELAY, + session); + goto done_unlocked; } /* the rest require a cap */ @@ -2686,16 +2717,8 @@ void ceph_handle_caps(struct ceph_mds_session *session, switch (op) { case CEPH_CAP_OP_REVOKE: case CEPH_CAP_OP_GRANT: - r = handle_cap_grant(inode, h, session, cap, msg->middle); - if (r == 1) - ceph_check_caps(ceph_inode(inode), - CHECK_CAPS_NODELAY|CHECK_CAPS_AUTHONLY, - session); - else if (r == 2) - ceph_check_caps(ceph_inode(inode), - CHECK_CAPS_NODELAY, - session); - break; + handle_cap_grant(inode, h, session, cap, msg->middle); + goto done_unlocked; case CEPH_CAP_OP_FLUSH_ACK: handle_cap_flush_ack(inode, tid, h, session, cap); @@ -2713,9 +2736,7 @@ void ceph_handle_caps(struct ceph_mds_session *session, done: mutex_unlock(&session->s_mutex); - - if (check_caps) - ceph_check_caps(ceph_inode(inode), CHECK_CAPS_NODELAY, NULL); +done_unlocked: if (inode) iput(inode); return; @@ -2838,11 +2859,18 @@ int ceph_encode_inode_release(void **p, struct inode *inode, struct ceph_cap *cap; struct ceph_mds_request_release *rel = *p; int ret = 0; - - dout("encode_inode_release %p mds%d drop %s unless %s\n", inode, - mds, ceph_cap_string(drop), ceph_cap_string(unless)); + int used = 0; spin_lock(&inode->i_lock); + used = __ceph_caps_used(ci); + + dout("encode_inode_release %p mds%d used %s drop %s unless %s\n", inode, + mds, ceph_cap_string(used), ceph_cap_string(drop), + ceph_cap_string(unless)); + + /* only drop unused caps */ + drop &= ~used; + cap = __get_cap_for_mds(ci, mds); if (cap && __cap_is_valid(cap)) { if (force || diff --git a/fs/ceph/crypto.c b/fs/ceph/crypto.c index 291ac288e791..f704b3b62424 100644 --- a/fs/ceph/crypto.c +++ 
b/fs/ceph/crypto.c @@ -3,6 +3,7 @@ #include <linux/err.h> #include <linux/scatterlist.h> +#include <linux/slab.h> #include <crypto/hash.h> #include "crypto.h" diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c index e159f1415110..f7048da92acc 100644 --- a/fs/ceph/debugfs.c +++ b/fs/ceph/debugfs.c @@ -1,6 +1,7 @@ #include "ceph_debug.h" #include <linux/device.h> +#include <linux/slab.h> #include <linux/module.h> #include <linux/ctype.h> #include <linux/debugfs.h> diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 5107384ee029..650d2db5ed26 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -3,6 +3,7 @@ #include <linux/spinlock.h> #include <linux/fs_struct.h> #include <linux/namei.h> +#include <linux/slab.h> #include <linux/sched.h> #include "super.h" @@ -170,11 +171,11 @@ more: spin_lock(&inode->i_lock); spin_lock(&dcache_lock); + last = dentry; + if (err < 0) goto out_unlock; - last = dentry; - p = p->prev; filp->f_pos++; @@ -288,8 +289,10 @@ more: CEPH_MDS_OP_LSSNAP : CEPH_MDS_OP_READDIR; /* discard old result, if any */ - if (fi->last_readdir) + if (fi->last_readdir) { ceph_mdsc_put_request(fi->last_readdir); + fi->last_readdir = NULL; + } /* requery frag tree, as the frag topology may have changed */ frag = ceph_choose_frag(ceph_inode(inode), frag, NULL, NULL); @@ -309,7 +312,7 @@ more: req->r_readdir_offset = fi->next_offset; req->r_args.readdir.frag = cpu_to_le32(frag); req->r_args.readdir.max_entries = cpu_to_le32(max_entries); - req->r_num_caps = max_entries; + req->r_num_caps = max_entries + 1; err = ceph_mdsc_do_request(mdsc, NULL, req); if (err < 0) { ceph_mdsc_put_request(req); @@ -486,6 +489,7 @@ struct dentry *ceph_finish_lookup(struct ceph_mds_request *req, struct inode *inode = ceph_get_snapdir(parent); dout("ENOENT on snapdir %p '%.*s', linking to snapdir %p\n", dentry, dentry->d_name.len, dentry->d_name.name, inode); + BUG_ON(!d_unhashed(dentry)); d_add(dentry, inode); err = 0; } @@ -876,7 +880,16 @@ static int ceph_rename(struct inode *old_dir, 
struct dentry *old_dentry, * do_request, above). If there is no trace, we need * to do it here. */ + + /* d_move screws up d_subdirs order */ + ceph_i_clear(new_dir, CEPH_I_COMPLETE); + d_move(old_dentry, new_dentry); + + /* ensure target dentry is invalidated, despite + rehashing bug in vfs_rename_dir */ + new_dentry->d_time = jiffies; + ceph_dentry(new_dentry)->lease_shared_gen = 0; } ceph_mdsc_put_request(req); return err; diff --git a/fs/ceph/export.c b/fs/ceph/export.c index fc68e39cbad6..9d67572fb328 100644 --- a/fs/ceph/export.c +++ b/fs/ceph/export.c @@ -1,6 +1,7 @@ #include "ceph_debug.h" #include <linux/exportfs.h> +#include <linux/slab.h> #include <asm/unaligned.h> #include "super.h" diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 5d2af8464f6a..ed6f19721d6e 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -1,6 +1,7 @@ #include "ceph_debug.h" #include <linux/sched.h> +#include <linux/slab.h> #include <linux/file.h> #include <linux/namei.h> #include <linux/writeback.h> @@ -664,7 +665,8 @@ more: * throw out any page cache pages in this range. this * may block. */ - truncate_inode_pages_range(inode->i_mapping, pos, pos+len); + truncate_inode_pages_range(inode->i_mapping, pos, + (pos+len) | (PAGE_CACHE_SIZE-1)); } else { pages = alloc_page_vector(num_pages); if (IS_ERR(pages)) { diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 7abe1aed819b..261f3e6c0bcf 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -378,6 +378,22 @@ void ceph_destroy_inode(struct inode *inode) ceph_queue_caps_release(inode); + /* + * we may still have a snap_realm reference if there are stray + * caps in i_cap_exporting_issued or i_snap_caps. 
+ */ + if (ci->i_snap_realm) { + struct ceph_mds_client *mdsc = + &ceph_client(ci->vfs_inode.i_sb)->mdsc; + struct ceph_snap_realm *realm = ci->i_snap_realm; + + dout(" dropping residual ref to snap realm %p\n", realm); + spin_lock(&realm->inodes_with_caps_lock); + list_del_init(&ci->i_snap_realm_item); + spin_unlock(&realm->inodes_with_caps_lock); + ceph_put_snap_realm(mdsc, realm); + } + kfree(ci->i_symlink); while ((n = rb_first(&ci->i_fragtree)) != NULL) { frag = rb_entry(n, struct ceph_inode_frag, node); @@ -870,6 +886,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, struct inode *in = NULL; struct ceph_mds_reply_inode *ininfo; struct ceph_vino vino; + struct ceph_client *client = ceph_sb_to_client(sb); int i = 0; int err = 0; @@ -933,7 +950,14 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, return err; } - if (rinfo->head->is_dentry && !req->r_aborted) { + /* + * ignore null lease/binding on snapdir ENOENT, or else we + * will have trouble splicing in the virtual snapdir later + */ + if (rinfo->head->is_dentry && !req->r_aborted && + (rinfo->head->is_target || strncmp(req->r_dentry->d_name.name, + client->mount_args->snapdir_name, + req->r_dentry->d_name.len))) { /* * lookup link rename : null -> possibly existing inode * mknod symlink mkdir : null -> new inode @@ -973,6 +997,10 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, dn, dn->d_name.len, dn->d_name.name); dout("fill_trace doing d_move %p -> %p\n", req->r_old_dentry, dn); + + /* d_move screws up d_subdirs order */ + ceph_i_clear(dir, CEPH_I_COMPLETE); + d_move(req->r_old_dentry, dn); dout(" src %p '%.*s' dst %p '%.*s'\n", req->r_old_dentry, diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index a2600101ec22..60a9a4ae47be 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -1,6 +1,7 @@ #include "ceph_debug.h" #include <linux/wait.h> +#include <linux/slab.h> #include <linux/sched.h> #include 
"mds_client.h" @@ -328,6 +329,8 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc, struct ceph_mds_session *s; s = kzalloc(sizeof(*s), GFP_NOFS); + if (!s) + return ERR_PTR(-ENOMEM); s->s_mdsc = mdsc; s->s_mds = mds; s->s_state = CEPH_MDS_SESSION_NEW; @@ -529,7 +532,7 @@ static void __unregister_request(struct ceph_mds_client *mdsc, { dout("__unregister_request %p tid %lld\n", req, req->r_tid); rb_erase(&req->r_node, &mdsc->request_tree); - ceph_mdsc_put_request(req); + RB_CLEAR_NODE(&req->r_node); if (req->r_unsafe_dir) { struct ceph_inode_info *ci = ceph_inode(req->r_unsafe_dir); @@ -538,6 +541,8 @@ static void __unregister_request(struct ceph_mds_client *mdsc, list_del_init(&req->r_unsafe_dir_item); spin_unlock(&ci->i_unsafe_lock); } + + ceph_mdsc_put_request(req); } /* @@ -862,6 +867,7 @@ static int send_renew_caps(struct ceph_mds_client *mdsc, if (time_after_eq(jiffies, session->s_cap_ttl) && time_after_eq(session->s_cap_ttl, session->s_renew_requested)) pr_info("mds%d caps stale\n", session->s_mds); + session->s_renew_requested = jiffies; /* do not try to renew caps until a recovering mds has reconnected * with its clients. 
*/ @@ -874,7 +880,6 @@ static int send_renew_caps(struct ceph_mds_client *mdsc, dout("send_renew_caps to mds%d (%s)\n", session->s_mds, ceph_mds_state_name(state)); - session->s_renew_requested = jiffies; msg = create_session_msg(CEPH_SESSION_REQUEST_RENEWCAPS, ++session->s_renew_seq); if (IS_ERR(msg)) @@ -1566,8 +1571,13 @@ static int __do_request(struct ceph_mds_client *mdsc, /* get, open session */ session = __ceph_lookup_mds_session(mdsc, mds); - if (!session) + if (!session) { session = register_session(mdsc, mds); + if (IS_ERR(session)) { + err = PTR_ERR(session); + goto finish; + } + } dout("do_request mds%d session %p state %s\n", mds, session, session_state_name(session->s_state)); if (session->s_state != CEPH_MDS_SESSION_OPEN && @@ -1770,7 +1780,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) dout("handle_reply %p\n", req); /* correct session? */ - if (!req->r_session && req->r_session != session) { + if (req->r_session != session) { pr_err("mdsc_handle_reply got %llu on session mds%d" " not mds%d\n", tid, session->s_mds, req->r_session ? 
req->r_session->s_mds : -1); @@ -2682,29 +2692,41 @@ void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc) */ static void wait_unsafe_requests(struct ceph_mds_client *mdsc, u64 want_tid) { - struct ceph_mds_request *req = NULL; + struct ceph_mds_request *req = NULL, *nextreq; struct rb_node *n; mutex_lock(&mdsc->mutex); dout("wait_unsafe_requests want %lld\n", want_tid); +restart: req = __get_oldest_req(mdsc); while (req && req->r_tid <= want_tid) { + /* find next request */ + n = rb_next(&req->r_node); + if (n) + nextreq = rb_entry(n, struct ceph_mds_request, r_node); + else + nextreq = NULL; if ((req->r_op & CEPH_MDS_OP_WRITE)) { /* write op */ ceph_mdsc_get_request(req); + if (nextreq) + ceph_mdsc_get_request(nextreq); mutex_unlock(&mdsc->mutex); dout("wait_unsafe_requests wait on %llu (want %llu)\n", req->r_tid, want_tid); wait_for_completion(&req->r_safe_completion); mutex_lock(&mdsc->mutex); - n = rb_next(&req->r_node); ceph_mdsc_put_request(req); - } else { - n = rb_next(&req->r_node); + if (!nextreq) + break; /* next dne before, so we're done! 
*/ + if (RB_EMPTY_NODE(&nextreq->r_node)) { + /* next request was removed from tree */ + ceph_mdsc_put_request(nextreq); + goto restart; + } + ceph_mdsc_put_request(nextreq); /* won't go away */ } - if (!n) - break; - req = rb_entry(n, struct ceph_mds_request, r_node); + req = nextreq; } mutex_unlock(&mdsc->mutex); dout("wait_unsafe_requests done\n"); diff --git a/fs/ceph/messenger.c b/fs/ceph/messenger.c index 781656a49bf8..509f57d9ccb3 100644 --- a/fs/ceph/messenger.c +++ b/fs/ceph/messenger.c @@ -6,6 +6,7 @@ #include <linux/inet.h> #include <linux/kthread.h> #include <linux/net.h> +#include <linux/slab.h> #include <linux/socket.h> #include <linux/string.h> #include <net/tcp.h> @@ -29,6 +30,10 @@ static char tag_msg = CEPH_MSGR_TAG_MSG; static char tag_ack = CEPH_MSGR_TAG_ACK; static char tag_keepalive = CEPH_MSGR_TAG_KEEPALIVE; +#ifdef CONFIG_LOCKDEP +static struct lock_class_key socket_class; +#endif + static void queue_con(struct ceph_connection *con); static void con_work(struct work_struct *); @@ -227,6 +232,10 @@ static struct socket *ceph_tcp_connect(struct ceph_connection *con) con->sock = sock; sock->sk->sk_allocation = GFP_NOFS; +#ifdef CONFIG_LOCKDEP + lockdep_set_class(&sock->sk->sk_lock, &socket_class); +#endif + set_sock_callbacks(sock, con); dout("connect %s\n", pr_addr(&con->peer_addr.in_addr)); @@ -332,6 +341,7 @@ static void reset_connection(struct ceph_connection *con) con->out_msg = NULL; } con->in_seq = 0; + con->in_seq_acked = 0; } /* @@ -366,6 +376,14 @@ void ceph_con_open(struct ceph_connection *con, struct ceph_entity_addr *addr) } /* + * return true if this connection ever successfully opened + */ +bool ceph_con_opened(struct ceph_connection *con) +{ + return con->connect_seq > 0; +} + +/* * generic get/put */ struct ceph_connection *ceph_con_get(struct ceph_connection *con) @@ -830,13 +848,6 @@ static void prepare_read_connect(struct ceph_connection *con) con->in_base_pos = 0; } -static void prepare_read_connect_retry(struct 
ceph_connection *con) -{ - dout("prepare_read_connect_retry %p\n", con); - con->in_base_pos = strlen(CEPH_BANNER) + sizeof(con->actual_peer_addr) - + sizeof(con->peer_addr_for_me); -} - static void prepare_read_ack(struct ceph_connection *con) { dout("prepare_read_ack %p\n", con); @@ -1146,7 +1157,7 @@ static int process_connect(struct ceph_connection *con) } con->auth_retry = 1; prepare_write_connect(con->msgr, con, 0); - prepare_read_connect_retry(con); + prepare_read_connect(con); break; case CEPH_MSGR_TAG_RESETSESSION: @@ -1323,6 +1334,7 @@ static int read_partial_message(struct ceph_connection *con) unsigned front_len, middle_len, data_len, data_off; int datacrc = con->msgr->nocrc; int skip; + u64 seq; dout("read_partial_message con %p msg %p\n", con, m); @@ -1357,6 +1369,25 @@ static int read_partial_message(struct ceph_connection *con) return -EIO; data_off = le16_to_cpu(con->in_hdr.data_off); + /* verify seq# */ + seq = le64_to_cpu(con->in_hdr.seq); + if ((s64)seq - (s64)con->in_seq < 1) { + pr_info("skipping %s%lld %s seq %lld, expected %lld\n", + ENTITY_NAME(con->peer_name), + pr_addr(&con->peer_addr.in_addr), + seq, con->in_seq + 1); + con->in_base_pos = -front_len - middle_len - data_len - + sizeof(m->footer); + con->in_tag = CEPH_MSGR_TAG_READY; + con->in_seq++; + return 0; + } else if ((s64)seq - (s64)con->in_seq > 1) { + pr_err("read_partial_message bad seq %lld expected %lld\n", + seq, con->in_seq + 1); + con->error_msg = "bad message sequence # for incoming message"; + return -EBADMSG; + } + /* allocate message? 
*/ if (!con->in_msg) { dout("got hdr type %d front %d data %d\n", con->in_hdr.type, @@ -1368,6 +1399,7 @@ static int read_partial_message(struct ceph_connection *con) con->in_base_pos = -front_len - middle_len - data_len - sizeof(m->footer); con->in_tag = CEPH_MSGR_TAG_READY; + con->in_seq++; return 0; } if (IS_ERR(con->in_msg)) { @@ -1843,8 +1875,6 @@ static void ceph_fault(struct ceph_connection *con) goto out; } - clear_bit(BUSY, &con->state); /* to avoid an improbable race */ - mutex_lock(&con->mutex); if (test_bit(CLOSED, &con->state)) goto out_unlock; @@ -2021,6 +2051,7 @@ void ceph_con_revoke_message(struct ceph_connection *con, struct ceph_msg *msg) ceph_msg_put(con->in_msg); con->in_msg = NULL; con->in_tag = CEPH_MSGR_TAG_READY; + con->in_seq++; } else { dout("con_revoke_pages %p msg %p pages %p no-op\n", con, con->in_msg, msg); diff --git a/fs/ceph/messenger.h b/fs/ceph/messenger.h index 4caaa5911110..a343dae73cdc 100644 --- a/fs/ceph/messenger.h +++ b/fs/ceph/messenger.h @@ -223,6 +223,7 @@ extern void ceph_con_init(struct ceph_messenger *msgr, struct ceph_connection *con); extern void ceph_con_open(struct ceph_connection *con, struct ceph_entity_addr *addr); +extern bool ceph_con_opened(struct ceph_connection *con); extern void ceph_con_close(struct ceph_connection *con); extern void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg); extern void ceph_con_revoke(struct ceph_connection *con, struct ceph_msg *msg); diff --git a/fs/ceph/mon_client.c b/fs/ceph/mon_client.c index 890597c09d43..8fdc011ca956 100644 --- a/fs/ceph/mon_client.c +++ b/fs/ceph/mon_client.c @@ -1,6 +1,7 @@ #include "ceph_debug.h" #include <linux/types.h> +#include <linux/slab.h> #include <linux/random.h> #include <linux/sched.h> diff --git a/fs/ceph/osd_client.c b/fs/ceph/osd_client.c index dbe63db9762f..c7b4dedaace6 100644 --- a/fs/ceph/osd_client.c +++ b/fs/ceph/osd_client.c @@ -413,11 +413,22 @@ static void remove_old_osds(struct ceph_osd_client *osdc, int 
remove_all) */ static int __reset_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd) { + struct ceph_osd_request *req; int ret = 0; dout("__reset_osd %p osd%d\n", osd, osd->o_osd); if (list_empty(&osd->o_requests)) { __remove_osd(osdc, osd); + } else if (memcmp(&osdc->osdmap->osd_addr[osd->o_osd], + &osd->o_con.peer_addr, + sizeof(osd->o_con.peer_addr)) == 0 && + !ceph_con_opened(&osd->o_con)) { + dout(" osd addr hasn't changed and connection never opened," + " letting msgr retry"); + /* touch each r_stamp for handle_timeout()'s benfit */ + list_for_each_entry(req, &osd->o_requests, r_osd_item) + req->r_stamp = jiffies; + ret = -EAGAIN; } else { ceph_con_close(&osd->o_con); ceph_con_open(&osd->o_con, &osdc->osdmap->osd_addr[osd->o_osd]); @@ -633,7 +644,7 @@ static int __send_request(struct ceph_osd_client *osdc, reqhead->flags |= cpu_to_le32(req->r_flags); /* e.g., RETRY */ reqhead->reassert_version = req->r_reassert_version; - req->r_sent_stamp = jiffies; + req->r_stamp = jiffies; list_move_tail(&osdc->req_lru, &req->r_req_lru_item); ceph_msg_get(req->r_request); /* send consumes a ref */ @@ -660,7 +671,7 @@ static void handle_timeout(struct work_struct *work) unsigned long timeout = osdc->client->mount_args->osd_timeout * HZ; unsigned long keepalive = osdc->client->mount_args->osd_keepalive_timeout * HZ; - unsigned long last_sent = 0; + unsigned long last_stamp = 0; struct rb_node *p; struct list_head slow_osds; @@ -697,12 +708,12 @@ static void handle_timeout(struct work_struct *work) req = list_entry(osdc->req_lru.next, struct ceph_osd_request, r_req_lru_item); - if (time_before(jiffies, req->r_sent_stamp + timeout)) + if (time_before(jiffies, req->r_stamp + timeout)) break; - BUG_ON(req == last_req && req->r_sent_stamp == last_sent); + BUG_ON(req == last_req && req->r_stamp == last_stamp); last_req = req; - last_sent = req->r_sent_stamp; + last_stamp = req->r_stamp; osd = req->r_osd; BUG_ON(!osd); @@ -718,7 +729,7 @@ static void handle_timeout(struct 
work_struct *work) */ INIT_LIST_HEAD(&slow_osds); list_for_each_entry(req, &osdc->req_lru, r_req_lru_item) { - if (time_before(jiffies, req->r_sent_stamp + keepalive)) + if (time_before(jiffies, req->r_stamp + keepalive)) break; osd = req->r_osd; @@ -862,7 +873,9 @@ static int __kick_requests(struct ceph_osd_client *osdc, dout("kick_requests osd%d\n", kickosd ? kickosd->o_osd : -1); if (kickosd) { - __reset_osd(osdc, kickosd); + err = __reset_osd(osdc, kickosd); + if (err == -EAGAIN) + return 1; } else { for (p = rb_first(&osdc->osds); p; p = n) { struct ceph_osd *osd = @@ -913,7 +926,7 @@ static int __kick_requests(struct ceph_osd_client *osdc, kick: dout("kicking %p tid %llu osd%d\n", req, req->r_tid, - req->r_osd->o_osd); + req->r_osd ? req->r_osd->o_osd : -1); req->r_flags |= CEPH_OSD_FLAG_RETRY; err = __send_request(osdc, req); if (err) { diff --git a/fs/ceph/osd_client.h b/fs/ceph/osd_client.h index 1b1a3ca43afc..b0759911e7c3 100644 --- a/fs/ceph/osd_client.h +++ b/fs/ceph/osd_client.h @@ -70,7 +70,7 @@ struct ceph_osd_request { char r_oid[40]; /* object name */ int r_oid_len; - unsigned long r_sent_stamp; + unsigned long r_stamp; /* send OR check time */ bool r_resend; /* msg send failed, needs retry */ struct ceph_file_layout r_file_layout; diff --git a/fs/ceph/osdmap.c b/fs/ceph/osdmap.c index b83f2692b835..2e2c15eed82a 100644 --- a/fs/ceph/osdmap.c +++ b/fs/ceph/osdmap.c @@ -1,4 +1,7 @@ +#include "ceph_debug.h" + +#include <linux/slab.h> #include <asm/div64.h> #include "super.h" @@ -6,7 +9,6 @@ #include "crush/hash.h" #include "crush/mapper.h" #include "decode.h" -#include "ceph_debug.h" char *ceph_osdmap_state_str(char *str, int len, int state) { @@ -312,71 +314,6 @@ bad: return ERR_PTR(err); } - -/* - * osd map - */ -void ceph_osdmap_destroy(struct ceph_osdmap *map) -{ - dout("osdmap_destroy %p\n", map); - if (map->crush) - crush_destroy(map->crush); - while (!RB_EMPTY_ROOT(&map->pg_temp)) { - struct ceph_pg_mapping *pg = - 
rb_entry(rb_first(&map->pg_temp), - struct ceph_pg_mapping, node); - rb_erase(&pg->node, &map->pg_temp); - kfree(pg); - } - while (!RB_EMPTY_ROOT(&map->pg_pools)) { - struct ceph_pg_pool_info *pi = - rb_entry(rb_first(&map->pg_pools), - struct ceph_pg_pool_info, node); - rb_erase(&pi->node, &map->pg_pools); - kfree(pi); - } - kfree(map->osd_state); - kfree(map->osd_weight); - kfree(map->osd_addr); - kfree(map); -} - -/* - * adjust max osd value. reallocate arrays. - */ -static int osdmap_set_max_osd(struct ceph_osdmap *map, int max) -{ - u8 *state; - struct ceph_entity_addr *addr; - u32 *weight; - - state = kcalloc(max, sizeof(*state), GFP_NOFS); - addr = kcalloc(max, sizeof(*addr), GFP_NOFS); - weight = kcalloc(max, sizeof(*weight), GFP_NOFS); - if (state == NULL || addr == NULL || weight == NULL) { - kfree(state); - kfree(addr); - kfree(weight); - return -ENOMEM; - } - - /* copy old? */ - if (map->osd_state) { - memcpy(state, map->osd_state, map->max_osd*sizeof(*state)); - memcpy(addr, map->osd_addr, map->max_osd*sizeof(*addr)); - memcpy(weight, map->osd_weight, map->max_osd*sizeof(*weight)); - kfree(map->osd_state); - kfree(map->osd_addr); - kfree(map->osd_weight); - } - - map->osd_state = state; - map->osd_weight = weight; - map->osd_addr = addr; - map->max_osd = max; - return 0; -} - /* * rbtree of pg_mapping for handling pg_temp (explicit mapping of pgid * to a set of osds) @@ -480,6 +417,113 @@ static struct ceph_pg_pool_info *__lookup_pg_pool(struct rb_root *root, int id) return NULL; } +static void __remove_pg_pool(struct rb_root *root, struct ceph_pg_pool_info *pi) +{ + rb_erase(&pi->node, root); + kfree(pi->name); + kfree(pi); +} + +void __decode_pool(void **p, struct ceph_pg_pool_info *pi) +{ + ceph_decode_copy(p, &pi->v, sizeof(pi->v)); + calc_pg_masks(pi); + *p += le32_to_cpu(pi->v.num_snaps) * sizeof(u64); + *p += le32_to_cpu(pi->v.num_removed_snap_intervals) * sizeof(u64) * 2; +} + +static int __decode_pool_names(void **p, void *end, struct 
ceph_osdmap *map) +{ + struct ceph_pg_pool_info *pi; + u32 num, len, pool; + + ceph_decode_32_safe(p, end, num, bad); + dout(" %d pool names\n", num); + while (num--) { + ceph_decode_32_safe(p, end, pool, bad); + ceph_decode_32_safe(p, end, len, bad); + dout(" pool %d len %d\n", pool, len); + pi = __lookup_pg_pool(&map->pg_pools, pool); + if (pi) { + kfree(pi->name); + pi->name = kmalloc(len + 1, GFP_NOFS); + if (pi->name) { + memcpy(pi->name, *p, len); + pi->name[len] = '\0'; + dout(" name is %s\n", pi->name); + } + } + *p += len; + } + return 0; + +bad: + return -EINVAL; +} + +/* + * osd map + */ +void ceph_osdmap_destroy(struct ceph_osdmap *map) +{ + dout("osdmap_destroy %p\n", map); + if (map->crush) + crush_destroy(map->crush); + while (!RB_EMPTY_ROOT(&map->pg_temp)) { + struct ceph_pg_mapping *pg = + rb_entry(rb_first(&map->pg_temp), + struct ceph_pg_mapping, node); + rb_erase(&pg->node, &map->pg_temp); + kfree(pg); + } + while (!RB_EMPTY_ROOT(&map->pg_pools)) { + struct ceph_pg_pool_info *pi = + rb_entry(rb_first(&map->pg_pools), + struct ceph_pg_pool_info, node); + __remove_pg_pool(&map->pg_pools, pi); + } + kfree(map->osd_state); + kfree(map->osd_weight); + kfree(map->osd_addr); + kfree(map); +} + +/* + * adjust max osd value. reallocate arrays. + */ +static int osdmap_set_max_osd(struct ceph_osdmap *map, int max) +{ + u8 *state; + struct ceph_entity_addr *addr; + u32 *weight; + + state = kcalloc(max, sizeof(*state), GFP_NOFS); + addr = kcalloc(max, sizeof(*addr), GFP_NOFS); + weight = kcalloc(max, sizeof(*weight), GFP_NOFS); + if (state == NULL || addr == NULL || weight == NULL) { + kfree(state); + kfree(addr); + kfree(weight); + return -ENOMEM; + } + + /* copy old? 
*/ + if (map->osd_state) { + memcpy(state, map->osd_state, map->max_osd*sizeof(*state)); + memcpy(addr, map->osd_addr, map->max_osd*sizeof(*addr)); + memcpy(weight, map->osd_weight, map->max_osd*sizeof(*weight)); + kfree(map->osd_state); + kfree(map->osd_addr); + kfree(map->osd_weight); + } + + map->osd_state = state; + map->osd_weight = weight; + map->osd_addr = addr; + map->max_osd = max; + return 0; +} + /* * decode a full map. */ @@ -516,7 +560,7 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end) ceph_decode_32_safe(p, end, max, bad); while (max--) { ceph_decode_need(p, end, 4 + 1 + sizeof(pi->v), bad); - pi = kmalloc(sizeof(*pi), GFP_NOFS); + pi = kzalloc(sizeof(*pi), GFP_NOFS); if (!pi) goto bad; pi->id = ceph_decode_32(p); @@ -526,13 +570,13 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end) ev, CEPH_PG_POOL_VERSION); goto bad; } - ceph_decode_copy(p, &pi->v, sizeof(pi->v)); + __decode_pool(p, pi); __insert_pg_pool(&map->pg_pools, pi); - calc_pg_masks(pi); - *p += le32_to_cpu(pi->v.num_snaps) * sizeof(u64); - *p += le32_to_cpu(pi->v.num_removed_snap_intervals) - * sizeof(u64) * 2; } + + if (version >= 5 && __decode_pool_names(p, end, map) < 0) + goto bad; + ceph_decode_32_safe(p, end, map->pool_max, bad); ceph_decode_32_safe(p, end, map->flags, bad); @@ -706,7 +750,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, } pi = __lookup_pg_pool(&map->pg_pools, pool); if (!pi) { - pi = kmalloc(sizeof(*pi), GFP_NOFS); + pi = kzalloc(sizeof(*pi), GFP_NOFS); if (!pi) { err = -ENOMEM; goto bad; @@ -714,9 +758,10 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, pi->id = pool; __insert_pg_pool(&map->pg_pools, pi); } - ceph_decode_copy(p, &pi->v, sizeof(pi->v)); - calc_pg_masks(pi); + __decode_pool(p, pi); } + if (version >= 5 && __decode_pool_names(p, end, map) < 0) + goto bad; /* old_pool */ ceph_decode_32_safe(p, end, len, bad); @@ -725,10 +770,8 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, 
ceph_decode_32_safe(p, end, pool, bad); pi = __lookup_pg_pool(&map->pg_pools, pool); - if (pi) { - rb_erase(&pi->node, &map->pg_pools); - kfree(pi); - } + if (pi) + __remove_pg_pool(&map->pg_pools, pi); } /* new_up */ diff --git a/fs/ceph/osdmap.h b/fs/ceph/osdmap.h index 1fb55afb2642..8bc9f1e4f562 100644 --- a/fs/ceph/osdmap.h +++ b/fs/ceph/osdmap.h @@ -23,6 +23,7 @@ struct ceph_pg_pool_info { int id; struct ceph_pg_pool v; int pg_num_mask, pgp_num_mask, lpg_num_mask, lpgp_num_mask; + char *name; }; struct ceph_pg_mapping { diff --git a/fs/ceph/pagelist.c b/fs/ceph/pagelist.c index 370e93695474..5f8dbf7c745a 100644 --- a/fs/ceph/pagelist.c +++ b/fs/ceph/pagelist.c @@ -1,4 +1,5 @@ +#include <linux/gfp.h> #include <linux/pagemap.h> #include <linux/highmem.h> diff --git a/fs/ceph/rados.h b/fs/ceph/rados.h index 26ac8b89a676..a1fc1d017b58 100644 --- a/fs/ceph/rados.h +++ b/fs/ceph/rados.h @@ -11,8 +11,10 @@ /* * osdmap encoding versions */ -#define CEPH_OSDMAP_INC_VERSION 4 -#define CEPH_OSDMAP_VERSION 4 +#define CEPH_OSDMAP_INC_VERSION 5 +#define CEPH_OSDMAP_INC_VERSION_EXT 5 +#define CEPH_OSDMAP_VERSION 5 +#define CEPH_OSDMAP_VERSION_EXT 5 /* * fs id diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c index bf2a5f3846a4..d5114db70453 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c @@ -1,6 +1,7 @@ #include "ceph_debug.h" #include <linux/sort.h> +#include <linux/slab.h> #include "super.h" #include "decode.h" @@ -314,9 +315,9 @@ static int build_snap_context(struct ceph_snap_realm *realm) because we rebuild_snap_realms() works _downward_ in hierarchy after each update.) 
*/ if (realm->cached_context && - realm->cached_context->seq <= realm->seq && + realm->cached_context->seq == realm->seq && (!parent || - realm->cached_context->seq <= parent->cached_context->seq)) { + realm->cached_context->seq >= parent->cached_context->seq)) { dout("build_snap_context %llx %p: %p seq %lld (%d snaps)" " (unchanged)\n", realm->ino, realm, realm->cached_context, @@ -430,8 +431,7 @@ static int dup_array(u64 **dst, __le64 *src, int num) * Caller must hold snap_rwsem for read (i.e., the realm topology won't * change). */ -void ceph_queue_cap_snap(struct ceph_inode_info *ci, - struct ceph_snap_context *snapc) +void ceph_queue_cap_snap(struct ceph_inode_info *ci) { struct inode *inode = &ci->vfs_inode; struct ceph_cap_snap *capsnap; @@ -450,10 +450,11 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci, as no new writes are allowed to start when pending, so any writes in progress now were started before the previous cap_snap. lucky us. */ - dout("queue_cap_snap %p snapc %p seq %llu used %d" - " already pending\n", inode, snapc, snapc->seq, used); + dout("queue_cap_snap %p already pending\n", inode); kfree(capsnap); } else if (ci->i_wrbuffer_ref_head || (used & CEPH_CAP_FILE_WR)) { + struct ceph_snap_context *snapc = ci->i_head_snapc; + igrab(inode); atomic_set(&capsnap->nref, 1); @@ -462,7 +463,6 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci, INIT_LIST_HEAD(&capsnap->flushing_item); capsnap->follows = snapc->seq - 1; - capsnap->context = ceph_get_snap_context(snapc); capsnap->issued = __ceph_caps_issued(ci, NULL); capsnap->dirty = __ceph_caps_dirty(ci); @@ -479,7 +479,7 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci, snapshot. 
*/ capsnap->dirty_pages = ci->i_wrbuffer_ref_head; ci->i_wrbuffer_ref_head = 0; - ceph_put_snap_context(ci->i_head_snapc); + capsnap->context = snapc; ci->i_head_snapc = NULL; list_add_tail(&capsnap->ci_item, &ci->i_cap_snaps); @@ -521,15 +521,17 @@ int __ceph_finish_cap_snap(struct ceph_inode_info *ci, capsnap->ctime = inode->i_ctime; capsnap->time_warp_seq = ci->i_time_warp_seq; if (capsnap->dirty_pages) { - dout("finish_cap_snap %p cap_snap %p snapc %p %llu s=%llu " + dout("finish_cap_snap %p cap_snap %p snapc %p %llu %s s=%llu " "still has %d dirty pages\n", inode, capsnap, capsnap->context, capsnap->context->seq, - capsnap->size, capsnap->dirty_pages); + ceph_cap_string(capsnap->dirty), capsnap->size, + capsnap->dirty_pages); return 0; } - dout("finish_cap_snap %p cap_snap %p snapc %p %llu s=%llu clean\n", + dout("finish_cap_snap %p cap_snap %p snapc %p %llu %s s=%llu\n", inode, capsnap, capsnap->context, - capsnap->context->seq, capsnap->size); + capsnap->context->seq, ceph_cap_string(capsnap->dirty), + capsnap->size); spin_lock(&mdsc->snap_flush_lock); list_add_tail(&ci->i_snap_flush_item, &mdsc->snap_flush_list); @@ -601,7 +603,7 @@ more: if (lastinode) iput(lastinode); lastinode = inode; - ceph_queue_cap_snap(ci, realm->cached_context); + ceph_queue_cap_snap(ci); spin_lock(&realm->inodes_with_caps_lock); } spin_unlock(&realm->inodes_with_caps_lock); @@ -818,11 +820,12 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc, * queued (again) by ceph_update_snap_trace() * below. Queue it _now_, under the old context. 
*/ + spin_lock(&realm->inodes_with_caps_lock); list_del_init(&ci->i_snap_realm_item); + spin_unlock(&realm->inodes_with_caps_lock); spin_unlock(&inode->i_lock); - ceph_queue_cap_snap(ci, - ci->i_snap_realm->cached_context); + ceph_queue_cap_snap(ci); iput(inode); continue; @@ -866,16 +869,20 @@ skip_inode: continue; ci = ceph_inode(inode); spin_lock(&inode->i_lock); - if (!ci->i_snap_realm) - goto split_skip_inode; - ceph_put_snap_realm(mdsc, ci->i_snap_realm); - spin_lock(&realm->inodes_with_caps_lock); - list_add(&ci->i_snap_realm_item, - &realm->inodes_with_caps); - ci->i_snap_realm = realm; - spin_unlock(&realm->inodes_with_caps_lock); - ceph_get_snap_realm(mdsc, realm); -split_skip_inode: + if (list_empty(&ci->i_snap_realm_item)) { + struct ceph_snap_realm *oldrealm = + ci->i_snap_realm; + + dout(" moving %p to split realm %llx %p\n", + inode, realm->ino, realm); + spin_lock(&realm->inodes_with_caps_lock); + list_add(&ci->i_snap_realm_item, + &realm->inodes_with_caps); + ci->i_snap_realm = realm; + spin_unlock(&realm->inodes_with_caps_lock); + ceph_get_snap_realm(mdsc, realm); + ceph_put_snap_realm(mdsc, oldrealm); + } spin_unlock(&inode->i_lock); iput(inode); } diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 4290a6e860b0..f888cf487b7c 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -11,6 +11,7 @@ #include <linux/rwsem.h> #include <linux/sched.h> #include <linux/seq_file.h> +#include <linux/slab.h> #include <linux/statfs.h> #include <linux/string.h> #include <linux/version.h> @@ -995,9 +996,10 @@ static int __init init_ceph(void) if (ret) goto out_icache; - pr_info("loaded %d.%d.%d (mon/mds/osd proto %d/%d/%d)\n", - CEPH_VERSION_MAJOR, CEPH_VERSION_MINOR, CEPH_VERSION_PATCH, - CEPH_MONC_PROTOCOL, CEPH_MDSC_PROTOCOL, CEPH_OSDC_PROTOCOL); + pr_info("loaded (mon/mds/osd proto %d/%d/%d, osdmap %d/%d %d/%d)\n", + CEPH_MONC_PROTOCOL, CEPH_MDSC_PROTOCOL, CEPH_OSDC_PROTOCOL, + CEPH_OSDMAP_VERSION, CEPH_OSDMAP_VERSION_EXT, + CEPH_OSDMAP_INC_VERSION, 
CEPH_OSDMAP_INC_VERSION_EXT); return 0; out_icache: diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 65d12036b670..13513b80d87f 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -10,8 +10,10 @@ #include <linux/fs.h> #include <linux/mempool.h> #include <linux/pagemap.h> +#include <linux/slab.h> #include <linux/wait.h> #include <linux/writeback.h> +#include <linux/slab.h> #include "types.h" #include "messenger.h" @@ -714,8 +716,7 @@ extern int ceph_update_snap_trace(struct ceph_mds_client *m, extern void ceph_handle_snap(struct ceph_mds_client *mdsc, struct ceph_mds_session *session, struct ceph_msg *msg); -extern void ceph_queue_cap_snap(struct ceph_inode_info *ci, - struct ceph_snap_context *snapc); +extern void ceph_queue_cap_snap(struct ceph_inode_info *ci); extern int __ceph_finish_cap_snap(struct ceph_inode_info *ci, struct ceph_cap_snap *capsnap); extern void ceph_cleanup_empty_realms(struct ceph_mds_client *mdsc); diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index 37d6ce645691..2845422907fc 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c @@ -3,6 +3,7 @@ #include "decode.h" #include <linux/xattr.h> +#include <linux/slab.h> static bool ceph_is_valid_xattr(const char *name) { diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c index b1d61d0bdfc7..78e4d2a3a68b 100644 --- a/fs/cifs/cifs_dfs_ref.c +++ b/fs/cifs/cifs_dfs_ref.c @@ -15,6 +15,7 @@ #include <linux/dcache.h> #include <linux/mount.h> #include <linux/namei.h> +#include <linux/slab.h> #include <linux/vfs.h> #include <linux/fs.h> #include "cifsglob.h" diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h index 4797787c6a44..246a167cb913 100644 --- a/fs/cifs/cifs_fs_sb.h +++ b/fs/cifs/cifs_fs_sb.h @@ -18,6 +18,8 @@ #ifndef _CIFS_FS_SB_H #define _CIFS_FS_SB_H +#include <linux/backing-dev.h> + #define CIFS_MOUNT_NO_PERM 1 /* do not do client vfs_perm check */ #define CIFS_MOUNT_SET_UID 2 /* set current's euid in create etc. 
*/ #define CIFS_MOUNT_SERVER_INUM 4 /* inode numbers from uniqueid from server */ @@ -50,5 +52,6 @@ struct cifs_sb_info { #ifdef CONFIG_CIFS_DFS_UPCALL char *mountdata; /* mount options received at mount time */ #endif + struct backing_dev_info bdi; }; #endif /* _CIFS_FS_SB_H */ diff --git a/fs/cifs/cifs_spnego.c b/fs/cifs/cifs_spnego.c index 8ec7736ce954..310d12f69a92 100644 --- a/fs/cifs/cifs_spnego.c +++ b/fs/cifs/cifs_spnego.c @@ -20,6 +20,7 @@ */ #include <linux/list.h> +#include <linux/slab.h> #include <linux/string.h> #include <keys/user-type.h> #include <linux/key-type.h> diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c index 714a542cbafc..d07676bd76d2 100644 --- a/fs/cifs/cifs_unicode.c +++ b/fs/cifs/cifs_unicode.c @@ -19,6 +19,7 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include <linux/fs.h> +#include <linux/slab.h> #include "cifs_unicode.h" #include "cifs_uniupr.h" #include "cifspdu.h" diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c index 7dfe0842a6f6..9b716d044bbd 100644 --- a/fs/cifs/cifsacl.c +++ b/fs/cifs/cifsacl.c @@ -22,6 +22,7 @@ */ #include <linux/fs.h> +#include <linux/slab.h> #include "cifspdu.h" #include "cifsglob.h" #include "cifsacl.h" diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c index 7efe1745494d..fbe986430d0c 100644 --- a/fs/cifs/cifsencrypt.c +++ b/fs/cifs/cifsencrypt.c @@ -20,6 +20,7 @@ */ #include <linux/fs.h> +#include <linux/slab.h> #include "cifspdu.h" #include "cifsglob.h" #include "cifs_debug.h" diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 5183bc2a1916..ad235d604a0b 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -103,6 +103,12 @@ cifs_read_super(struct super_block *sb, void *data, if (cifs_sb == NULL) return -ENOMEM; + rc = bdi_setup_and_register(&cifs_sb->bdi, "cifs", BDI_CAP_MAP_COPY); + if (rc) { + kfree(cifs_sb); + return rc; + } + #ifdef CONFIG_CIFS_DFS_UPCALL /* copy mount params to sb for use in submounts */ /* BB: should we move this 
after the mount so we @@ -115,6 +121,7 @@ cifs_read_super(struct super_block *sb, void *data, int len = strlen(data); cifs_sb->mountdata = kzalloc(len + 1, GFP_KERNEL); if (cifs_sb->mountdata == NULL) { + bdi_destroy(&cifs_sb->bdi); kfree(sb->s_fs_info); sb->s_fs_info = NULL; return -ENOMEM; @@ -135,6 +142,7 @@ cifs_read_super(struct super_block *sb, void *data, sb->s_magic = CIFS_MAGIC_NUMBER; sb->s_op = &cifs_super_ops; + sb->s_bdi = &cifs_sb->bdi; /* if (cifs_sb->tcon->ses->server->maxBuf > MAX_CIFS_HDR_SIZE + 512) sb->s_blocksize = cifs_sb->tcon->ses->server->maxBuf - MAX_CIFS_HDR_SIZE; */ @@ -183,6 +191,7 @@ out_mount_failed: } #endif unload_nls(cifs_sb->local_nls); + bdi_destroy(&cifs_sb->bdi); kfree(cifs_sb); } return rc; @@ -214,6 +223,7 @@ cifs_put_super(struct super_block *sb) #endif unload_nls(cifs_sb->local_nls); + bdi_destroy(&cifs_sb->bdi); kfree(cifs_sb); unlock_kernel(); @@ -808,6 +818,7 @@ const struct file_operations cifs_file_direct_nobrl_ops = { .release = cifs_close, .fsync = cifs_fsync, .flush = cifs_flush, + .mmap = cifs_file_mmap, .splice_read = generic_file_splice_read, #ifdef CONFIG_CIFS_POSIX .unlocked_ioctl = cifs_ioctl, diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 63c89d1d70b5..ecf0ffbe2b64 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -18,6 +18,7 @@ */ #include <linux/in.h> #include <linux/in6.h> +#include <linux/slab.h> #include <linux/slow-work.h> #include "cifs_fs_sb.h" #include "cifsacl.h" diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 7cc7f83e9314..5d3f29fef532 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -30,6 +30,7 @@ #include <linux/fs.h> #include <linux/kernel.h> #include <linux/vfs.h> +#include <linux/slab.h> #include <linux/posix_acl_xattr.h> #include <asm/uaccess.h> #include "cifspdu.h" @@ -1430,6 +1431,8 @@ CIFSSMBWrite(const int xid, struct cifsTconInfo *tcon, __u32 bytes_sent; __u16 byte_count; + *nbytes = 0; + /* cFYI(1, ("write at %lld %d bytes", offset, count));*/ if 
(tcon->ses == NULL) return -ECONNABORTED; @@ -1512,11 +1515,18 @@ CIFSSMBWrite(const int xid, struct cifsTconInfo *tcon, cifs_stats_inc(&tcon->num_writes); if (rc) { cFYI(1, ("Send error in write = %d", rc)); - *nbytes = 0; } else { *nbytes = le16_to_cpu(pSMBr->CountHigh); *nbytes = (*nbytes) << 16; *nbytes += le16_to_cpu(pSMBr->Count); + + /* + * Mask off high 16 bits when bytes written as returned by the + * server is greater than bytes requested by the client. Some + * OS/2 servers are known to set incorrect CountHigh values. + */ + if (*nbytes > count) + *nbytes &= 0xFFFF; } cifs_buf_release(pSMB); @@ -1605,6 +1615,14 @@ CIFSSMBWrite2(const int xid, struct cifsTconInfo *tcon, *nbytes = le16_to_cpu(pSMBr->CountHigh); *nbytes = (*nbytes) << 16; *nbytes += le16_to_cpu(pSMBr->Count); + + /* + * Mask off high 16 bits when bytes written as returned by the + * server is greater than bytes requested by the client. OS/2 + * servers are known to set incorrect CountHigh values. + */ + if (*nbytes > count) + *nbytes &= 0xFFFF; } /* cifs_small_buf_release(pSMB); */ /* Freed earlier now in SendReceive2 */ @@ -1793,8 +1811,21 @@ CIFSSMBPosixLock(const int xid, struct cifsTconInfo *tcon, } parm_data = (struct cifs_posix_lock *) ((char *)&pSMBr->hdr.Protocol + data_offset); - if (parm_data->lock_type == cpu_to_le16(CIFS_UNLCK)) + if (parm_data->lock_type == __constant_cpu_to_le16(CIFS_UNLCK)) pLockData->fl_type = F_UNLCK; + else { + if (parm_data->lock_type == + __constant_cpu_to_le16(CIFS_RDLCK)) + pLockData->fl_type = F_RDLCK; + else if (parm_data->lock_type == + __constant_cpu_to_le16(CIFS_WRLCK)) + pLockData->fl_type = F_WRLCK; + + pLockData->fl_start = parm_data->start; + pLockData->fl_end = parm_data->start + + parm_data->length - 1; + pLockData->fl_pid = parm_data->pid; + } } plk_err_exit: diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 45eb6cba793f..d9566bf8f917 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -23,6 +23,7 @@ #include <linux/string.h> 
#include <linux/list.h> #include <linux/wait.h> +#include <linux/slab.h> #include <linux/pagemap.h> #include <linux/ctype.h> #include <linux/utsname.h> diff --git a/fs/cifs/dns_resolve.c b/fs/cifs/dns_resolve.c index 87948147d7ec..6f8a0e3fb25b 100644 --- a/fs/cifs/dns_resolve.c +++ b/fs/cifs/dns_resolve.c @@ -23,6 +23,7 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +#include <linux/slab.h> #include <keys/user-type.h> #include "dns_resolve.h" #include "cifsglob.h" diff --git a/fs/cifs/file.c b/fs/cifs/file.c index ca2ba7a0193c..9b11a8f56f3a 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -31,6 +31,7 @@ #include <linux/task_io_accounting_ops.h> #include <linux/delay.h> #include <linux/mount.h> +#include <linux/slab.h> #include <asm/div64.h> #include "cifsfs.h" #include "cifspdu.h" @@ -838,8 +839,32 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock) } else { /* if rc == ERR_SHARING_VIOLATION ? */ - rc = 0; /* do not change lock type to unlock - since range in use */ + rc = 0; + + if (lockType & LOCKING_ANDX_SHARED_LOCK) { + pfLock->fl_type = F_WRLCK; + } else { + rc = CIFSSMBLock(xid, tcon, netfid, length, + pfLock->fl_start, 0, 1, + lockType | LOCKING_ANDX_SHARED_LOCK, + 0 /* wait flag */); + if (rc == 0) { + rc = CIFSSMBLock(xid, tcon, netfid, + length, pfLock->fl_start, 1, 0, + lockType | + LOCKING_ANDX_SHARED_LOCK, + 0 /* wait flag */); + pfLock->fl_type = F_RDLCK; + if (rc != 0) + cERROR(1, ("Error unlocking " + "previously locked range %d " + "during test of lock", rc)); + rc = 0; + } else { + pfLock->fl_type = F_WRLCK; + rc = 0; + } + } } FreeXid(xid); diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 723daaccbd0e..35ec11716213 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -20,6 +20,7 @@ */ #include <linux/fs.h> #include <linux/stat.h> +#include <linux/slab.h> #include <linux/pagemap.h> #include <asm/div64.h> #include "cifsfs.h" diff --git a/fs/cifs/link.c b/fs/cifs/link.c index 
fc1e0487eaee..c1a9d4236a8c 100644 --- a/fs/cifs/link.c +++ b/fs/cifs/link.c @@ -20,6 +20,7 @@ */ #include <linux/fs.h> #include <linux/stat.h> +#include <linux/slab.h> #include <linux/namei.h> #include "cifsfs.h" #include "cifspdu.h" diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index c343b14ba2d3..18e0bc1fb593 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -22,6 +22,7 @@ */ #include <linux/fs.h> #include <linux/pagemap.h> +#include <linux/slab.h> #include <linux/stat.h> #include "cifspdu.h" #include "cifsglob.h" diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index aaa9c1c5a5bd..7c3fd7463f44 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -29,6 +29,7 @@ #include "ntlmssp.h" #include "nterr.h" #include <linux/utsname.h> +#include <linux/slab.h> #include "cifs_spnego.h" extern void SMBNTencrypt(unsigned char *passwd, unsigned char *c8, diff --git a/fs/cifs/smbencrypt.c b/fs/cifs/smbencrypt.c index 93fb09a99c69..192ea51af20f 100644 --- a/fs/cifs/smbencrypt.c +++ b/fs/cifs/smbencrypt.c @@ -24,6 +24,7 @@ */ #include <linux/module.h> +#include <linux/slab.h> #include <linux/fs.h> #include <linux/string.h> #include <linux/kernel.h> diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 07b8e71544ee..ad081fe7eb18 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -22,6 +22,7 @@ #include <linux/fs.h> #include <linux/list.h> +#include <linux/gfp.h> #include <linux/wait.h> #include <linux/net.h> #include <linux/delay.h> diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c index 3e2ef0de1209..f555ce077d4f 100644 --- a/fs/cifs/xattr.c +++ b/fs/cifs/xattr.c @@ -21,6 +21,7 @@ #include <linux/fs.h> #include <linux/posix_acl_xattr.h> +#include <linux/slab.h> #include "cifsfs.h" #include "cifspdu.h" #include "cifsglob.h" diff --git a/fs/coda/dir.c b/fs/coda/dir.c index 4bb9d0a5decc..ccd98b0f2b0b 100644 --- a/fs/coda/dir.c +++ b/fs/coda/dir.c @@ -12,6 +12,7 @@ #include <linux/kernel.h> #include <linux/time.h> #include <linux/fs.h> +#include <linux/slab.h> 
#include <linux/file.h> #include <linux/stat.h> #include <linux/errno.h> diff --git a/fs/coda/file.c b/fs/coda/file.c index ffd42815fda1..4c813f2cdc52 100644 --- a/fs/coda/file.c +++ b/fs/coda/file.c @@ -17,6 +17,7 @@ #include <linux/errno.h> #include <linux/smp_lock.h> #include <linux/string.h> +#include <linux/slab.h> #include <asm/uaccess.h> #include <linux/coda.h> diff --git a/fs/coda/inode.c b/fs/coda/inode.c index 830f51abb971..d97f9935a028 100644 --- a/fs/coda/inode.c +++ b/fs/coda/inode.c @@ -18,6 +18,7 @@ #include <linux/smp_lock.h> #include <linux/file.h> #include <linux/vfs.h> +#include <linux/slab.h> #include <asm/system.h> #include <asm/uaccess.h> @@ -166,6 +167,10 @@ static int coda_fill_super(struct super_block *sb, void *data, int silent) return -EBUSY; } + error = bdi_setup_and_register(&vc->bdi, "coda", BDI_CAP_MAP_COPY); + if (error) + goto bdi_err; + vc->vc_sb = sb; sb->s_fs_info = vc; @@ -174,6 +179,7 @@ static int coda_fill_super(struct super_block *sb, void *data, int silent) sb->s_blocksize_bits = 12; sb->s_magic = CODA_SUPER_MAGIC; sb->s_op = &coda_super_operations; + sb->s_bdi = &vc->bdi; /* get root fid from Venus: this needs the root inode */ error = venus_rootfid(sb, &fid); @@ -199,6 +205,8 @@ static int coda_fill_super(struct super_block *sb, void *data, int silent) return 0; error: + bdi_destroy(&vc->bdi); + bdi_err: if (root) iput(root); if (vc) @@ -209,6 +217,7 @@ static int coda_fill_super(struct super_block *sb, void *data, int silent) static void coda_put_super(struct super_block *sb) { + bdi_destroy(&coda_vcp(sb)->bdi); coda_vcp(sb)->vc_sb = NULL; sb->s_fs_info = NULL; diff --git a/fs/coda/upcall.c b/fs/coda/upcall.c index c274d949179d..f09c5ed76f6c 100644 --- a/fs/coda/upcall.c +++ b/fs/coda/upcall.c @@ -26,6 +26,7 @@ #include <linux/stat.h> #include <linux/errno.h> #include <linux/string.h> +#include <linux/slab.h> #include <asm/uaccess.h> #include <linux/vmalloc.h> #include <linux/vfs.h> diff --git a/fs/compat.c b/fs/compat.c 
index 030602d453b7..4b6ed03cc478 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -49,6 +49,7 @@ #include <linux/mm.h> #include <linux/eventpoll.h> #include <linux/fs_struct.h> +#include <linux/slab.h> #include <asm/uaccess.h> #include <asm/mmu_context.h> diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index 6d55b61bfa79..641640dc7ae5 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -23,7 +23,6 @@ #include <linux/ioctl.h> #include <linux/if.h> #include <linux/if_bridge.h> -#include <linux/slab.h> #include <linux/raid/md_u.h> #include <linux/kd.h> #include <linux/route.h> @@ -60,6 +59,7 @@ #include <linux/i2c.h> #include <linux/i2c-dev.h> #include <linux/atalk.h> +#include <linux/gfp.h> #include <net/bluetooth/bluetooth.h> #include <net/bluetooth/hci.h> @@ -102,7 +102,6 @@ #include <linux/nbd.h> #include <linux/random.h> #include <linux/filter.h> -#include <linux/pktcdvd.h> #include <linux/hiddev.h> @@ -1126,8 +1125,6 @@ COMPATIBLE_IOCTL(PPGETMODE) COMPATIBLE_IOCTL(PPGETPHASE) COMPATIBLE_IOCTL(PPGETFLAGS) COMPATIBLE_IOCTL(PPSETFLAGS) -/* pktcdvd */ -COMPATIBLE_IOCTL(PACKET_CTRL_CMD) /* Big A */ /* sparc only */ /* Big Q for sound/OSS */ diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c index a2f746066c5d..c8af2d91174b 100644 --- a/fs/configfs/inode.c +++ b/fs/configfs/inode.c @@ -34,6 +34,7 @@ #include <linux/capability.h> #include <linux/sched.h> #include <linux/lockdep.h> +#include <linux/slab.h> #include <linux/configfs.h> #include "configfs_internal.h" diff --git a/fs/configfs/mount.c b/fs/configfs/mount.c index 8421cea7d8c7..8c8d64230c2d 100644 --- a/fs/configfs/mount.c +++ b/fs/configfs/mount.c @@ -29,6 +29,7 @@ #include <linux/mount.h> #include <linux/pagemap.h> #include <linux/init.h> +#include <linux/slab.h> #include <linux/configfs.h> #include "configfs_internal.h" diff --git a/fs/configfs/symlink.c b/fs/configfs/symlink.c index 32a5f46b1157..0f3eb41d9201 100644 --- a/fs/configfs/symlink.c +++ b/fs/configfs/symlink.c @@ -27,6 +27,7 @@ 
#include <linux/fs.h> #include <linux/module.h> #include <linux/namei.h> +#include <linux/slab.h> #include <linux/configfs.h> #include "configfs_internal.h" diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index 049d6c36da09..30a87b3dbcac 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -27,6 +27,7 @@ #include <linux/fsnotify.h> #include <linux/string.h> #include <linux/magic.h> +#include <linux/slab.h> static struct vfsmount *debugfs_mount; static int debugfs_mount_count; diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index 8882ecc0f1bf..0120247b41c0 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c @@ -15,6 +15,7 @@ #include <linux/fs.h> #include <linux/sched.h> #include <linux/namei.h> +#include <linux/slab.h> #include <linux/mount.h> #include <linux/tty.h> #include <linux/mutex.h> diff --git a/fs/dlm/config.c b/fs/dlm/config.c index 0df243850818..b54bca03d92f 100644 --- a/fs/dlm/config.c +++ b/fs/dlm/config.c @@ -14,6 +14,7 @@ #include <linux/kernel.h> #include <linux/module.h> #include <linux/configfs.h> +#include <linux/slab.h> #include <linux/in.h> #include <linux/in6.h> #include <net/ipv6.h> diff --git a/fs/dlm/debug_fs.c b/fs/dlm/debug_fs.c index 29d6139c35fc..c6cf25158746 100644 --- a/fs/dlm/debug_fs.c +++ b/fs/dlm/debug_fs.c @@ -15,6 +15,7 @@ #include <linux/module.h> #include <linux/ctype.h> #include <linux/debugfs.h> +#include <linux/slab.h> #include "dlm_internal.h" #include "lock.h" diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c index 46ffd3eeaaf7..17903b491298 100644 --- a/fs/dlm/lock.c +++ b/fs/dlm/lock.c @@ -56,6 +56,7 @@ L: receive_xxxx_reply() <- R: send_xxxx_reply() */ #include <linux/types.h> +#include <linux/slab.h> #include "dlm_internal.h" #include <linux/dlm_device.h> #include "memory.h" diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index 52cab160893c..c0d35c620526 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c @@ -51,6 +51,7 @@ #include <linux/file.h> #include <linux/mutex.h> #include <linux/sctp.h> 
+#include <linux/slab.h> #include <net/sctp/user.h> #include <net/ipv6.h> diff --git a/fs/dlm/netlink.c b/fs/dlm/netlink.c index 052095cd592f..2c6ad518100d 100644 --- a/fs/dlm/netlink.c +++ b/fs/dlm/netlink.c @@ -9,6 +9,7 @@ #include <net/genetlink.h> #include <linux/dlm.h> #include <linux/dlm_netlink.h> +#include <linux/gfp.h> #include "dlm_internal.h" diff --git a/fs/dlm/plock.c b/fs/dlm/plock.c index b5f89aef3b29..d45c02db6943 100644 --- a/fs/dlm/plock.c +++ b/fs/dlm/plock.c @@ -11,6 +11,7 @@ #include <linux/poll.h> #include <linux/dlm.h> #include <linux/dlm_plock.h> +#include <linux/slab.h> #include "dlm_internal.h" #include "lockspace.h" diff --git a/fs/dlm/user.c b/fs/dlm/user.c index a4bfd31ac45b..8b6e73c47435 100644 --- a/fs/dlm/user.c +++ b/fs/dlm/user.c @@ -17,6 +17,7 @@ #include <linux/spinlock.h> #include <linux/dlm.h> #include <linux/dlm_device.h> +#include <linux/slab.h> #include "dlm_internal.h" #include "lockspace.h" diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c index 7cb0a59f4b9d..1cc087635a5e 100644 --- a/fs/ecryptfs/crypto.c +++ b/fs/ecryptfs/crypto.c @@ -33,6 +33,7 @@ #include <linux/crypto.h> #include <linux/file.h> #include <linux/scatterlist.h> +#include <linux/slab.h> #include <asm/unaligned.h> #include "ecryptfs_kernel.h" @@ -381,8 +382,8 @@ out: static void ecryptfs_lower_offset_for_extent(loff_t *offset, loff_t extent_num, struct ecryptfs_crypt_stat *crypt_stat) { - (*offset) = (crypt_stat->num_header_bytes_at_front - + (crypt_stat->extent_size * extent_num)); + (*offset) = ecryptfs_lower_header_size(crypt_stat) + + (crypt_stat->extent_size * extent_num); } /** @@ -834,13 +835,13 @@ void ecryptfs_set_default_sizes(struct ecryptfs_crypt_stat *crypt_stat) set_extent_mask_and_shift(crypt_stat); crypt_stat->iv_bytes = ECRYPTFS_DEFAULT_IV_BYTES; if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR) - crypt_stat->num_header_bytes_at_front = 0; + crypt_stat->metadata_size = ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE; else { if (PAGE_CACHE_SIZE 
<= ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE) - crypt_stat->num_header_bytes_at_front = + crypt_stat->metadata_size = ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE; else - crypt_stat->num_header_bytes_at_front = PAGE_CACHE_SIZE; + crypt_stat->metadata_size = PAGE_CACHE_SIZE; } } @@ -1107,9 +1108,9 @@ static void write_ecryptfs_marker(char *page_virt, size_t *written) (*written) = MAGIC_ECRYPTFS_MARKER_SIZE_BYTES; } -static void -write_ecryptfs_flags(char *page_virt, struct ecryptfs_crypt_stat *crypt_stat, - size_t *written) +void ecryptfs_write_crypt_stat_flags(char *page_virt, + struct ecryptfs_crypt_stat *crypt_stat, + size_t *written) { u32 flags = 0; int i; @@ -1237,8 +1238,7 @@ ecryptfs_write_header_metadata(char *virt, header_extent_size = (u32)crypt_stat->extent_size; num_header_extents_at_front = - (u16)(crypt_stat->num_header_bytes_at_front - / crypt_stat->extent_size); + (u16)(crypt_stat->metadata_size / crypt_stat->extent_size); put_unaligned_be32(header_extent_size, virt); virt += 4; put_unaligned_be16(num_header_extents_at_front, virt); @@ -1291,7 +1291,8 @@ static int ecryptfs_write_headers_virt(char *page_virt, size_t max, offset = ECRYPTFS_FILE_SIZE_BYTES; write_ecryptfs_marker((page_virt + offset), &written); offset += written; - write_ecryptfs_flags((page_virt + offset), crypt_stat, &written); + ecryptfs_write_crypt_stat_flags((page_virt + offset), crypt_stat, + &written); offset += written; ecryptfs_write_header_metadata((page_virt + offset), crypt_stat, &written); @@ -1381,7 +1382,7 @@ int ecryptfs_write_metadata(struct dentry *ecryptfs_dentry) rc = -EINVAL; goto out; } - virt_len = crypt_stat->num_header_bytes_at_front; + virt_len = crypt_stat->metadata_size; order = get_order(virt_len); /* Released in this function */ virt = (char *)ecryptfs_get_zeroed_pages(GFP_KERNEL, order); @@ -1427,16 +1428,15 @@ static int parse_header_metadata(struct ecryptfs_crypt_stat *crypt_stat, header_extent_size = get_unaligned_be32(virt); virt += sizeof(__be32); 
num_header_extents_at_front = get_unaligned_be16(virt); - crypt_stat->num_header_bytes_at_front = - (((size_t)num_header_extents_at_front - * (size_t)header_extent_size)); + crypt_stat->metadata_size = (((size_t)num_header_extents_at_front + * (size_t)header_extent_size)); (*bytes_read) = (sizeof(__be32) + sizeof(__be16)); if ((validate_header_size == ECRYPTFS_VALIDATE_HEADER_SIZE) - && (crypt_stat->num_header_bytes_at_front + && (crypt_stat->metadata_size < ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE)) { rc = -EINVAL; printk(KERN_WARNING "Invalid header size: [%zd]\n", - crypt_stat->num_header_bytes_at_front); + crypt_stat->metadata_size); } return rc; } @@ -1451,8 +1451,7 @@ static int parse_header_metadata(struct ecryptfs_crypt_stat *crypt_stat, */ static void set_default_header_data(struct ecryptfs_crypt_stat *crypt_stat) { - crypt_stat->num_header_bytes_at_front = - ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE; + crypt_stat->metadata_size = ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE; } /** @@ -1606,6 +1605,7 @@ int ecryptfs_read_metadata(struct dentry *ecryptfs_dentry) ecryptfs_dentry, ECRYPTFS_VALIDATE_HEADER_SIZE); if (rc) { + memset(page_virt, 0, PAGE_CACHE_SIZE); rc = ecryptfs_read_xattr_region(page_virt, ecryptfs_inode); if (rc) { printk(KERN_DEBUG "Valid eCryptfs headers not found in " diff --git a/fs/ecryptfs/dentry.c b/fs/ecryptfs/dentry.c index 8f006a0d6076..906e803f7f79 100644 --- a/fs/ecryptfs/dentry.c +++ b/fs/ecryptfs/dentry.c @@ -26,6 +26,7 @@ #include <linux/namei.h> #include <linux/mount.h> #include <linux/fs_stack.h> +#include <linux/slab.h> #include "ecryptfs_kernel.h" /** diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h index 542f625312f3..bfc2e0f78f00 100644 --- a/fs/ecryptfs/ecryptfs_kernel.h +++ b/fs/ecryptfs/ecryptfs_kernel.h @@ -35,6 +35,7 @@ #include <linux/scatterlist.h> #include <linux/hash.h> #include <linux/nsproxy.h> +#include <linux/backing-dev.h> /* Version verification for shared data structures w/ userspace */ #define 
ECRYPTFS_VERSION_MAJOR 0x00 @@ -273,7 +274,7 @@ struct ecryptfs_crypt_stat { u32 flags; unsigned int file_version; size_t iv_bytes; - size_t num_header_bytes_at_front; + size_t metadata_size; size_t extent_size; /* Data extent size; default is 4096 */ size_t key_size; size_t extent_shift; @@ -393,6 +394,7 @@ struct ecryptfs_mount_crypt_stat { struct ecryptfs_sb_info { struct super_block *wsi_sb; struct ecryptfs_mount_crypt_stat mount_crypt_stat; + struct backing_dev_info bdi; }; /* file private data. */ @@ -464,6 +466,14 @@ struct ecryptfs_daemon { extern struct mutex ecryptfs_daemon_hash_mux; +static inline size_t +ecryptfs_lower_header_size(struct ecryptfs_crypt_stat *crypt_stat) +{ + if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR) + return 0; + return crypt_stat->metadata_size; +} + static inline struct ecryptfs_file_info * ecryptfs_file_to_private(struct file *file) { @@ -651,6 +661,9 @@ int ecryptfs_decrypt_page(struct page *page); int ecryptfs_write_metadata(struct dentry *ecryptfs_dentry); int ecryptfs_read_metadata(struct dentry *ecryptfs_dentry); int ecryptfs_new_file_context(struct dentry *ecryptfs_dentry); +void ecryptfs_write_crypt_stat_flags(char *page_virt, + struct ecryptfs_crypt_stat *crypt_stat, + size_t *written); int ecryptfs_read_and_validate_header_region(char *data, struct inode *ecryptfs_inode); int ecryptfs_read_and_validate_xattr_region(char *page_virt, diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c index 678172b61be2..e7440a6f5ebf 100644 --- a/fs/ecryptfs/file.c +++ b/fs/ecryptfs/file.c @@ -25,6 +25,7 @@ #include <linux/file.h> #include <linux/poll.h> +#include <linux/slab.h> #include <linux/mount.h> #include <linux/pagemap.h> #include <linux/security.h> diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 4a430ab4115c..e2d4418affac 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -31,6 +31,7 @@ #include <linux/mount.h> #include <linux/crypto.h> #include <linux/fs_stack.h> +#include <linux/slab.h> #include 
<asm/unaligned.h> #include "ecryptfs_kernel.h" @@ -323,6 +324,7 @@ int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry, rc = ecryptfs_read_and_validate_header_region(page_virt, ecryptfs_dentry->d_inode); if (rc) { + memset(page_virt, 0, PAGE_CACHE_SIZE); rc = ecryptfs_read_and_validate_xattr_region(page_virt, ecryptfs_dentry); if (rc) { @@ -335,7 +337,7 @@ int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry, ecryptfs_dentry->d_sb)->mount_crypt_stat; if (mount_crypt_stat->flags & ECRYPTFS_ENCRYPTED_VIEW_ENABLED) { if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR) - file_size = (crypt_stat->num_header_bytes_at_front + file_size = (crypt_stat->metadata_size + i_size_read(lower_dentry->d_inode)); else file_size = i_size_read(lower_dentry->d_inode); @@ -387,9 +389,9 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode, mutex_unlock(&lower_dir_dentry->d_inode->i_mutex); if (IS_ERR(lower_dentry)) { rc = PTR_ERR(lower_dentry); - printk(KERN_ERR "%s: lookup_one_len() returned [%d] on " - "lower_dentry = [%s]\n", __func__, rc, - ecryptfs_dentry->d_name.name); + ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_len() returned " + "[%d] on lower_dentry = [%s]\n", __func__, rc, + encrypted_and_encoded_name); goto out_d_drop; } if (lower_dentry->d_inode) @@ -416,9 +418,9 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode, mutex_unlock(&lower_dir_dentry->d_inode->i_mutex); if (IS_ERR(lower_dentry)) { rc = PTR_ERR(lower_dentry); - printk(KERN_ERR "%s: lookup_one_len() returned [%d] on " - "lower_dentry = [%s]\n", __func__, rc, - encrypted_and_encoded_name); + ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_len() returned " + "[%d] on lower_dentry = [%s]\n", __func__, rc, + encrypted_and_encoded_name); goto out_d_drop; } lookup_and_interpose: @@ -455,8 +457,8 @@ static int ecryptfs_link(struct dentry *old_dentry, struct inode *dir, rc = ecryptfs_interpose(lower_new_dentry, new_dentry, dir->i_sb, 0); if (rc) 
goto out_lock; - fsstack_copy_attr_times(dir, lower_new_dentry->d_inode); - fsstack_copy_inode_size(dir, lower_new_dentry->d_inode); + fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode); + fsstack_copy_inode_size(dir, lower_dir_dentry->d_inode); old_dentry->d_inode->i_nlink = ecryptfs_inode_to_lower(old_dentry->d_inode)->i_nlink; i_size_write(new_dentry->d_inode, file_size_save); @@ -647,38 +649,17 @@ out_lock: return rc; } -static int -ecryptfs_readlink(struct dentry *dentry, char __user *buf, int bufsiz) +static int ecryptfs_readlink_lower(struct dentry *dentry, char **buf, + size_t *bufsiz) { + struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry); char *lower_buf; - size_t lower_bufsiz; - struct dentry *lower_dentry; - struct ecryptfs_mount_crypt_stat *mount_crypt_stat; - char *plaintext_name; - size_t plaintext_name_size; + size_t lower_bufsiz = PATH_MAX; mm_segment_t old_fs; int rc; - lower_dentry = ecryptfs_dentry_to_lower(dentry); - if (!lower_dentry->d_inode->i_op->readlink) { - rc = -EINVAL; - goto out; - } - mount_crypt_stat = &ecryptfs_superblock_to_private( - dentry->d_sb)->mount_crypt_stat; - /* - * If the lower filename is encrypted, it will result in a significantly - * longer name. If needed, truncate the name after decode and decrypt. 
- */ - if (mount_crypt_stat->flags & ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES) - lower_bufsiz = PATH_MAX; - else - lower_bufsiz = bufsiz; - /* Released in this function */ lower_buf = kmalloc(lower_bufsiz, GFP_KERNEL); - if (lower_buf == NULL) { - printk(KERN_ERR "%s: Out of memory whilst attempting to " - "kmalloc [%zd] bytes\n", __func__, lower_bufsiz); + if (!lower_buf) { rc = -ENOMEM; goto out; } @@ -688,29 +669,31 @@ ecryptfs_readlink(struct dentry *dentry, char __user *buf, int bufsiz) (char __user *)lower_buf, lower_bufsiz); set_fs(old_fs); - if (rc >= 0) { - rc = ecryptfs_decode_and_decrypt_filename(&plaintext_name, - &plaintext_name_size, - dentry, lower_buf, - rc); - if (rc) { - printk(KERN_ERR "%s: Error attempting to decode and " - "decrypt filename; rc = [%d]\n", __func__, - rc); - goto out_free_lower_buf; - } - /* Check for bufsiz <= 0 done in sys_readlinkat() */ - rc = copy_to_user(buf, plaintext_name, - min((size_t) bufsiz, plaintext_name_size)); - if (rc) - rc = -EFAULT; - else - rc = plaintext_name_size; - kfree(plaintext_name); - fsstack_copy_attr_atime(dentry->d_inode, lower_dentry->d_inode); - } -out_free_lower_buf: + if (rc < 0) + goto out; + lower_bufsiz = rc; + rc = ecryptfs_decode_and_decrypt_filename(buf, bufsiz, dentry, + lower_buf, lower_bufsiz); +out: kfree(lower_buf); + return rc; +} + +static int +ecryptfs_readlink(struct dentry *dentry, char __user *buf, int bufsiz) +{ + char *kbuf; + size_t kbufsiz, copied; + int rc; + + rc = ecryptfs_readlink_lower(dentry, &kbuf, &kbufsiz); + if (rc) + goto out; + copied = min_t(size_t, bufsiz, kbufsiz); + rc = copy_to_user(buf, kbuf, copied) ? 
-EFAULT : copied; + kfree(kbuf); + fsstack_copy_attr_atime(dentry->d_inode, + ecryptfs_dentry_to_lower(dentry)->d_inode); out: return rc; } @@ -768,7 +751,7 @@ upper_size_to_lower_size(struct ecryptfs_crypt_stat *crypt_stat, { loff_t lower_size; - lower_size = crypt_stat->num_header_bytes_at_front; + lower_size = ecryptfs_lower_header_size(crypt_stat); if (upper_size != 0) { loff_t num_extents; @@ -1015,6 +998,28 @@ out: return rc; } +int ecryptfs_getattr_link(struct vfsmount *mnt, struct dentry *dentry, + struct kstat *stat) +{ + struct ecryptfs_mount_crypt_stat *mount_crypt_stat; + int rc = 0; + + mount_crypt_stat = &ecryptfs_superblock_to_private( + dentry->d_sb)->mount_crypt_stat; + generic_fillattr(dentry->d_inode, stat); + if (mount_crypt_stat->flags & ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES) { + char *target; + size_t targetsiz; + + rc = ecryptfs_readlink_lower(dentry, &target, &targetsiz); + if (!rc) { + kfree(target); + stat->size = targetsiz; + } + } + return rc; +} + int ecryptfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) { @@ -1039,7 +1044,7 @@ ecryptfs_setxattr(struct dentry *dentry, const char *name, const void *value, lower_dentry = ecryptfs_dentry_to_lower(dentry); if (!lower_dentry->d_inode->i_op->setxattr) { - rc = -ENOSYS; + rc = -EOPNOTSUPP; goto out; } mutex_lock(&lower_dentry->d_inode->i_mutex); @@ -1057,7 +1062,7 @@ ecryptfs_getxattr_lower(struct dentry *lower_dentry, const char *name, int rc = 0; if (!lower_dentry->d_inode->i_op->getxattr) { - rc = -ENOSYS; + rc = -EOPNOTSUPP; goto out; } mutex_lock(&lower_dentry->d_inode->i_mutex); @@ -1084,7 +1089,7 @@ ecryptfs_listxattr(struct dentry *dentry, char *list, size_t size) lower_dentry = ecryptfs_dentry_to_lower(dentry); if (!lower_dentry->d_inode->i_op->listxattr) { - rc = -ENOSYS; + rc = -EOPNOTSUPP; goto out; } mutex_lock(&lower_dentry->d_inode->i_mutex); @@ -1101,7 +1106,7 @@ static int ecryptfs_removexattr(struct dentry *dentry, const char *name) lower_dentry = 
ecryptfs_dentry_to_lower(dentry); if (!lower_dentry->d_inode->i_op->removexattr) { - rc = -ENOSYS; + rc = -EOPNOTSUPP; goto out; } mutex_lock(&lower_dentry->d_inode->i_mutex); @@ -1132,6 +1137,7 @@ const struct inode_operations ecryptfs_symlink_iops = { .put_link = ecryptfs_put_link, .permission = ecryptfs_permission, .setattr = ecryptfs_setattr, + .getattr = ecryptfs_getattr_link, .setxattr = ecryptfs_setxattr, .getxattr = ecryptfs_getxattr, .listxattr = ecryptfs_listxattr, diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c index a0a7847567e9..89c5476506ef 100644 --- a/fs/ecryptfs/keystore.c +++ b/fs/ecryptfs/keystore.c @@ -32,6 +32,7 @@ #include <linux/random.h> #include <linux/crypto.h> #include <linux/scatterlist.h> +#include <linux/slab.h> #include "ecryptfs_kernel.h" /** diff --git a/fs/ecryptfs/kthread.c b/fs/ecryptfs/kthread.c index e14cf7e588db..d8c3a373aafa 100644 --- a/fs/ecryptfs/kthread.c +++ b/fs/ecryptfs/kthread.c @@ -22,6 +22,7 @@ #include <linux/kthread.h> #include <linux/freezer.h> +#include <linux/slab.h> #include <linux/wait.h> #include <linux/mount.h> #include "ecryptfs_kernel.h" diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index ea2f92101dfe..760983d0f25e 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c @@ -35,6 +35,7 @@ #include <linux/key.h> #include <linux/parser.h> #include <linux/fs_stack.h> +#include <linux/slab.h> #include "ecryptfs_kernel.h" /** @@ -496,17 +497,25 @@ struct kmem_cache *ecryptfs_sb_info_cache; static int ecryptfs_fill_super(struct super_block *sb, void *raw_data, int silent) { + struct ecryptfs_sb_info *esi; int rc = 0; /* Released in ecryptfs_put_super() */ ecryptfs_set_superblock_private(sb, kmem_cache_zalloc(ecryptfs_sb_info_cache, GFP_KERNEL)); - if (!ecryptfs_superblock_to_private(sb)) { + esi = ecryptfs_superblock_to_private(sb); + if (!esi) { ecryptfs_printk(KERN_WARNING, "Out of memory\n"); rc = -ENOMEM; goto out; } + + rc = bdi_setup_and_register(&esi->bdi, "ecryptfs", 
BDI_CAP_MAP_COPY); + if (rc) + goto out; + + sb->s_bdi = &esi->bdi; sb->s_op = &ecryptfs_sops; /* Released through deactivate_super(sb) from get_sb_nodev */ sb->s_root = d_alloc(NULL, &(const struct qstr) { diff --git a/fs/ecryptfs/messaging.c b/fs/ecryptfs/messaging.c index f1c17e87c5fb..2d8dbce9d485 100644 --- a/fs/ecryptfs/messaging.c +++ b/fs/ecryptfs/messaging.c @@ -20,6 +20,7 @@ * 02111-1307, USA. */ #include <linux/sched.h> +#include <linux/slab.h> #include <linux/user_namespace.h> #include <linux/nsproxy.h> #include "ecryptfs_kernel.h" diff --git a/fs/ecryptfs/miscdev.c b/fs/ecryptfs/miscdev.c index 4ec8f61ccf5a..3745f612bcd4 100644 --- a/fs/ecryptfs/miscdev.c +++ b/fs/ecryptfs/miscdev.c @@ -24,6 +24,7 @@ #include <linux/random.h> #include <linux/miscdevice.h> #include <linux/poll.h> +#include <linux/slab.h> #include <linux/wait.h> #include <linux/module.h> #include "ecryptfs_kernel.h" diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c index df4ce99d0597..2ee9a3a7b68c 100644 --- a/fs/ecryptfs/mmap.c +++ b/fs/ecryptfs/mmap.c @@ -32,6 +32,7 @@ #include <linux/file.h> #include <linux/crypto.h> #include <linux/scatterlist.h> +#include <linux/slab.h> #include <asm/unaligned.h> #include "ecryptfs_kernel.h" @@ -82,6 +83,19 @@ out: return rc; } +static void strip_xattr_flag(char *page_virt, + struct ecryptfs_crypt_stat *crypt_stat) +{ + if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR) { + size_t written; + + crypt_stat->flags &= ~ECRYPTFS_METADATA_IN_XATTR; + ecryptfs_write_crypt_stat_flags(page_virt, crypt_stat, + &written); + crypt_stat->flags |= ECRYPTFS_METADATA_IN_XATTR; + } +} + /** * Header Extent: * Octets 0-7: Unencrypted file size (big-endian) @@ -97,19 +111,6 @@ out: * (big-endian) * Octet 26: Begin RFC 2440 authentication token packet set */ -static void set_header_info(char *page_virt, - struct ecryptfs_crypt_stat *crypt_stat) -{ - size_t written; - size_t save_num_header_bytes_at_front = - crypt_stat->num_header_bytes_at_front; - - 
crypt_stat->num_header_bytes_at_front = - ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE; - ecryptfs_write_header_metadata(page_virt + 20, crypt_stat, &written); - crypt_stat->num_header_bytes_at_front = - save_num_header_bytes_at_front; -} /** * ecryptfs_copy_up_encrypted_with_header @@ -135,8 +136,7 @@ ecryptfs_copy_up_encrypted_with_header(struct page *page, * num_extents_per_page) + extent_num_in_page); size_t num_header_extents_at_front = - (crypt_stat->num_header_bytes_at_front - / crypt_stat->extent_size); + (crypt_stat->metadata_size / crypt_stat->extent_size); if (view_extent_num < num_header_extents_at_front) { /* This is a header extent */ @@ -146,9 +146,14 @@ ecryptfs_copy_up_encrypted_with_header(struct page *page, memset(page_virt, 0, PAGE_CACHE_SIZE); /* TODO: Support more than one header extent */ if (view_extent_num == 0) { + size_t written; + rc = ecryptfs_read_xattr_region( page_virt, page->mapping->host); - set_header_info(page_virt, crypt_stat); + strip_xattr_flag(page_virt + 16, crypt_stat); + ecryptfs_write_header_metadata(page_virt + 20, + crypt_stat, + &written); } kunmap_atomic(page_virt, KM_USER0); flush_dcache_page(page); @@ -161,7 +166,7 @@ ecryptfs_copy_up_encrypted_with_header(struct page *page, /* This is an encrypted data extent */ loff_t lower_offset = ((view_extent_num * crypt_stat->extent_size) - - crypt_stat->num_header_bytes_at_front); + - crypt_stat->metadata_size); rc = ecryptfs_read_lower_page_segment( page, (lower_offset >> PAGE_CACHE_SHIFT), diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c index b15a43a80ab7..0c0ae491d231 100644 --- a/fs/ecryptfs/super.c +++ b/fs/ecryptfs/super.c @@ -26,6 +26,7 @@ #include <linux/fs.h> #include <linux/mount.h> #include <linux/key.h> +#include <linux/slab.h> #include <linux/seq_file.h> #include <linux/smp_lock.h> #include <linux/file.h> @@ -85,7 +86,6 @@ static void ecryptfs_destroy_inode(struct inode *inode) if (lower_dentry->d_inode) { fput(inode_info->lower_file); inode_info->lower_file = 
NULL; - d_drop(lower_dentry); } } ecryptfs_destroy_crypt_stat(&inode_info->crypt_stat); @@ -122,6 +122,7 @@ static void ecryptfs_put_super(struct super_block *sb) lock_kernel(); ecryptfs_destroy_mount_crypt_stat(&sb_info->mount_crypt_stat); + bdi_destroy(&sb_info->bdi); kmem_cache_free(ecryptfs_sb_info_cache, sb_info); ecryptfs_set_superblock_private(sb, NULL); diff --git a/fs/eventfd.c b/fs/eventfd.c index 7758cc382ef0..6bd3f76fdf88 100644 --- a/fs/eventfd.c +++ b/fs/eventfd.c @@ -11,6 +11,7 @@ #include <linux/fs.h> #include <linux/sched.h> #include <linux/kernel.h> +#include <linux/slab.h> #include <linux/list.h> #include <linux/spinlock.h> #include <linux/anon_inodes.h> diff --git a/fs/exofs/exofs.h b/fs/exofs/exofs.h index 8442e353309f..22721b2fd890 100644 --- a/fs/exofs/exofs.h +++ b/fs/exofs/exofs.h @@ -35,6 +35,7 @@ #include <linux/fs.h> #include <linux/time.h> +#include <linux/backing-dev.h> #include "common.h" /* FIXME: Remove once pnfs hits mainline @@ -84,6 +85,7 @@ struct exofs_sb_info { u32 s_next_generation; /* next gen # to use */ atomic_t s_curr_pending; /* number of pending commands */ uint8_t s_cred[OSD_CAP_LEN]; /* credential for the fscb */ + struct backing_dev_info bdi; /* register our bdi with VFS */ struct pnfs_osd_data_map data_map; /* Default raid to use * FIXME: Needed ? 
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c index a17e4b733e35..76d2a79ef93e 100644 --- a/fs/exofs/inode.c +++ b/fs/exofs/inode.c @@ -31,6 +31,7 @@ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ +#include <linux/slab.h> #include <linux/writeback.h> #include <linux/buffer_head.h> #include <scsi/scsi_device.h> diff --git a/fs/exofs/ios.c b/fs/exofs/ios.c index 5293bc411d17..4337cad7777b 100644 --- a/fs/exofs/ios.c +++ b/fs/exofs/ios.c @@ -22,6 +22,7 @@ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ +#include <linux/slab.h> #include <scsi/scsi_device.h> #include <asm/div64.h> diff --git a/fs/exofs/super.c b/fs/exofs/super.c index 6cf5e4e84d61..03149b9a5178 100644 --- a/fs/exofs/super.c +++ b/fs/exofs/super.c @@ -37,6 +37,7 @@ #include <linux/vfs.h> #include <linux/random.h> #include <linux/exportfs.h> +#include <linux/slab.h> #include "exofs.h" @@ -301,6 +302,7 @@ static void exofs_put_super(struct super_block *sb) _exofs_print_device("Unmounting", NULL, sbi->layout.s_ods[0], sbi->layout.s_pid); + bdi_destroy(&sbi->bdi); exofs_free_sbi(sbi); sb->s_fs_info = NULL; } @@ -545,6 +547,10 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent) if (!sbi) return -ENOMEM; + ret = bdi_setup_and_register(&sbi->bdi, "exofs", BDI_CAP_MAP_COPY); + if (ret) + goto free_bdi; + /* use mount options to fill superblock */ od = osduld_path_lookup(opts->dev_name); if (IS_ERR(od)) { @@ -611,6 +617,7 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent) } /* set up operation vectors */ + sb->s_bdi = &sbi->bdi; sb->s_fs_info = sbi; sb->s_op = &exofs_sops; sb->s_export_op = &exofs_export_ops; @@ -642,6 +649,8 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent) return 0; free_sbi: + bdi_destroy(&sbi->bdi); +free_bdi: EXOFS_ERR("Unable to mount exofs on %s pid=0x%llx err=%d\n", opts->dev_name, sbi->layout.s_pid, ret); exofs_free_sbi(sbi); diff --git 
a/fs/ext2/balloc.c b/fs/ext2/balloc.c index 1d081f0cfec2..3cf038c055d7 100644 --- a/fs/ext2/balloc.c +++ b/fs/ext2/balloc.c @@ -13,6 +13,7 @@ #include "ext2.h" #include <linux/quotaops.h> +#include <linux/slab.h> #include <linux/sched.h> #include <linux/buffer_head.h> #include <linux/capability.h> diff --git a/fs/ext2/symlink.c b/fs/ext2/symlink.c index 4e2426e22bbe..565cf817bbf1 100644 --- a/fs/ext2/symlink.c +++ b/fs/ext2/symlink.c @@ -32,6 +32,7 @@ const struct inode_operations ext2_symlink_inode_operations = { .readlink = generic_readlink, .follow_link = page_follow_link_light, .put_link = page_put_link, + .setattr = ext2_setattr, #ifdef CONFIG_EXT2_FS_XATTR .setxattr = generic_setxattr, .getxattr = generic_getxattr, @@ -43,6 +44,7 @@ const struct inode_operations ext2_symlink_inode_operations = { const struct inode_operations ext2_fast_symlink_inode_operations = { .readlink = generic_readlink, .follow_link = ext2_follow_link, + .setattr = ext2_setattr, #ifdef CONFIG_EXT2_FS_XATTR .setxattr = generic_setxattr, .getxattr = generic_getxattr, diff --git a/fs/ext2/xattr_security.c b/fs/ext2/xattr_security.c index c8155845ac05..b118c6383c6d 100644 --- a/fs/ext2/xattr_security.c +++ b/fs/ext2/xattr_security.c @@ -4,6 +4,7 @@ */ #include <linux/module.h> +#include <linux/slab.h> #include <linux/string.h> #include <linux/fs.h> #include <linux/ext2_fs.h> diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c index 161da2d3f890..a177122a1b25 100644 --- a/fs/ext3/balloc.c +++ b/fs/ext3/balloc.c @@ -14,6 +14,7 @@ #include <linux/time.h> #include <linux/capability.h> #include <linux/fs.h> +#include <linux/slab.h> #include <linux/jbd.h> #include <linux/ext3_fs.h> #include <linux/ext3_jbd.h> diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c index ef9008b885b5..0d0e97ed3ff6 100644 --- a/fs/ext3/ialloc.c +++ b/fs/ext3/ialloc.c @@ -582,7 +582,9 @@ got: inode->i_generation = sbi->s_next_generation++; spin_unlock(&sbi->s_next_gen_lock); - ei->i_state = EXT3_STATE_NEW; + ei->i_state_flags 
= 0; + ext3_set_inode_state(inode, EXT3_STATE_NEW); + ei->i_extra_isize = (EXT3_INODE_SIZE(inode->i_sb) > EXT3_GOOD_OLD_INODE_SIZE) ? sizeof(struct ext3_inode) - EXT3_GOOD_OLD_INODE_SIZE : 0; diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 7f920b7263a4..ea33bdf0a300 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -2811,7 +2811,7 @@ struct inode *ext3_iget(struct super_block *sb, unsigned long ino) inode->i_mtime.tv_sec = (signed)le32_to_cpu(raw_inode->i_mtime); inode->i_atime.tv_nsec = inode->i_ctime.tv_nsec = inode->i_mtime.tv_nsec = 0; - ei->i_state = 0; + ei->i_state_flags = 0; ei->i_dir_start_lookup = 0; ei->i_dtime = le32_to_cpu(raw_inode->i_dtime); /* We now have enough fields to check if the inode was active or not. diff --git a/fs/ext3/symlink.c b/fs/ext3/symlink.c index ff7b4ccd8983..7c4898207776 100644 --- a/fs/ext3/symlink.c +++ b/fs/ext3/symlink.c @@ -34,6 +34,7 @@ const struct inode_operations ext3_symlink_inode_operations = { .readlink = generic_readlink, .follow_link = page_follow_link_light, .put_link = page_put_link, + .setattr = ext3_setattr, #ifdef CONFIG_EXT3_FS_XATTR .setxattr = generic_setxattr, .getxattr = generic_getxattr, @@ -45,6 +46,7 @@ const struct inode_operations ext3_symlink_inode_operations = { const struct inode_operations ext3_fast_symlink_inode_operations = { .readlink = generic_readlink, .follow_link = ext3_follow_link, + .setattr = ext3_setattr, #ifdef CONFIG_EXT3_FS_XATTR .setxattr = generic_setxattr, .getxattr = generic_getxattr, diff --git a/fs/ext3/xattr_security.c b/fs/ext3/xattr_security.c index 474348788dd9..3af91f476dff 100644 --- a/fs/ext3/xattr_security.c +++ b/fs/ext3/xattr_security.c @@ -4,6 +4,7 @@ */ #include <linux/module.h> +#include <linux/slab.h> #include <linux/string.h> #include <linux/fs.h> #include <linux/ext3_jbd.h> diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c index 983f0e127493..538c48655084 100644 --- a/fs/ext4/block_validity.c +++ b/fs/ext4/block_validity.c @@ -18,6 +18,7 
@@ #include <linux/pagemap.h> #include <linux/blkdev.h> #include <linux/mutex.h> +#include <linux/slab.h> #include "ext4.h" struct ext4_system_zone { diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 94c8ee81f5e1..236b834b4ca8 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -3879,6 +3879,7 @@ static int ext4_xattr_fiemap(struct inode *inode, physical += offset; length = EXT4_SB(inode->i_sb)->s_inode_size - offset; flags |= FIEMAP_EXTENT_DATA_INLINE; + brelse(iloc.bh); } else { /* external block */ physical = EXT4_I(inode)->i_file_acl << blockbits; length = inode->i_sb->s_blocksize; diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 361c0b9962a8..57f6eef6ccd6 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -263,7 +263,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode) ext4_group_t f; f = ext4_flex_group(sbi, block_group); - atomic_dec(&sbi->s_flex_groups[f].free_inodes); + atomic_dec(&sbi->s_flex_groups[f].used_dirs); } } @@ -773,7 +773,7 @@ static int ext4_claim_inode(struct super_block *sb, if (sbi->s_log_groups_per_flex) { ext4_group_t f = ext4_flex_group(sbi, group); - atomic_inc(&sbi->s_flex_groups[f].free_inodes); + atomic_inc(&sbi->s_flex_groups[f].used_dirs); } } gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 986120f30066..81d605412844 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -39,6 +39,7 @@ #include <linux/bio.h> #include <linux/workqueue.h> #include <linux/kernel.h> +#include <linux/slab.h> #include "ext4_jbd2.h" #include "xattr.h" @@ -1035,7 +1036,7 @@ static int ext4_indirect_calc_metadata_amount(struct inode *inode, sector_t lblock) { struct ext4_inode_info *ei = EXT4_I(inode); - int dind_mask = EXT4_ADDR_PER_BLOCK(inode->i_sb) - 1; + sector_t dind_mask = ~((sector_t)EXT4_ADDR_PER_BLOCK(inode->i_sb) - 1); int blk_bits; if (lblock < EXT4_NDIR_BLOCKS) @@ -1050,7 +1051,7 @@ static int ext4_indirect_calc_metadata_amount(struct inode *inode, 
} ei->i_da_metadata_calc_last_lblock = lblock & dind_mask; ei->i_da_metadata_calc_len = 1; - blk_bits = roundup_pow_of_two(lblock + 1); + blk_bits = order_base_2(lblock); return (blk_bits / EXT4_ADDR_PER_BLOCK_BITS(inode->i_sb)) + 1; } @@ -5374,7 +5375,7 @@ int ext4_write_inode(struct inode *inode, struct writeback_control *wbc) } else { struct ext4_iloc iloc; - err = ext4_get_inode_loc(inode, &iloc); + err = __ext4_get_inode_loc(inode, &iloc, 0); if (err) return err; if (wbc->sync_mode == WB_SYNC_ALL) @@ -5385,6 +5386,7 @@ int ext4_write_inode(struct inode *inode, struct writeback_control *wbc) (unsigned long long)iloc.bh->b_blocknr); err = -EIO; } + brelse(iloc.bh); } return err; } diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 54df209d2eed..b423a364dca3 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -23,6 +23,7 @@ #include "mballoc.h" #include <linux/debugfs.h> +#include <linux/slab.h> #include <trace/events/ext4.h> /* @@ -2534,6 +2535,17 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn) mb_debug(1, "gonna free %u blocks in group %u (0x%p):", entry->count, entry->group, entry); + if (test_opt(sb, DISCARD)) { + ext4_fsblk_t discard_block; + + discard_block = entry->start_blk + + ext4_group_first_block_no(sb, entry->group); + trace_ext4_discard_blocks(sb, + (unsigned long long)discard_block, + entry->count); + sb_issue_discard(sb, discard_block, entry->count); + } + err = ext4_mb_load_buddy(sb, entry->group, &e4b); /* we expect to find existing buddy because it's pinned */ BUG_ON(err != 0); @@ -2555,16 +2567,6 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn) page_cache_release(e4b.bd_bitmap_page); } ext4_unlock_group(sb, entry->group); - if (test_opt(sb, DISCARD)) { - ext4_fsblk_t discard_block; - - discard_block = entry->start_blk + - ext4_group_first_block_no(sb, entry->group); - trace_ext4_discard_blocks(sb, - (unsigned long long)discard_block, - entry->count); - 
sb_issue_discard(sb, discard_block, entry->count); - } kmem_cache_free(ext4_free_ext_cachep, entry); ext4_mb_release_desc(&e4b); } diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index 8b87bd0eac95..34dcfc52ef44 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c @@ -13,6 +13,7 @@ */ #include <linux/module.h> +#include <linux/slab.h> #include "ext4_jbd2.h" #include "ext4_extents.h" diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index aa5fe28d180f..d1fc662cc311 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c @@ -15,6 +15,7 @@ #include <linux/fs.h> #include <linux/quotaops.h> +#include <linux/slab.h> #include "ext4_jbd2.h" #include "ext4_extents.h" #include "ext4.h" diff --git a/fs/ext4/super.c b/fs/ext4/super.c index ba191dae8730..e14d22c170d5 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -68,7 +68,21 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf); static int ext4_unfreeze(struct super_block *sb); static void ext4_write_super(struct super_block *sb); static int ext4_freeze(struct super_block *sb); +static int ext4_get_sb(struct file_system_type *fs_type, int flags, + const char *dev_name, void *data, struct vfsmount *mnt); +#if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) +static struct file_system_type ext3_fs_type = { + .owner = THIS_MODULE, + .name = "ext3", + .get_sb = ext4_get_sb, + .kill_sb = kill_block_super, + .fs_flags = FS_REQUIRES_DEV, +}; +#define IS_EXT3_SB(sb) ((sb)->s_bdev->bd_holder == &ext3_fs_type) +#else +#define IS_EXT3_SB(sb) (0) +#endif ext4_fsblk_t ext4_block_bitmap(struct super_block *sb, struct ext4_group_desc *bg) @@ -2539,7 +2553,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) * enable delayed allocation by default * Use -o nodelalloc to turn it off */ - set_opt(sbi->s_mount_opt, DELALLOC); + if (!IS_EXT3_SB(sb)) + set_opt(sbi->s_mount_opt, DELALLOC); if (!parse_options((char *) data, sb, 
&journal_devnum, &journal_ioprio, NULL, 0)) @@ -4068,7 +4083,7 @@ static int ext4_get_sb(struct file_system_type *fs_type, int flags, return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super,mnt); } -#if !defined(CONTIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) +#if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) static struct file_system_type ext2_fs_type = { .owner = THIS_MODULE, .name = "ext2", @@ -4095,15 +4110,7 @@ static inline void register_as_ext2(void) { } static inline void unregister_as_ext2(void) { } #endif -#if !defined(CONTIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) -static struct file_system_type ext3_fs_type = { - .owner = THIS_MODULE, - .name = "ext3", - .get_sb = ext4_get_sb, - .kill_sb = kill_block_super, - .fs_flags = FS_REQUIRES_DEV, -}; - +#if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) static inline void register_as_ext3(void) { int err = register_filesystem(&ext3_fs_type); diff --git a/fs/ext4/xattr_security.c b/fs/ext4/xattr_security.c index 983c253999a7..8b145e98df07 100644 --- a/fs/ext4/xattr_security.c +++ b/fs/ext4/xattr_security.c @@ -7,6 +7,7 @@ #include <linux/string.h> #include <linux/fs.h> #include <linux/security.h> +#include <linux/slab.h> #include "ext4_jbd2.h" #include "ext4.h" #include "xattr.h" diff --git a/fs/fat/cache.c b/fs/fat/cache.c index 923990e4f16e..113f0a1e565d 100644 --- a/fs/fat/cache.c +++ b/fs/fat/cache.c @@ -9,6 +9,7 @@ */ #include <linux/fs.h> +#include <linux/slab.h> #include <linux/buffer_head.h> #include "fat.h" diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c index c1ef50154868..6fcc7e71fbaa 100644 --- a/fs/fat/namei_vfat.c +++ b/fs/fat/namei_vfat.c @@ -309,7 +309,7 @@ static int vfat_create_shortname(struct inode *dir, struct nls_table *nls, { struct fat_mount_options *opts = &MSDOS_SB(dir->i_sb)->options; 
wchar_t *ip, *ext_start, *end, *name_start; - unsigned char base[9], ext[4], buf[8], *p; + unsigned char base[9], ext[4], buf[5], *p; unsigned char charbuf[NLS_MAX_CHARSET_SIZE]; int chl, chi; int sz = 0, extlen, baselen, i, numtail_baselen, numtail2_baselen; @@ -467,7 +467,7 @@ static int vfat_create_shortname(struct inode *dir, struct nls_table *nls, return 0; } - i = jiffies & 0xffff; + i = jiffies; sz = (jiffies >> 16) & 0x7; if (baselen > 2) { baselen = numtail2_baselen; @@ -476,7 +476,7 @@ static int vfat_create_shortname(struct inode *dir, struct nls_table *nls, name_res[baselen + 4] = '~'; name_res[baselen + 5] = '1' + sz; while (1) { - sprintf(buf, "%04X", i); + snprintf(buf, sizeof(buf), "%04X", i & 0xffff); memcpy(&name_res[baselen], buf, 4); if (vfat_find_form(dir, name_res) < 0) break; diff --git a/fs/fifo.c b/fs/fifo.c index f8f97b8b6d44..5d6606ffc2d2 100644 --- a/fs/fifo.c +++ b/fs/fifo.c @@ -10,7 +10,6 @@ */ #include <linux/mm.h> -#include <linux/slab.h> #include <linux/fs.h> #include <linux/sched.h> #include <linux/pipe_fs_i.h> diff --git a/fs/filesystems.c b/fs/filesystems.c index a24c58e181db..68ba492d8eef 100644 --- a/fs/filesystems.c +++ b/fs/filesystems.c @@ -10,10 +10,10 @@ #include <linux/fs.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> -#include <linux/slab.h> #include <linux/kmod.h> #include <linux/init.h> #include <linux/module.h> +#include <linux/slab.h> #include <asm/uaccess.h> /* diff --git a/fs/freevxfs/vxfs_subr.c b/fs/freevxfs/vxfs_subr.c index ed8f0b0dd880..1429f3ae1e86 100644 --- a/fs/freevxfs/vxfs_subr.c +++ b/fs/freevxfs/vxfs_subr.c @@ -33,7 +33,6 @@ #include <linux/fs.h> #include <linux/buffer_head.h> #include <linux/kernel.h> -#include <linux/slab.h> #include <linux/pagemap.h> #include "vxfs_extern.h" diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 76fc4d594acb..4b37f7cea4dd 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -16,6 +16,7 @@ #include <linux/kernel.h> #include <linux/module.h> #include 
<linux/spinlock.h> +#include <linux/slab.h> #include <linux/sched.h> #include <linux/fs.h> #include <linux/mm.h> @@ -553,108 +554,85 @@ select_queue: return ret; } -static void unpin_sb_for_writeback(struct super_block **psb) +static void unpin_sb_for_writeback(struct super_block *sb) { - struct super_block *sb = *psb; - - if (sb) { - up_read(&sb->s_umount); - put_super(sb); - *psb = NULL; - } + up_read(&sb->s_umount); + put_super(sb); } +enum sb_pin_state { + SB_PINNED, + SB_NOT_PINNED, + SB_PIN_FAILED +}; + /* * For WB_SYNC_NONE writeback, the caller does not have the sb pinned * before calling writeback. So make sure that we do pin it, so it doesn't * go away while we are writing inodes from it. - * - * Returns 0 if the super was successfully pinned (or pinning wasn't needed), - * 1 if we failed. */ -static int pin_sb_for_writeback(struct writeback_control *wbc, - struct inode *inode, struct super_block **psb) +static enum sb_pin_state pin_sb_for_writeback(struct writeback_control *wbc, + struct super_block *sb) { - struct super_block *sb = inode->i_sb; - - /* - * If this sb is already pinned, nothing more to do. If not and - * *psb is non-NULL, unpin the old one first - */ - if (sb == *psb) - return 0; - else if (*psb) - unpin_sb_for_writeback(psb); - /* * Caller must already hold the ref for this */ if (wbc->sync_mode == WB_SYNC_ALL) { WARN_ON(!rwsem_is_locked(&sb->s_umount)); - return 0; + return SB_NOT_PINNED; } - spin_lock(&sb_lock); sb->s_count++; if (down_read_trylock(&sb->s_umount)) { if (sb->s_root) { spin_unlock(&sb_lock); - goto pinned; + return SB_PINNED; } /* * umounted, drop rwsem again and fall through to failure */ up_read(&sb->s_umount); } - sb->s_count--; spin_unlock(&sb_lock); - return 1; -pinned: - *psb = sb; - return 0; + return SB_PIN_FAILED; } -static void writeback_inodes_wb(struct bdi_writeback *wb, - struct writeback_control *wbc) +/* + * Write a portion of b_io inodes which belong to @sb. 
+ * If @wbc->sb != NULL, then find and write all such + * inodes. Otherwise write only ones which go sequentially + * in reverse order. + * Return 1, if the caller writeback routine should be + * interrupted. Otherwise return 0. + */ +static int writeback_sb_inodes(struct super_block *sb, + struct bdi_writeback *wb, + struct writeback_control *wbc) { - struct super_block *sb = wbc->sb, *pin_sb = NULL; - const unsigned long start = jiffies; /* livelock avoidance */ - - spin_lock(&inode_lock); - - if (!wbc->for_kupdate || list_empty(&wb->b_io)) - queue_io(wb, wbc->older_than_this); - while (!list_empty(&wb->b_io)) { - struct inode *inode = list_entry(wb->b_io.prev, - struct inode, i_list); long pages_skipped; - - /* - * super block given and doesn't match, skip this inode - */ - if (sb && sb != inode->i_sb) { + struct inode *inode = list_entry(wb->b_io.prev, + struct inode, i_list); + if (wbc->sb && sb != inode->i_sb) { + /* super block given and doesn't + match, skip this inode */ redirty_tail(inode); continue; } - + if (sb != inode->i_sb) + /* finish with this superblock */ + return 0; if (inode->i_state & (I_NEW | I_WILL_FREE)) { requeue_io(inode); continue; } - /* * Was this inode dirtied after sync_sb_inodes was called? * This keeps sync from extra jobs and livelock. 
*/ - if (inode_dirtied_after(inode, start)) - break; - - if (pin_sb_for_writeback(wbc, inode, &pin_sb)) { - requeue_io(inode); - continue; - } + if (inode_dirtied_after(inode, wbc->wb_start)) + return 1; BUG_ON(inode->i_state & (I_FREEING | I_CLEAR)); __iget(inode); @@ -673,14 +651,50 @@ static void writeback_inodes_wb(struct bdi_writeback *wb, spin_lock(&inode_lock); if (wbc->nr_to_write <= 0) { wbc->more_io = 1; - break; + return 1; } if (!list_empty(&wb->b_more_io)) wbc->more_io = 1; } + /* b_io is empty */ + return 1; +} + +static void writeback_inodes_wb(struct bdi_writeback *wb, + struct writeback_control *wbc) +{ + int ret = 0; - unpin_sb_for_writeback(&pin_sb); + wbc->wb_start = jiffies; /* livelock avoidance */ + spin_lock(&inode_lock); + if (!wbc->for_kupdate || list_empty(&wb->b_io)) + queue_io(wb, wbc->older_than_this); + + while (!list_empty(&wb->b_io)) { + struct inode *inode = list_entry(wb->b_io.prev, + struct inode, i_list); + struct super_block *sb = inode->i_sb; + enum sb_pin_state state; + + if (wbc->sb && sb != wbc->sb) { + /* super block given and doesn't + match, skip this inode */ + redirty_tail(inode); + continue; + } + state = pin_sb_for_writeback(wbc, sb); + + if (state == SB_PIN_FAILED) { + requeue_io(inode); + continue; + } + ret = writeback_sb_inodes(sb, wb, wbc); + if (state == SB_PINNED) + unpin_sb_for_writeback(sb); + if (ret) + break; + } spin_unlock(&inode_lock); /* Leave any unwritten inodes on b_io */ } diff --git a/fs/fscache/object-list.c b/fs/fscache/object-list.c index 3221a0c7944e..1e1f286dd70e 100644 --- a/fs/fscache/object-list.c +++ b/fs/fscache/object-list.c @@ -12,6 +12,7 @@ #define FSCACHE_DEBUG_LEVEL COOKIE #include <linux/module.h> #include <linux/seq_file.h> +#include <linux/slab.h> #include <linux/key.h> #include <keys/user-type.h> #include "internal.h" diff --git a/fs/fscache/object.c b/fs/fscache/object.c index e513ac599c8e..0b589a9b4ffc 100644 --- a/fs/fscache/object.c +++ b/fs/fscache/object.c @@ -53,7 +53,7 
@@ const char fscache_object_states_short[FSCACHE_OBJECT__NSTATES][5] = { static void fscache_object_slow_work_put_ref(struct slow_work *); static int fscache_object_slow_work_get_ref(struct slow_work *); static void fscache_object_slow_work_execute(struct slow_work *); -#ifdef CONFIG_SLOW_WORK_PROC +#ifdef CONFIG_SLOW_WORK_DEBUG static void fscache_object_slow_work_desc(struct slow_work *, struct seq_file *); #endif static void fscache_initialise_object(struct fscache_object *); @@ -69,7 +69,7 @@ const struct slow_work_ops fscache_object_slow_work_ops = { .get_ref = fscache_object_slow_work_get_ref, .put_ref = fscache_object_slow_work_put_ref, .execute = fscache_object_slow_work_execute, -#ifdef CONFIG_SLOW_WORK_PROC +#ifdef CONFIG_SLOW_WORK_DEBUG .desc = fscache_object_slow_work_desc, #endif }; @@ -364,7 +364,7 @@ static void fscache_object_slow_work_execute(struct slow_work *work) /* * describe an object for slow-work debugging */ -#ifdef CONFIG_SLOW_WORK_PROC +#ifdef CONFIG_SLOW_WORK_DEBUG static void fscache_object_slow_work_desc(struct slow_work *work, struct seq_file *m) { diff --git a/fs/fscache/operation.c b/fs/fscache/operation.c index 313e79a14266..f17cecafae44 100644 --- a/fs/fscache/operation.c +++ b/fs/fscache/operation.c @@ -14,6 +14,7 @@ #define FSCACHE_DEBUG_LEVEL OPERATION #include <linux/module.h> #include <linux/seq_file.h> +#include <linux/slab.h> #include "internal.h" atomic_t fscache_op_debug_id; @@ -500,7 +501,7 @@ static void fscache_op_execute(struct slow_work *work) /* * describe an operation for slow-work debugging */ -#ifdef CONFIG_SLOW_WORK_PROC +#ifdef CONFIG_SLOW_WORK_DEBUG static void fscache_op_desc(struct slow_work *work, struct seq_file *m) { struct fscache_operation *op = @@ -517,7 +518,7 @@ const struct slow_work_ops fscache_op_slow_work_ops = { .get_ref = fscache_op_get_ref, .put_ref = fscache_op_put_ref, .execute = fscache_op_execute, -#ifdef CONFIG_SLOW_WORK_PROC +#ifdef CONFIG_SLOW_WORK_DEBUG .desc = fscache_op_desc, #endif 
}; diff --git a/fs/fscache/page.c b/fs/fscache/page.c index c598ea4c4e7d..47aefd376e54 100644 --- a/fs/fscache/page.c +++ b/fs/fscache/page.c @@ -14,6 +14,7 @@ #include <linux/fscache-cache.h> #include <linux/buffer_head.h> #include <linux/pagevec.h> +#include <linux/slab.h> #include "internal.h" /* @@ -881,6 +882,7 @@ submit_failed: goto nobufs; nobufs_unlock_obj: + spin_unlock(&cookie->stores_lock); spin_unlock(&object->lock); nobufs: spin_unlock(&cookie->lock); diff --git a/fs/fscache/stats.c b/fs/fscache/stats.c index 46435f3aae68..4765190d537f 100644 --- a/fs/fscache/stats.c +++ b/fs/fscache/stats.c @@ -165,8 +165,8 @@ static int fscache_stats_show(struct seq_file *m, void *v) atomic_read(&fscache_n_object_lookups), atomic_read(&fscache_n_object_lookups_negative), atomic_read(&fscache_n_object_lookups_positive), - atomic_read(&fscache_n_object_lookups_timed_out), - atomic_read(&fscache_n_object_created)); + atomic_read(&fscache_n_object_created), + atomic_read(&fscache_n_object_lookups_timed_out)); seq_printf(m, "Updates: n=%u nul=%u run=%u\n", atomic_read(&fscache_n_updates), diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c index de792dcf3274..e1f8171278bd 100644 --- a/fs/fuse/cuse.c +++ b/fs/fuse/cuse.c @@ -44,6 +44,7 @@ #include <linux/magic.h> #include <linux/miscdevice.h> #include <linux/mutex.h> +#include <linux/slab.h> #include <linux/spinlock.h> #include <linux/stat.h> diff --git a/fs/generic_acl.c b/fs/generic_acl.c index 55458031e501..fe5df5457656 100644 --- a/fs/generic_acl.c +++ b/fs/generic_acl.c @@ -7,6 +7,7 @@ */ #include <linux/sched.h> +#include <linux/gfp.h> #include <linux/fs.h> #include <linux/generic_acl.h> #include <linux/posix_acl.h> diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 583e823307ae..5e411d5f4697 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -7,7 +7,6 @@ * of the GNU General Public License version 2. 
*/ -#include <linux/slab.h> #include <linux/spinlock.h> #include <linux/completion.h> #include <linux/buffer_head.h> diff --git a/fs/gfs2/dentry.c b/fs/gfs2/dentry.c index 91beddadd388..bb7907bde3d8 100644 --- a/fs/gfs2/dentry.c +++ b/fs/gfs2/dentry.c @@ -7,7 +7,6 @@ * of the GNU General Public License version 2. */ -#include <linux/slab.h> #include <linux/spinlock.h> #include <linux/completion.h> #include <linux/buffer_head.h> diff --git a/fs/gfs2/export.c b/fs/gfs2/export.c index d15876e9aa26..c22c21174833 100644 --- a/fs/gfs2/export.c +++ b/fs/gfs2/export.c @@ -7,7 +7,6 @@ * of the GNU General Public License version 2. */ -#include <linux/slab.h> #include <linux/spinlock.h> #include <linux/completion.h> #include <linux/buffer_head.h> diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 38e3749d476c..49f97d3bb690 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -7,7 +7,6 @@ * of the GNU General Public License version 2. */ -#include <linux/slab.h> #include <linux/spinlock.h> #include <linux/completion.h> #include <linux/buffer_head.h> diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c index 569b46240f61..0e0470ed34c2 100644 --- a/fs/gfs2/lock_dlm.c +++ b/fs/gfs2/lock_dlm.c @@ -9,6 +9,7 @@ #include <linux/fs.h> #include <linux/dlm.h> +#include <linux/slab.h> #include <linux/types.h> #include <linux/gfs2_ondisk.h> diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h index b4106ddaaa98..f07119d89557 100644 --- a/fs/gfs2/rgrp.h +++ b/fs/gfs2/rgrp.h @@ -10,6 +10,8 @@ #ifndef __RGRP_DOT_H__ #define __RGRP_DOT_H__ +#include <linux/slab.h> + struct gfs2_rgrpd; struct gfs2_sbd; struct gfs2_holder; diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index 419042f7f0b6..54fd98425991 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c @@ -8,7 +8,6 @@ */ #include <linux/sched.h> -#include <linux/slab.h> #include <linux/spinlock.h> #include <linux/completion.h> #include <linux/buffer_head.h> diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c index 226f2bfbf16a..53511291fe36 100644 --- 
a/fs/gfs2/util.c +++ b/fs/gfs2/util.c @@ -7,7 +7,6 @@ * of the GNU General Public License version 2. */ -#include <linux/slab.h> #include <linux/spinlock.h> #include <linux/completion.h> #include <linux/buffer_head.h> diff --git a/fs/hfs/bnode.c b/fs/hfs/bnode.c index 0d200068d0af..cdb41a1f6a64 100644 --- a/fs/hfs/bnode.c +++ b/fs/hfs/bnode.c @@ -9,6 +9,7 @@ */ #include <linux/pagemap.h> +#include <linux/slab.h> #include <linux/swap.h> #include "btree.h" diff --git a/fs/hfs/btree.c b/fs/hfs/btree.c index 052f214ea6f0..38a0a9917d7f 100644 --- a/fs/hfs/btree.c +++ b/fs/hfs/btree.c @@ -9,6 +9,7 @@ */ #include <linux/pagemap.h> +#include <linux/slab.h> #include <linux/log2.h> #include "btree.h" diff --git a/fs/hfs/mdb.c b/fs/hfs/mdb.c index 8bbe03c3f6d5..86428f5ac991 100644 --- a/fs/hfs/mdb.c +++ b/fs/hfs/mdb.c @@ -11,6 +11,7 @@ #include <linux/cdrom.h> #include <linux/genhd.h> #include <linux/nls.h> +#include <linux/slab.h> #include "hfs_fs.h" #include "btree.h" diff --git a/fs/hfs/super.c b/fs/hfs/super.c index 5ed7252b7b23..0a81eb7111f3 100644 --- a/fs/hfs/super.c +++ b/fs/hfs/super.c @@ -19,6 +19,7 @@ #include <linux/nls.h> #include <linux/parser.h> #include <linux/seq_file.h> +#include <linux/slab.h> #include <linux/smp_lock.h> #include <linux/vfs.h> diff --git a/fs/hfsplus/options.c b/fs/hfsplus/options.c index 3fcbb0e1f6fc..572628b4b07d 100644 --- a/fs/hfsplus/options.c +++ b/fs/hfsplus/options.c @@ -15,6 +15,7 @@ #include <linux/nls.h> #include <linux/mount.h> #include <linux/seq_file.h> +#include <linux/slab.h> #include "hfsplus_fs.h" enum { diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index 032604e5ef2c..3a029d8f4cf1 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -11,6 +11,7 @@ #include <linux/mm.h> #include <linux/pagemap.h> #include <linux/statfs.h> +#include <linux/slab.h> #include <linux/seq_file.h> #include <linux/mount.h> #include "hostfs.h" diff --git a/fs/hpfs/buffer.c b/fs/hpfs/buffer.c index 
b6fca543544c..eac5f96323e3 100644 --- a/fs/hpfs/buffer.c +++ b/fs/hpfs/buffer.c @@ -6,6 +6,7 @@ * general buffer i/o */ #include <linux/sched.h> +#include <linux/slab.h> #include "hpfs_fn.h" void hpfs_lock_creation(struct super_block *s) diff --git a/fs/hpfs/dir.c b/fs/hpfs/dir.c index 26e3964a4b8c..2338130cceba 100644 --- a/fs/hpfs/dir.c +++ b/fs/hpfs/dir.c @@ -7,6 +7,7 @@ */ #include <linux/smp_lock.h> +#include <linux/slab.h> #include "hpfs_fn.h" static int hpfs_dir_release(struct inode *inode, struct file *filp) diff --git a/fs/hpfs/inode.c b/fs/hpfs/inode.c index ff90affb94e1..1042a9bc97f3 100644 --- a/fs/hpfs/inode.c +++ b/fs/hpfs/inode.c @@ -7,6 +7,7 @@ */ #include <linux/smp_lock.h> +#include <linux/slab.h> #include "hpfs_fn.h" void hpfs_init_inode(struct inode *i) diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c index cadc4ce48656..aa53842c599c 100644 --- a/fs/hpfs/super.c +++ b/fs/hpfs/super.c @@ -15,6 +15,7 @@ #include <linux/sched.h> #include <linux/smp_lock.h> #include <linux/bitmap.h> +#include <linux/slab.h> /* Mark the filesystem dirty, so that chkdsk checks it when os/2 booted */ diff --git a/fs/ioctl.c b/fs/ioctl.c index 6c751106c2e5..7faefb4da939 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -228,14 +228,23 @@ static int ioctl_fiemap(struct file *filp, unsigned long arg) #ifdef CONFIG_BLOCK -#define blk_to_logical(inode, blk) (blk << (inode)->i_blkbits) -#define logical_to_blk(inode, offset) (offset >> (inode)->i_blkbits); +static inline sector_t logical_to_blk(struct inode *inode, loff_t offset) +{ + return (offset >> inode->i_blkbits); +} + +static inline loff_t blk_to_logical(struct inode *inode, sector_t blk) +{ + return (blk << inode->i_blkbits); +} /** * __generic_block_fiemap - FIEMAP for block based inodes (no locking) - * @inode - the inode to map - * @arg - the pointer to userspace where we copy everything to - * @get_block - the fs's get_block function + * @inode: the inode to map + * @fieinfo: the fiemap info struct that will be passed 
back to userspace + * @start: where to start mapping in the inode + * @len: how much space to map + * @get_block: the fs's get_block function * * This does FIEMAP for block based inodes. Basically it will just loop * through get_block until we hit the number of extents we want to map, or we @@ -250,58 +259,63 @@ static int ioctl_fiemap(struct file *filp, unsigned long arg) */ int __generic_block_fiemap(struct inode *inode, - struct fiemap_extent_info *fieinfo, u64 start, - u64 len, get_block_t *get_block) + struct fiemap_extent_info *fieinfo, loff_t start, + loff_t len, get_block_t *get_block) { - struct buffer_head tmp; - unsigned long long start_blk; - long long length = 0, map_len = 0; + struct buffer_head map_bh; + sector_t start_blk, last_blk; + loff_t isize = i_size_read(inode); u64 logical = 0, phys = 0, size = 0; u32 flags = FIEMAP_EXTENT_MERGED; - int ret = 0, past_eof = 0, whole_file = 0; + bool past_eof = false, whole_file = false; + int ret = 0; - if ((ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC))) + ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC); + if (ret) return ret; - start_blk = logical_to_blk(inode, start); - - length = (long long)min_t(u64, len, i_size_read(inode)); - if (length < len) - whole_file = 1; + /* + * Either the i_mutex or other appropriate locking needs to be held + * since we expect isize to not change at all through the duration of + * this call. 
+ */ + if (len >= isize) { + whole_file = true; + len = isize; + } - map_len = length; + start_blk = logical_to_blk(inode, start); + last_blk = logical_to_blk(inode, start + len - 1); do { /* * we set b_size to the total size we want so it will map as * many contiguous blocks as possible at once */ - memset(&tmp, 0, sizeof(struct buffer_head)); - tmp.b_size = map_len; + memset(&map_bh, 0, sizeof(struct buffer_head)); + map_bh.b_size = len; - ret = get_block(inode, start_blk, &tmp, 0); + ret = get_block(inode, start_blk, &map_bh, 0); if (ret) break; /* HOLE */ - if (!buffer_mapped(&tmp)) { - length -= blk_to_logical(inode, 1); + if (!buffer_mapped(&map_bh)) { start_blk++; /* - * we want to handle the case where there is an + * We want to handle the case where there is an * allocated block at the front of the file, and then * nothing but holes up to the end of the file properly, * to make sure that extent at the front gets properly * marked with FIEMAP_EXTENT_LAST */ if (!past_eof && - blk_to_logical(inode, start_blk) >= - blk_to_logical(inode, 0)+i_size_read(inode)) + blk_to_logical(inode, start_blk) >= isize) past_eof = 1; /* - * first hole after going past the EOF, this is our + * First hole after going past the EOF, this is our * last extent */ if (past_eof && size) { @@ -309,15 +323,18 @@ int __generic_block_fiemap(struct inode *inode, ret = fiemap_fill_next_extent(fieinfo, logical, phys, size, flags); - break; + } else if (size) { + ret = fiemap_fill_next_extent(fieinfo, logical, + phys, size, flags); + size = 0; } /* if we have holes up to/past EOF then we're done */ - if (length <= 0 || past_eof) + if (start_blk > last_blk || past_eof || ret) break; } else { /* - * we have gone over the length of what we wanted to + * We have gone over the length of what we wanted to * map, and it wasn't the entire file, so add the extent * we got last time and exit. 
* @@ -331,7 +348,7 @@ int __generic_block_fiemap(struct inode *inode, * are good to go, just add the extent to the fieinfo * and break */ - if (length <= 0 && !whole_file) { + if (start_blk > last_blk && !whole_file) { ret = fiemap_fill_next_extent(fieinfo, logical, phys, size, flags); @@ -351,11 +368,10 @@ int __generic_block_fiemap(struct inode *inode, } logical = blk_to_logical(inode, start_blk); - phys = blk_to_logical(inode, tmp.b_blocknr); - size = tmp.b_size; + phys = blk_to_logical(inode, map_bh.b_blocknr); + size = map_bh.b_size; flags = FIEMAP_EXTENT_MERGED; - length -= tmp.b_size; start_blk += logical_to_blk(inode, size); /* @@ -363,15 +379,13 @@ int __generic_block_fiemap(struct inode *inode, * soon as we find a hole that the last extent we found * is marked with FIEMAP_EXTENT_LAST */ - if (!past_eof && - logical+size >= - blk_to_logical(inode, 0)+i_size_read(inode)) - past_eof = 1; + if (!past_eof && logical + size >= isize) + past_eof = true; } cond_resched(); } while (1); - /* if ret is 1 then we just hit the end of the extent array */ + /* If ret is 1 then we just hit the end of the extent array */ if (ret == 1) ret = 0; diff --git a/fs/ioprio.c b/fs/ioprio.c index c7c0b28d7d21..748cfb92dcc6 100644 --- a/fs/ioprio.c +++ b/fs/ioprio.c @@ -19,6 +19,7 @@ * See also Documentation/block/ioprio.txt * */ +#include <linux/gfp.h> #include <linux/kernel.h> #include <linux/ioprio.h> #include <linux/blkdev.h> diff --git a/fs/isofs/dir.c b/fs/isofs/dir.c index 8ba5441063be..b9ab69b3a482 100644 --- a/fs/isofs/dir.c +++ b/fs/isofs/dir.c @@ -11,6 +11,7 @@ * isofs directory handling functions */ #include <linux/smp_lock.h> +#include <linux/gfp.h> #include "isofs.h" int isofs_name_translate(struct iso_directory_record *de, char *new, struct inode *inode) diff --git a/fs/isofs/namei.c b/fs/isofs/namei.c index eaa831311c9c..ab438beb867c 100644 --- a/fs/isofs/namei.c +++ b/fs/isofs/namei.c @@ -7,6 +7,7 @@ */ #include <linux/smp_lock.h> +#include <linux/gfp.h> #include 
"isofs.h" /* diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c index 2c90e3ef625f..ecb44c94ba8d 100644 --- a/fs/jbd/commit.c +++ b/fs/jbd/commit.c @@ -17,7 +17,6 @@ #include <linux/fs.h> #include <linux/jbd.h> #include <linux/errno.h> -#include <linux/slab.h> #include <linux/mm.h> #include <linux/pagemap.h> #include <linux/bio.h> diff --git a/fs/jbd/recovery.c b/fs/jbd/recovery.c index cb1a49ae605e..54c9bc9e1b17 100644 --- a/fs/jbd/recovery.c +++ b/fs/jbd/recovery.c @@ -20,7 +20,6 @@ #include <linux/fs.h> #include <linux/jbd.h> #include <linux/errno.h> -#include <linux/slab.h> #endif /* diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c index 73063285b13f..049281b7cb89 100644 --- a/fs/jbd2/recovery.c +++ b/fs/jbd2/recovery.c @@ -20,7 +20,6 @@ #include <linux/fs.h> #include <linux/jbd2.h> #include <linux/errno.h> -#include <linux/slab.h> #include <linux/crc32.h> #endif diff --git a/fs/jffs2/compr_lzo.c b/fs/jffs2/compr_lzo.c index 90cb60d09787..cd02acafde8a 100644 --- a/fs/jffs2/compr_lzo.c +++ b/fs/jffs2/compr_lzo.c @@ -11,7 +11,6 @@ #include <linux/kernel.h> #include <linux/sched.h> -#include <linux/slab.h> #include <linux/vmalloc.h> #include <linux/init.h> #include <linux/lzo.h> diff --git a/fs/jffs2/compr_zlib.c b/fs/jffs2/compr_zlib.c index cfd301a5edfc..b46661a42758 100644 --- a/fs/jffs2/compr_zlib.c +++ b/fs/jffs2/compr_zlib.c @@ -14,7 +14,6 @@ #endif #include <linux/kernel.h> -#include <linux/slab.h> #include <linux/zlib.h> #include <linux/zutil.h> #include "nodelist.h" diff --git a/fs/jffs2/debug.c b/fs/jffs2/debug.c index 5544d31c066b..ec3538413926 100644 --- a/fs/jffs2/debug.c +++ b/fs/jffs2/debug.c @@ -15,6 +15,7 @@ #include <linux/crc32.h> #include <linux/jffs2.h> #include <linux/mtd/mtd.h> +#include <linux/slab.h> #include "nodelist.h" #include "debug.h" diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c index b7b74e299142..e7291c161a19 100644 --- a/fs/jffs2/file.c +++ b/fs/jffs2/file.c @@ -10,7 +10,6 @@ */ #include <linux/kernel.h> -#include <linux/slab.h> 
#include <linux/fs.h> #include <linux/time.h> #include <linux/pagemap.h> diff --git a/fs/jffs2/nodelist.c b/fs/jffs2/nodelist.c index 87c6f555e1a0..af02bd138469 100644 --- a/fs/jffs2/nodelist.c +++ b/fs/jffs2/nodelist.c @@ -15,7 +15,6 @@ #include <linux/mtd/mtd.h> #include <linux/rbtree.h> #include <linux/crc32.h> -#include <linux/slab.h> #include <linux/pagemap.h> #include "nodelist.h" diff --git a/fs/jffs2/nodemgmt.c b/fs/jffs2/nodemgmt.c index 21a052915aa9..191359dde4e1 100644 --- a/fs/jffs2/nodemgmt.c +++ b/fs/jffs2/nodemgmt.c @@ -10,7 +10,6 @@ */ #include <linux/kernel.h> -#include <linux/slab.h> #include <linux/mtd/mtd.h> #include <linux/compiler.h> #include <linux/sched.h> /* For cond_resched() */ diff --git a/fs/jffs2/symlink.c b/fs/jffs2/symlink.c index 4ec11e8bda8c..b955626071c2 100644 --- a/fs/jffs2/symlink.c +++ b/fs/jffs2/symlink.c @@ -10,7 +10,6 @@ */ #include <linux/kernel.h> -#include <linux/slab.h> #include <linux/fs.h> #include <linux/namei.h> #include "nodelist.h" diff --git a/fs/jffs2/write.c b/fs/jffs2/write.c index ca29440e9435..c819eb0e982d 100644 --- a/fs/jffs2/write.c +++ b/fs/jffs2/write.c @@ -12,7 +12,6 @@ #include <linux/kernel.h> #include <linux/fs.h> #include <linux/crc32.h> -#include <linux/slab.h> #include <linux/pagemap.h> #include <linux/mtd/mtd.h> #include "nodelist.h" diff --git a/fs/jfs/acl.c b/fs/jfs/acl.c index 213169780b6c..1057a4998e4e 100644 --- a/fs/jfs/acl.c +++ b/fs/jfs/acl.c @@ -19,6 +19,7 @@ */ #include <linux/sched.h> +#include <linux/slab.h> #include <linux/fs.h> #include <linux/posix_acl_xattr.h> #include "jfs_incore.h" diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c index 9dd126276c9f..ed9ba6fe04f5 100644 --- a/fs/jfs/inode.c +++ b/fs/jfs/inode.c @@ -61,7 +61,7 @@ struct inode *jfs_iget(struct super_block *sb, unsigned long ino) inode->i_op = &page_symlink_inode_operations; inode->i_mapping->a_ops = &jfs_aops; } else { - inode->i_op = &jfs_symlink_inode_operations; + inode->i_op = &jfs_fast_symlink_inode_operations; 
/* * The inline data should be null-terminated, but * don't let on-disk corruption crash the kernel diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c index d9b031cf69f5..9e2f6a721668 100644 --- a/fs/jfs/jfs_dmap.c +++ b/fs/jfs/jfs_dmap.c @@ -17,6 +17,7 @@ */ #include <linux/fs.h> +#include <linux/slab.h> #include "jfs_incore.h" #include "jfs_superblock.h" #include "jfs_dmap.h" @@ -195,7 +196,7 @@ int dbMount(struct inode *ipbmap) bmp->db_maxag = le32_to_cpu(dbmp_le->dn_maxag); bmp->db_agpref = le32_to_cpu(dbmp_le->dn_agpref); bmp->db_aglevel = le32_to_cpu(dbmp_le->dn_aglevel); - bmp->db_agheigth = le32_to_cpu(dbmp_le->dn_agheigth); + bmp->db_agheight = le32_to_cpu(dbmp_le->dn_agheight); bmp->db_agwidth = le32_to_cpu(dbmp_le->dn_agwidth); bmp->db_agstart = le32_to_cpu(dbmp_le->dn_agstart); bmp->db_agl2size = le32_to_cpu(dbmp_le->dn_agl2size); @@ -287,7 +288,7 @@ int dbSync(struct inode *ipbmap) dbmp_le->dn_maxag = cpu_to_le32(bmp->db_maxag); dbmp_le->dn_agpref = cpu_to_le32(bmp->db_agpref); dbmp_le->dn_aglevel = cpu_to_le32(bmp->db_aglevel); - dbmp_le->dn_agheigth = cpu_to_le32(bmp->db_agheigth); + dbmp_le->dn_agheight = cpu_to_le32(bmp->db_agheight); dbmp_le->dn_agwidth = cpu_to_le32(bmp->db_agwidth); dbmp_le->dn_agstart = cpu_to_le32(bmp->db_agstart); dbmp_le->dn_agl2size = cpu_to_le32(bmp->db_agl2size); @@ -1440,7 +1441,7 @@ dbAllocAG(struct bmap * bmp, int agno, s64 nblocks, int l2nb, s64 * results) * tree index of this allocation group within the control page. */ agperlev = - (1 << (L2LPERCTL - (bmp->db_agheigth << 1))) / bmp->db_agwidth; + (1 << (L2LPERCTL - (bmp->db_agheight << 1))) / bmp->db_agwidth; ti = bmp->db_agstart + bmp->db_agwidth * (agno & (agperlev - 1)); /* dmap control page trees fan-out by 4 and a single allocation @@ -1459,7 +1460,7 @@ dbAllocAG(struct bmap * bmp, int agno, s64 nblocks, int l2nb, s64 * results) * the subtree to find the leftmost leaf that describes this * free space. 
*/ - for (k = bmp->db_agheigth; k > 0; k--) { + for (k = bmp->db_agheight; k > 0; k--) { for (n = 0, m = (ti << 2) + 1; n < 4; n++) { if (l2nb <= dcp->stree[m + n]) { ti = m + n; @@ -3606,7 +3607,7 @@ void dbFinalizeBmap(struct inode *ipbmap) } /* - * compute db_aglevel, db_agheigth, db_width, db_agstart: + * compute db_aglevel, db_agheight, db_width, db_agstart: * an ag is covered in aglevel dmapctl summary tree, * at agheight level height (from leaf) with agwidth number of nodes * each, which starts at agstart index node of the smmary tree node @@ -3615,9 +3616,9 @@ void dbFinalizeBmap(struct inode *ipbmap) bmp->db_aglevel = BMAPSZTOLEV(bmp->db_agsize); l2nl = bmp->db_agl2size - (L2BPERDMAP + bmp->db_aglevel * L2LPERCTL); - bmp->db_agheigth = l2nl >> 1; - bmp->db_agwidth = 1 << (l2nl - (bmp->db_agheigth << 1)); - for (i = 5 - bmp->db_agheigth, bmp->db_agstart = 0, n = 1; i > 0; + bmp->db_agheight = l2nl >> 1; + bmp->db_agwidth = 1 << (l2nl - (bmp->db_agheight << 1)); + for (i = 5 - bmp->db_agheight, bmp->db_agstart = 0, n = 1; i > 0; i--) { bmp->db_agstart += n; n <<= 2; diff --git a/fs/jfs/jfs_dmap.h b/fs/jfs/jfs_dmap.h index 1a6eb41569bc..6dcb906c55d8 100644 --- a/fs/jfs/jfs_dmap.h +++ b/fs/jfs/jfs_dmap.h @@ -210,7 +210,7 @@ struct dbmap_disk { __le32 dn_maxag; /* 4: max active alloc group number */ __le32 dn_agpref; /* 4: preferred alloc group (hint) */ __le32 dn_aglevel; /* 4: dmapctl level holding the AG */ - __le32 dn_agheigth; /* 4: height in dmapctl of the AG */ + __le32 dn_agheight; /* 4: height in dmapctl of the AG */ __le32 dn_agwidth; /* 4: width in dmapctl of the AG */ __le32 dn_agstart; /* 4: start tree index at AG height */ __le32 dn_agl2size; /* 4: l2 num of blks per alloc group */ @@ -229,7 +229,7 @@ struct dbmap { int dn_maxag; /* max active alloc group number */ int dn_agpref; /* preferred alloc group (hint) */ int dn_aglevel; /* dmapctl level holding the AG */ - int dn_agheigth; /* height in dmapctl of the AG */ + int dn_agheight; /* height in 
dmapctl of the AG */ int dn_agwidth; /* width in dmapctl of the AG */ int dn_agstart; /* start tree index at AG height */ int dn_agl2size; /* l2 num of blks per alloc group */ @@ -255,7 +255,7 @@ struct bmap { #define db_agsize db_bmap.dn_agsize #define db_agl2size db_bmap.dn_agl2size #define db_agwidth db_bmap.dn_agwidth -#define db_agheigth db_bmap.dn_agheigth +#define db_agheight db_bmap.dn_agheight #define db_agstart db_bmap.dn_agstart #define db_numag db_bmap.dn_numag #define db_maxlevel db_bmap.dn_maxlevel diff --git a/fs/jfs/jfs_dtree.c b/fs/jfs/jfs_dtree.c index 0e4623be70ce..9197a1b0d02d 100644 --- a/fs/jfs/jfs_dtree.c +++ b/fs/jfs/jfs_dtree.c @@ -102,6 +102,7 @@ #include <linux/fs.h> #include <linux/quotaops.h> +#include <linux/slab.h> #include "jfs_incore.h" #include "jfs_superblock.h" #include "jfs_filsys.h" diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c index 0fc30407f039..f8332dc8eeb2 100644 --- a/fs/jfs/jfs_imap.c +++ b/fs/jfs/jfs_imap.c @@ -45,6 +45,7 @@ #include <linux/buffer_head.h> #include <linux/pagemap.h> #include <linux/quotaops.h> +#include <linux/slab.h> #include "jfs_incore.h" #include "jfs_inode.h" diff --git a/fs/jfs/jfs_inode.h b/fs/jfs/jfs_inode.h index 79e2c79661df..9e6bda30a6e8 100644 --- a/fs/jfs/jfs_inode.h +++ b/fs/jfs/jfs_inode.h @@ -48,5 +48,6 @@ extern const struct file_operations jfs_dir_operations; extern const struct inode_operations jfs_file_inode_operations; extern const struct file_operations jfs_file_operations; extern const struct inode_operations jfs_symlink_inode_operations; +extern const struct inode_operations jfs_fast_symlink_inode_operations; extern const struct dentry_operations jfs_ci_dentry_operations; #endif /* _H_JFS_INODE */ diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c index 335c4de6552d..c51af2a14516 100644 --- a/fs/jfs/jfs_logmgr.c +++ b/fs/jfs/jfs_logmgr.c @@ -70,6 +70,7 @@ #include <linux/delay.h> #include <linux/mutex.h> #include <linux/seq_file.h> +#include <linux/slab.h> #include 
"jfs_incore.h" #include "jfs_filsys.h" #include "jfs_metapage.h" diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c index 07b6c5dfb4b6..48b44bd8267b 100644 --- a/fs/jfs/jfs_metapage.c +++ b/fs/jfs/jfs_metapage.c @@ -21,6 +21,7 @@ #include <linux/mm.h> #include <linux/module.h> #include <linux/bio.h> +#include <linux/slab.h> #include <linux/init.h> #include <linux/buffer_head.h> #include <linux/mempool.h> diff --git a/fs/jfs/jfs_unicode.h b/fs/jfs/jfs_unicode.h index 3fbb3a225590..8f0f02cb6ca6 100644 --- a/fs/jfs/jfs_unicode.h +++ b/fs/jfs/jfs_unicode.h @@ -19,6 +19,7 @@ #ifndef _H_JFS_UNICODE #define _H_JFS_UNICODE +#include <linux/slab.h> #include <asm/byteorder.h> #include "jfs_types.h" diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index 4a3e9f39c21d..a9cf8e8675be 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c @@ -956,7 +956,7 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry, */ if (ssize <= IDATASIZE) { - ip->i_op = &jfs_symlink_inode_operations; + ip->i_op = &jfs_fast_symlink_inode_operations; i_fastsymlink = JFS_IP(ip)->i_inline; memcpy(i_fastsymlink, name, ssize); @@ -978,7 +978,7 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry, else { jfs_info("jfs_symlink: allocate extent ip:0x%p", ip); - ip->i_op = &page_symlink_inode_operations; + ip->i_op = &jfs_symlink_inode_operations; ip->i_mapping->a_ops = &jfs_aops; /* diff --git a/fs/jfs/resize.c b/fs/jfs/resize.c index 7f24a0bb08ca..1aba0039f1c9 100644 --- a/fs/jfs/resize.c +++ b/fs/jfs/resize.c @@ -81,6 +81,7 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize) struct inode *iplist[1]; struct jfs_superblock *j_sb, *j_sb2; uint old_agsize; + int agsizechanged = 0; struct buffer_head *bh, *bh2; /* If the volume hasn't grown, get out now */ @@ -333,6 +334,9 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize) */ if ((rc = dbExtendFS(ipbmap, XAddress, nblocks))) goto error_out; + + agsizechanged |= (bmp->db_agsize != 
old_agsize); + /* * the map now has extended to cover additional nblocks: * dn_mapsize = oldMapsize + nblocks; @@ -432,7 +436,7 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize) * will correctly identify the new ag); */ /* if new AG size the same as old AG size, done! */ - if (bmp->db_agsize != old_agsize) { + if (agsizechanged) { if ((rc = diExtendFS(ipimap, ipbmap))) goto error_out; diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 266699deb1c6..157382fa6256 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -30,6 +30,7 @@ #include <linux/buffer_head.h> #include <linux/exportfs.h> #include <linux/crc32.h> +#include <linux/slab.h> #include <asm/uaccess.h> #include <linux/seq_file.h> #include <linux/smp_lock.h> diff --git a/fs/jfs/symlink.c b/fs/jfs/symlink.c index 4af1a05aad0a..205b946d8e0d 100644 --- a/fs/jfs/symlink.c +++ b/fs/jfs/symlink.c @@ -29,9 +29,21 @@ static void *jfs_follow_link(struct dentry *dentry, struct nameidata *nd) return NULL; } -const struct inode_operations jfs_symlink_inode_operations = { +const struct inode_operations jfs_fast_symlink_inode_operations = { .readlink = generic_readlink, .follow_link = jfs_follow_link, + .setattr = jfs_setattr, + .setxattr = jfs_setxattr, + .getxattr = jfs_getxattr, + .listxattr = jfs_listxattr, + .removexattr = jfs_removexattr, +}; + +const struct inode_operations jfs_symlink_inode_operations = { + .readlink = generic_readlink, + .follow_link = page_follow_link_light, + .put_link = page_put_link, + .setattr = jfs_setattr, .setxattr = jfs_setxattr, .getxattr = jfs_getxattr, .listxattr = jfs_listxattr, diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c index 1f594ab21895..fa96bbb26343 100644 --- a/fs/jfs/xattr.c +++ b/fs/jfs/xattr.c @@ -21,6 +21,7 @@ #include <linux/fs.h> #include <linux/xattr.h> #include <linux/posix_acl_xattr.h> +#include <linux/slab.h> #include <linux/quotaops.h> #include <linux/security.h> #include "jfs_incore.h" diff --git a/fs/libfs.c b/fs/libfs.c index 
9e50bcf55857..ea9a6cc9b35c 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -5,6 +5,7 @@ #include <linux/module.h> #include <linux/pagemap.h> +#include <linux/slab.h> #include <linux/mount.h> #include <linux/vfs.h> #include <linux/mutex.h> diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c index fc9032dc8862..64fd427c993c 100644 --- a/fs/lockd/clntlock.c +++ b/fs/lockd/clntlock.c @@ -8,6 +8,7 @@ #include <linux/module.h> #include <linux/types.h> +#include <linux/slab.h> #include <linux/time.h> #include <linux/nfs_fs.h> #include <linux/sunrpc/clnt.h> diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c index c81249fef11f..7932c399fab4 100644 --- a/fs/lockd/clntproc.c +++ b/fs/lockd/clntproc.c @@ -8,6 +8,7 @@ #include <linux/module.h> #include <linux/smp_lock.h> +#include <linux/slab.h> #include <linux/types.h> #include <linux/errno.h> #include <linux/fs.h> diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c index fefa4df3f005..e3015464fbab 100644 --- a/fs/lockd/mon.c +++ b/fs/lockd/mon.c @@ -10,6 +10,7 @@ #include <linux/utsname.h> #include <linux/kernel.h> #include <linux/ktime.h> +#include <linux/slab.h> #include <linux/sunrpc/clnt.h> #include <linux/sunrpc/xprtsock.h> diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index 7d150517ddf0..f1bacf1a0391 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -21,7 +21,6 @@ #include <linux/errno.h> #include <linux/in.h> #include <linux/uio.h> -#include <linux/slab.h> #include <linux/smp.h> #include <linux/smp_lock.h> #include <linux/mutex.h> diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c index a7966eed3c17..031c6569a134 100644 --- a/fs/lockd/svc4proc.c +++ b/fs/lockd/svc4proc.c @@ -9,7 +9,6 @@ #include <linux/types.h> #include <linux/time.h> -#include <linux/slab.h> #include <linux/smp_lock.h> #include <linux/lockd/lockd.h> #include <linux/lockd/share.h> diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index d1001790fa9a..84055d31bfc5 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -21,6 +21,7 @@ */ #include 
<linux/types.h> +#include <linux/slab.h> #include <linux/errno.h> #include <linux/kernel.h> #include <linux/sched.h> diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c index 56c9519d900a..0f2ab741ae7c 100644 --- a/fs/lockd/svcproc.c +++ b/fs/lockd/svcproc.c @@ -9,7 +9,6 @@ #include <linux/types.h> #include <linux/time.h> -#include <linux/slab.h> #include <linux/smp_lock.h> #include <linux/lockd/lockd.h> #include <linux/lockd/share.h> diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c index ad478da7ca63..d0ef94cfb3da 100644 --- a/fs/lockd/svcsubs.c +++ b/fs/lockd/svcsubs.c @@ -10,6 +10,7 @@ #include <linux/string.h> #include <linux/time.h> #include <linux/in.h> +#include <linux/slab.h> #include <linux/mutex.h> #include <linux/sunrpc/svc.h> #include <linux/sunrpc/clnt.h> diff --git a/fs/logfs/dev_bdev.c b/fs/logfs/dev_bdev.c index 9718c22f186d..243c00071f76 100644 --- a/fs/logfs/dev_bdev.c +++ b/fs/logfs/dev_bdev.c @@ -9,6 +9,7 @@ #include <linux/bio.h> #include <linux/blkdev.h> #include <linux/buffer_head.h> +#include <linux/gfp.h> #define PAGE_OFS(ofs) ((ofs) & (PAGE_SIZE-1)) @@ -80,6 +81,7 @@ static void writeseg_end_io(struct bio *bio, int err) prefetchw(&bvec->bv_page->flags); end_page_writeback(page); + page_cache_release(page); } while (bvec >= bio->bi_io_vec); bio_put(bio); if (atomic_dec_and_test(&super->s_pending_writes)) @@ -97,8 +99,10 @@ static int __bdev_writeseg(struct super_block *sb, u64 ofs, pgoff_t index, unsigned int max_pages = queue_max_hw_sectors(q) >> (PAGE_SHIFT - 9); int i; + if (max_pages > BIO_MAX_PAGES) + max_pages = BIO_MAX_PAGES; bio = bio_alloc(GFP_NOFS, max_pages); - BUG_ON(!bio); /* FIXME: handle this */ + BUG_ON(!bio); for (i = 0; i < nr_pages; i++) { if (i >= max_pages) { @@ -191,8 +195,10 @@ static int do_erase(struct super_block *sb, u64 ofs, pgoff_t index, unsigned int max_pages = queue_max_hw_sectors(q) >> (PAGE_SHIFT - 9); int i; + if (max_pages > BIO_MAX_PAGES) + max_pages = BIO_MAX_PAGES; bio = bio_alloc(GFP_NOFS, 
max_pages); - BUG_ON(!bio); /* FIXME: handle this */ + BUG_ON(!bio); for (i = 0; i < nr_pages; i++) { if (i >= max_pages) { diff --git a/fs/logfs/dir.c b/fs/logfs/dir.c index 56a8bfbb0120..2396a85c0f55 100644 --- a/fs/logfs/dir.c +++ b/fs/logfs/dir.c @@ -6,7 +6,7 @@ * Copyright (c) 2005-2008 Joern Engel <joern@logfs.org> */ #include "logfs.h" - +#include <linux/slab.h> /* * Atomic dir operations @@ -303,12 +303,12 @@ static int __logfs_readdir(struct file *file, void *buf, filldir_t filldir) (filler_t *)logfs_readpage, NULL); if (IS_ERR(page)) return PTR_ERR(page); - dd = kmap_atomic(page, KM_USER0); + dd = kmap(page); BUG_ON(dd->namelen == 0); full = filldir(buf, (char *)dd->name, be16_to_cpu(dd->namelen), pos, be64_to_cpu(dd->ino), dd->type); - kunmap_atomic(dd, KM_USER0); + kunmap(page); page_cache_release(page); if (full) break; diff --git a/fs/logfs/gc.c b/fs/logfs/gc.c index 92949f95a901..76c242fbe1b0 100644 --- a/fs/logfs/gc.c +++ b/fs/logfs/gc.c @@ -7,6 +7,7 @@ */ #include "logfs.h" #include <linux/sched.h> +#include <linux/slab.h> /* * Wear leveling needs to kick in when the difference between low erase @@ -458,6 +459,14 @@ static void __logfs_gc_pass(struct super_block *sb, int target) struct logfs_block *block; int round, progress, last_progress = 0; + /* + * Doing too many changes to the segfile at once would result + * in a large number of aliases. Write the journal before + * things get out of hand. 
+ */ + if (super->s_shadow_tree.no_shadowed_segments >= MAX_OBJ_ALIASES) + logfs_write_anchor(sb); + if (no_free_segments(sb) >= target && super->s_no_object_aliases < MAX_OBJ_ALIASES) return; diff --git a/fs/logfs/inode.c b/fs/logfs/inode.c index 33ec1aeaeec4..14ed27274da2 100644 --- a/fs/logfs/inode.c +++ b/fs/logfs/inode.c @@ -6,6 +6,7 @@ * Copyright (c) 2005-2008 Joern Engel <joern@logfs.org> */ #include "logfs.h" +#include <linux/slab.h> #include <linux/writeback.h> #include <linux/backing-dev.h> diff --git a/fs/logfs/journal.c b/fs/logfs/journal.c index 6ad30a4c9052..fb0a613f885b 100644 --- a/fs/logfs/journal.c +++ b/fs/logfs/journal.c @@ -6,6 +6,7 @@ * Copyright (c) 2005-2008 Joern Engel <joern@logfs.org> */ #include "logfs.h" +#include <linux/slab.h> static void logfs_calc_free(struct super_block *sb) { @@ -388,7 +389,10 @@ static void journal_get_erase_count(struct logfs_area *area) static int journal_erase_segment(struct logfs_area *area) { struct super_block *sb = area->a_sb; - struct logfs_segment_header sh; + union { + struct logfs_segment_header sh; + unsigned char c[ALIGN(sizeof(struct logfs_segment_header), 16)]; + } u; u64 ofs; int err; @@ -396,20 +400,21 @@ static int journal_erase_segment(struct logfs_area *area) if (err) return err; - sh.pad = 0; - sh.type = SEG_JOURNAL; - sh.level = 0; - sh.segno = cpu_to_be32(area->a_segno); - sh.ec = cpu_to_be32(area->a_erase_count); - sh.gec = cpu_to_be64(logfs_super(sb)->s_gec); - sh.crc = logfs_crc32(&sh, sizeof(sh), 4); + memset(&u, 0, sizeof(u)); + u.sh.pad = 0; + u.sh.type = SEG_JOURNAL; + u.sh.level = 0; + u.sh.segno = cpu_to_be32(area->a_segno); + u.sh.ec = cpu_to_be32(area->a_erase_count); + u.sh.gec = cpu_to_be64(logfs_super(sb)->s_gec); + u.sh.crc = logfs_crc32(&u.sh, sizeof(u.sh), 4); /* This causes a bug in segment.c. Not yet. 
*/ //logfs_set_segment_erased(sb, area->a_segno, area->a_erase_count, 0); ofs = dev_ofs(sb, area->a_segno, 0); - area->a_used_bytes = ALIGN(sizeof(sh), 16); - logfs_buf_write(area, ofs, &sh, sizeof(sh)); + area->a_used_bytes = sizeof(u); + logfs_buf_write(area, ofs, &u, sizeof(u)); return 0; } @@ -493,6 +498,8 @@ static void account_shadows(struct super_block *sb) btree_grim_visitor64(&tree->new, (unsigned long)sb, account_shadow); btree_grim_visitor64(&tree->old, (unsigned long)sb, account_shadow); + btree_grim_visitor32(&tree->segment_map, 0, NULL); + tree->no_shadowed_segments = 0; if (li->li_block) { /* @@ -606,9 +613,9 @@ static size_t __logfs_write_je(struct super_block *sb, void *buf, u16 type, if (len == 0) return logfs_write_header(super, header, 0, type); + BUG_ON(len > sb->s_blocksize); compr_len = logfs_compress(buf, data, len, sb->s_blocksize); if (compr_len < 0 || type == JE_ANCHOR) { - BUG_ON(len > sb->s_blocksize); memcpy(data, buf, len); compr_len = len; compr = COMPR_NONE; @@ -660,6 +667,7 @@ static int logfs_write_je_buf(struct super_block *sb, void *buf, u16 type, if (ofs < 0) return ofs; logfs_buf_write(area, ofs, super->s_compressed_je, len); + BUG_ON(super->s_no_je >= MAX_JOURNAL_ENTRIES); super->s_je_array[super->s_no_je++] = cpu_to_be64(ofs); return 0; } @@ -800,6 +808,7 @@ void do_logfs_journal_wl_pass(struct super_block *sb) { struct logfs_super *super = logfs_super(sb); struct logfs_area *area = super->s_journal_area; + struct btree_head32 *head = &super->s_reserved_segments; u32 segno, ec; int i, err; @@ -807,6 +816,7 @@ void do_logfs_journal_wl_pass(struct super_block *sb) /* Drop old segments */ journal_for_each(i) if (super->s_journal_seg[i]) { + btree_remove32(head, super->s_journal_seg[i]); logfs_set_segment_unreserved(sb, super->s_journal_seg[i], super->s_journal_ec[i]); @@ -819,8 +829,13 @@ void do_logfs_journal_wl_pass(struct super_block *sb) super->s_journal_seg[i] = segno; super->s_journal_ec[i] = ec; 
logfs_set_segment_reserved(sb, segno); + err = btree_insert32(head, segno, (void *)1, GFP_KERNEL); + BUG_ON(err); /* mempool should prevent this */ + err = logfs_erase_segment(sb, segno, 1); + BUG_ON(err); /* FIXME: remount-ro would be nicer */ } /* Manually move journal_area */ + freeseg(sb, area->a_segno); area->a_segno = super->s_journal_seg[0]; area->a_is_open = 0; area->a_used_bytes = 0; diff --git a/fs/logfs/logfs.h b/fs/logfs/logfs.h index 129779431373..0a3df1a0c936 100644 --- a/fs/logfs/logfs.h +++ b/fs/logfs/logfs.h @@ -257,10 +257,14 @@ struct logfs_shadow { * struct shadow_tree * @new: shadows where old_ofs==0, indexed by new_ofs * @old: shadows where old_ofs!=0, indexed by old_ofs + * @segment_map: bitfield of segments containing shadows + * @no_shadowed_segment: number of segments containing shadows */ struct shadow_tree { struct btree_head64 new; struct btree_head64 old; + struct btree_head32 segment_map; + int no_shadowed_segments; }; struct object_alias_item { @@ -305,13 +309,14 @@ typedef int write_alias_t(struct super_block *sb, u64 ino, u64 bix, level_t level, int child_no, __be64 val); struct logfs_block_ops { void (*write_block)(struct logfs_block *block); - gc_level_t (*block_level)(struct logfs_block *block); void (*free_block)(struct super_block *sb, struct logfs_block*block); int (*write_alias)(struct super_block *sb, struct logfs_block *block, write_alias_t *write_one_alias); }; +#define MAX_JOURNAL_ENTRIES 256 + struct logfs_super { struct mtd_info *s_mtd; /* underlying device */ struct block_device *s_bdev; /* underlying device */ @@ -378,7 +383,7 @@ struct logfs_super { u32 s_journal_ec[LOGFS_JOURNAL_SEGS]; /* journal erasecounts */ u64 s_last_version; struct logfs_area *s_journal_area; /* open journal segment */ - __be64 s_je_array[64]; + __be64 s_je_array[MAX_JOURNAL_ENTRIES]; int s_no_je; int s_sum_index; /* for the 12 summaries */ @@ -587,6 +592,7 @@ void move_page_to_btree(struct page *page); int logfs_init_mapping(struct 
super_block *sb); void logfs_sync_area(struct logfs_area *area); void logfs_sync_segments(struct super_block *sb); +void freeseg(struct super_block *sb, u32 segno); /* area handling */ int logfs_init_areas(struct super_block *sb); @@ -721,4 +727,10 @@ static inline struct logfs_area *get_area(struct super_block *sb, return logfs_super(sb)->s_area[(__force u8)gc_level]; } +static inline void logfs_mempool_destroy(mempool_t *pool) +{ + if (pool) + mempool_destroy(pool); +} + #endif diff --git a/fs/logfs/readwrite.c b/fs/logfs/readwrite.c index 7a23b3e7c0a7..3159db6958e5 100644 --- a/fs/logfs/readwrite.c +++ b/fs/logfs/readwrite.c @@ -18,6 +18,7 @@ */ #include "logfs.h" #include <linux/sched.h> +#include <linux/slab.h> static u64 adjust_bix(u64 bix, level_t level) { @@ -429,25 +430,6 @@ static void inode_write_block(struct logfs_block *block) } } -static gc_level_t inode_block_level(struct logfs_block *block) -{ - BUG_ON(block->inode->i_ino == LOGFS_INO_MASTER); - return GC_LEVEL(LOGFS_MAX_LEVELS); -} - -static gc_level_t indirect_block_level(struct logfs_block *block) -{ - struct page *page; - struct inode *inode; - u64 bix; - level_t level; - - page = block->page; - inode = page->mapping->host; - logfs_unpack_index(page->index, &bix, &level); - return expand_level(inode->i_ino, level); -} - /* * This silences a false, yet annoying gcc warning. I hate it when my editor * jumps into bitops.h each time I recompile this file. 
@@ -586,14 +568,12 @@ static void indirect_free_block(struct super_block *sb, static struct logfs_block_ops inode_block_ops = { .write_block = inode_write_block, - .block_level = inode_block_level, .free_block = inode_free_block, .write_alias = inode_write_alias, }; struct logfs_block_ops indirect_block_ops = { .write_block = indirect_write_block, - .block_level = indirect_block_level, .free_block = indirect_free_block, .write_alias = indirect_write_alias, }; @@ -1240,6 +1220,18 @@ static void free_shadow(struct inode *inode, struct logfs_shadow *shadow) mempool_free(shadow, super->s_shadow_pool); } +static void mark_segment(struct shadow_tree *tree, u32 segno) +{ + int err; + + if (!btree_lookup32(&tree->segment_map, segno)) { + err = btree_insert32(&tree->segment_map, segno, (void *)1, + GFP_NOFS); + BUG_ON(err); + tree->no_shadowed_segments++; + } +} + /** * fill_shadow_tree - Propagate shadow tree changes due to a write * @inode: Inode owning the page @@ -1287,6 +1279,8 @@ static void fill_shadow_tree(struct inode *inode, struct page *page, super->s_dirty_used_bytes += shadow->new_len; super->s_dirty_free_bytes += shadow->old_len; + mark_segment(tree, shadow->old_ofs >> super->s_segshift); + mark_segment(tree, shadow->new_ofs >> super->s_segshift); } } @@ -1594,7 +1588,6 @@ int logfs_delete(struct inode *inode, pgoff_t index, return ret; } -/* Rewrite cannot mark the inode dirty but has to write it immediatly. */ int logfs_rewrite_block(struct inode *inode, u64 bix, u64 ofs, gc_level_t gc_level, long flags) { @@ -1611,6 +1604,18 @@ int logfs_rewrite_block(struct inode *inode, u64 bix, u64 ofs, if (level != 0) alloc_indirect_block(inode, page, 0); err = logfs_write_buf(inode, page, flags); + if (!err && shrink_level(gc_level) == 0) { + /* Rewrite cannot mark the inode dirty but has to + * write it immediatly. + * Q: Can't we just create an alias for the inode + * instead? And if not, why not? 
+ */ + if (inode->i_ino == LOGFS_INO_MASTER) + logfs_write_anchor(inode->i_sb); + else { + err = __logfs_write_inode(inode, flags); + } + } } logfs_put_write_page(page); return err; @@ -1833,19 +1838,37 @@ static int __logfs_truncate(struct inode *inode, u64 size) return logfs_truncate_direct(inode, size); } -int logfs_truncate(struct inode *inode, u64 size) +/* + * Truncate, by changing the segment file, can consume a fair amount + * of resources. So back off from time to time and do some GC. + * 8 or 2048 blocks should be well within safety limits even if + * every single block resided in a different segment. + */ +#define TRUNCATE_STEP (8 * 1024 * 1024) +int logfs_truncate(struct inode *inode, u64 target) { struct super_block *sb = inode->i_sb; - int err; + u64 size = i_size_read(inode); + int err = 0; - logfs_get_wblocks(sb, NULL, 1); - err = __logfs_truncate(inode, size); - if (!err) - err = __logfs_write_inode(inode, 0); - logfs_put_wblocks(sb, NULL, 1); + size = ALIGN(size, TRUNCATE_STEP); + while (size > target) { + if (size > TRUNCATE_STEP) + size -= TRUNCATE_STEP; + else + size = 0; + if (size < target) + size = target; + + logfs_get_wblocks(sb, NULL, 1); + err = __logfs_truncate(inode, target); + if (!err) + err = __logfs_write_inode(inode, 0); + logfs_put_wblocks(sb, NULL, 1); + } if (!err) - err = vmtruncate(inode, size); + err = vmtruncate(inode, target); /* I don't trust error recovery yet. 
*/ WARN_ON(err); @@ -2239,8 +2262,6 @@ void logfs_cleanup_rw(struct super_block *sb) struct logfs_super *super = logfs_super(sb); destroy_meta_inode(super->s_segfile_inode); - if (super->s_block_pool) - mempool_destroy(super->s_block_pool); - if (super->s_shadow_pool) - mempool_destroy(super->s_shadow_pool); + logfs_mempool_destroy(super->s_block_pool); + logfs_mempool_destroy(super->s_shadow_pool); } diff --git a/fs/logfs/segment.c b/fs/logfs/segment.c index 1a14f9910d55..f77ce2b470ba 100644 --- a/fs/logfs/segment.c +++ b/fs/logfs/segment.c @@ -10,6 +10,7 @@ * three kinds of objects: inodes, dentries and blocks, both data and indirect. */ #include "logfs.h" +#include <linux/slab.h> static int logfs_mark_segment_bad(struct super_block *sb, u32 segno) { @@ -93,50 +94,58 @@ void __logfs_buf_write(struct logfs_area *area, u64 ofs, void *buf, size_t len, } while (len); } -/* - * bdev_writeseg will write full pages. Memset the tail to prevent data leaks. - */ -static void pad_wbuf(struct logfs_area *area, int final) +static void pad_partial_page(struct logfs_area *area) { struct super_block *sb = area->a_sb; - struct logfs_super *super = logfs_super(sb); struct page *page; u64 ofs = dev_ofs(sb, area->a_segno, area->a_used_bytes); pgoff_t index = ofs >> PAGE_SHIFT; long offset = ofs & (PAGE_SIZE-1); u32 len = PAGE_SIZE - offset; - if (len == PAGE_SIZE) { - /* The math in this function can surely use some love */ - len = 0; - } - if (len) { - BUG_ON(area->a_used_bytes >= super->s_segsize); - - page = get_mapping_page(area->a_sb, index, 0); + if (len % PAGE_SIZE) { + page = get_mapping_page(sb, index, 0); BUG_ON(!page); /* FIXME: reserve a pool */ memset(page_address(page) + offset, 0xff, len); SetPagePrivate(page); page_cache_release(page); } +} - if (!final) - return; +static void pad_full_pages(struct logfs_area *area) +{ + struct super_block *sb = area->a_sb; + struct logfs_super *super = logfs_super(sb); + u64 ofs = dev_ofs(sb, area->a_segno, area->a_used_bytes); + 
u32 len = super->s_segsize - area->a_used_bytes; + pgoff_t index = PAGE_CACHE_ALIGN(ofs) >> PAGE_CACHE_SHIFT; + pgoff_t no_indizes = len >> PAGE_CACHE_SHIFT; + struct page *page; - area->a_used_bytes += len; - for ( ; area->a_used_bytes < super->s_segsize; - area->a_used_bytes += PAGE_SIZE) { - /* Memset another page */ - index++; - page = get_mapping_page(area->a_sb, index, 0); + while (no_indizes) { + page = get_mapping_page(sb, index, 0); BUG_ON(!page); /* FIXME: reserve a pool */ - memset(page_address(page), 0xff, PAGE_SIZE); + SetPageUptodate(page); + memset(page_address(page), 0xff, PAGE_CACHE_SIZE); SetPagePrivate(page); page_cache_release(page); + index++; + no_indizes--; } } /* + * bdev_writeseg will write full pages. Memset the tail to prevent data leaks. + * Also make sure we allocate (and memset) all pages for final writeout. + */ +static void pad_wbuf(struct logfs_area *area, int final) +{ + pad_partial_page(area); + if (final) + pad_full_pages(area); +} + +/* * We have to be careful with the alias tree. Since lookup is done by bix, * it needs to be normalized, so 14, 15, 16, etc. all match when dealing with * indirect blocks. So always use it through accessor functions. 
@@ -174,14 +183,8 @@ static int btree_write_alias(struct super_block *sb, struct logfs_block *block, return 0; } -static gc_level_t btree_block_level(struct logfs_block *block) -{ - return expand_level(block->ino, block->level); -} - static struct logfs_block_ops btree_block_ops = { .write_block = btree_write_block, - .block_level = btree_block_level, .free_block = __free_block, .write_alias = btree_write_alias, }; @@ -683,7 +686,7 @@ int logfs_segment_delete(struct inode *inode, struct logfs_shadow *shadow) return 0; } -static void freeseg(struct super_block *sb, u32 segno) +void freeseg(struct super_block *sb, u32 segno) { struct logfs_super *super = logfs_super(sb); struct address_space *mapping = super->s_mapping_inode->i_mapping; @@ -910,7 +913,7 @@ err: for (i--; i >= 0; i--) free_area(super->s_area[i]); free_area(super->s_journal_area); - mempool_destroy(super->s_alias_pool); + logfs_mempool_destroy(super->s_alias_pool); return -ENOMEM; } diff --git a/fs/logfs/super.c b/fs/logfs/super.c index c66beab78dee..5866ee6e1327 100644 --- a/fs/logfs/super.c +++ b/fs/logfs/super.c @@ -11,6 +11,8 @@ */ #include "logfs.h" #include <linux/bio.h> +#include <linux/slab.h> +#include <linux/blkdev.h> #include <linux/mtd/mtd.h> #include <linux/statfs.h> #include <linux/buffer_head.h> @@ -136,6 +138,10 @@ static int logfs_sb_set(struct super_block *sb, void *_super) sb->s_fs_info = super; sb->s_mtd = super->s_mtd; sb->s_bdev = super->s_bdev; + if (sb->s_bdev) + sb->s_bdi = &bdev_get_queue(sb->s_bdev)->backing_dev_info; + if (sb->s_mtd) + sb->s_bdi = sb->s_mtd->backing_dev_info; return 0; } @@ -277,7 +283,7 @@ static int logfs_recover_sb(struct super_block *sb) } if (valid0 && valid1 && ds_cmp(ds0, ds1)) { printk(KERN_INFO"Superblocks don't match - fixing.\n"); - return write_one_sb(sb, super->s_devops->find_last_sb); + return logfs_write_sb(sb); } /* If neither is valid now, something's wrong. Didn't we properly * check them before?!? 
*/ @@ -289,6 +295,10 @@ static int logfs_make_writeable(struct super_block *sb) { int err; + err = logfs_open_segfile(sb); + if (err) + return err; + /* Repair any broken superblock copies */ err = logfs_recover_sb(sb); if (err) @@ -299,10 +309,6 @@ static int logfs_make_writeable(struct super_block *sb) if (err) return err; - err = logfs_open_segfile(sb); - if (err) - return err; - /* Do one GC pass before any data gets dirtied */ logfs_gc_pass(sb); @@ -328,7 +334,7 @@ static int logfs_get_sb_final(struct super_block *sb, struct vfsmount *mnt) sb->s_root = d_alloc_root(rootdir); if (!sb->s_root) - goto fail; + goto fail2; super->s_erase_page = alloc_pages(GFP_KERNEL, 0); if (!super->s_erase_page) @@ -451,6 +457,8 @@ static int logfs_read_sb(struct super_block *sb, int read_only) btree_init_mempool64(&super->s_shadow_tree.new, super->s_btree_pool); btree_init_mempool64(&super->s_shadow_tree.old, super->s_btree_pool); + btree_init_mempool32(&super->s_shadow_tree.segment_map, + super->s_btree_pool); ret = logfs_init_mapping(sb); if (ret) @@ -515,8 +523,8 @@ static void logfs_kill_sb(struct super_block *sb) if (super->s_erase_page) __free_page(super->s_erase_page); super->s_devops->put_device(sb); - mempool_destroy(super->s_btree_pool); - mempool_destroy(super->s_alias_pool); + logfs_mempool_destroy(super->s_btree_pool); + logfs_mempool_destroy(super->s_alias_pool); kfree(super); log_super("LogFS: Finished unmounting\n"); } @@ -572,8 +580,7 @@ int logfs_get_sb_device(struct file_system_type *type, int flags, return 0; err1: - up_write(&sb->s_umount); - deactivate_super(sb); + deactivate_locked_super(sb); return err; err0: kfree(super); diff --git a/fs/minix/itree_v1.c b/fs/minix/itree_v1.c index 82d6554b02fe..282e15ad8cd8 100644 --- a/fs/minix/itree_v1.c +++ b/fs/minix/itree_v1.c @@ -1,4 +1,5 @@ #include <linux/buffer_head.h> +#include <linux/slab.h> #include "minix.h" enum {DEPTH = 3, DIRECT = 7}; /* Only double indirect */ diff --git a/fs/mpage.c b/fs/mpage.c index 
598d54e200eb..fd56ca2ea556 100644 --- a/fs/mpage.c +++ b/fs/mpage.c @@ -16,6 +16,7 @@ #include <linux/module.h> #include <linux/mm.h> #include <linux/kdev_t.h> +#include <linux/gfp.h> #include <linux/bio.h> #include <linux/fs.h> #include <linux/buffer_head.h> diff --git a/fs/namei.c b/fs/namei.c index 1c0fca6e899e..a7dce91a7e42 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1610,8 +1610,7 @@ exit: static struct file *do_last(struct nameidata *nd, struct path *path, int open_flag, int acc_mode, - int mode, const char *pathname, - int *want_dir) + int mode, const char *pathname) { struct dentry *dir = nd->path.dentry; struct file *filp; @@ -1642,7 +1641,7 @@ static struct file *do_last(struct nameidata *nd, struct path *path, if (nd->last.name[nd->last.len]) { if (open_flag & O_CREAT) goto exit; - *want_dir = 1; + nd->flags |= LOOKUP_DIRECTORY; } /* just plain open? */ @@ -1656,8 +1655,10 @@ static struct file *do_last(struct nameidata *nd, struct path *path, if (path->dentry->d_inode->i_op->follow_link) return NULL; error = -ENOTDIR; - if (*want_dir && !path->dentry->d_inode->i_op->lookup) - goto exit_dput; + if (nd->flags & LOOKUP_DIRECTORY) { + if (!path->dentry->d_inode->i_op->lookup) + goto exit_dput; + } path_to_nameidata(path, nd); audit_inode(pathname, nd->path.dentry); goto ok; @@ -1766,7 +1767,6 @@ struct file *do_filp_open(int dfd, const char *pathname, int count = 0; int flag = open_to_namei_flags(open_flag); int force_reval = 0; - int want_dir = open_flag & O_DIRECTORY; if (!(open_flag & O_CREAT)) mode = 0; @@ -1828,7 +1828,9 @@ reval: if (open_flag & O_EXCL) nd.flags |= LOOKUP_EXCL; } - filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname, &want_dir); + if (open_flag & O_DIRECTORY) + nd.flags |= LOOKUP_DIRECTORY; + filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname); while (unlikely(!filp)) { /* trailing symlink */ struct path holder; struct inode *inode = path.dentry->d_inode; @@ -1866,7 +1868,7 @@ reval: } holder = path; nd.flags 
&= ~LOOKUP_PARENT; - filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname, &want_dir); + filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname); if (inode->i_op->put_link) inode->i_op->put_link(holder.dentry, &nd, cookie); path_put(&holder); diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c index b8b5b30d53f0..7edfcd4d5e52 100644 --- a/fs/ncpfs/dir.c +++ b/fs/ncpfs/dir.c @@ -15,7 +15,6 @@ #include <linux/errno.h> #include <linux/stat.h> #include <linux/kernel.h> -#include <linux/slab.h> #include <linux/vmalloc.h> #include <linux/mm.h> #include <asm/uaccess.h> diff --git a/fs/ncpfs/file.c b/fs/ncpfs/file.c index 6a7d901f1936..1daabb90e0a5 100644 --- a/fs/ncpfs/file.c +++ b/fs/ncpfs/file.c @@ -15,7 +15,6 @@ #include <linux/fcntl.h> #include <linux/stat.h> #include <linux/mm.h> -#include <linux/slab.h> #include <linux/vmalloc.h> #include <linux/sched.h> #include <linux/smp_lock.h> diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c index cf98da1be23e..fa3385154023 100644 --- a/fs/ncpfs/inode.c +++ b/fs/ncpfs/inode.c @@ -526,10 +526,15 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent) sb->s_blocksize_bits = 10; sb->s_magic = NCP_SUPER_MAGIC; sb->s_op = &ncp_sops; + sb->s_bdi = &server->bdi; server = NCP_SBP(sb); memset(server, 0, sizeof(*server)); + error = bdi_setup_and_register(&server->bdi, "ncpfs", BDI_CAP_MAP_COPY); + if (error) + goto out_bdi; + server->ncp_filp = ncp_filp; server->ncp_sock = sock; @@ -719,6 +724,8 @@ out_fput2: if (server->info_filp) fput(server->info_filp); out_fput: + bdi_destroy(&server->bdi); +out_bdi: /* 23/12/1998 Marcin Dalecki <dalecki@cs.net.pl>: * * The previously used put_filp(ncp_filp); was bogous, since @@ -756,6 +763,7 @@ static void ncp_put_super(struct super_block *sb) kill_pid(server->m.wdog_pid, SIGTERM, 1); put_pid(server->m.wdog_pid); + bdi_destroy(&server->bdi); kfree(server->priv.data); kfree(server->auth.object_name); vfree(server->rxbuf); diff --git a/fs/ncpfs/ioctl.c 
b/fs/ncpfs/ioctl.c index ec8f45f12e05..60a5e2864ea8 100644 --- a/fs/ncpfs/ioctl.c +++ b/fs/ncpfs/ioctl.c @@ -15,6 +15,7 @@ #include <linux/time.h> #include <linux/mm.h> #include <linux/mount.h> +#include <linux/slab.h> #include <linux/highuid.h> #include <linux/smp_lock.h> #include <linux/vmalloc.h> diff --git a/fs/ncpfs/mmap.c b/fs/ncpfs/mmap.c index 15458decdb8a..56f5b3a0e1ee 100644 --- a/fs/ncpfs/mmap.c +++ b/fs/ncpfs/mmap.c @@ -9,12 +9,12 @@ #include <linux/stat.h> #include <linux/time.h> #include <linux/kernel.h> +#include <linux/gfp.h> #include <linux/mm.h> #include <linux/shm.h> #include <linux/errno.h> #include <linux/mman.h> #include <linux/string.h> -#include <linux/slab.h> #include <linux/fcntl.h> #include <linux/ncp_fs.h> diff --git a/fs/ncpfs/sock.c b/fs/ncpfs/sock.c index e37df8d5fe70..c7ff6c700a6e 100644 --- a/fs/ncpfs/sock.c +++ b/fs/ncpfs/sock.c @@ -21,6 +21,7 @@ #include <linux/mm.h> #include <linux/netdevice.h> #include <linux/signal.h> +#include <linux/slab.h> #include <net/scm.h> #include <net/sock.h> #include <linux/ipx.h> diff --git a/fs/ncpfs/symlink.c b/fs/ncpfs/symlink.c index e3d26c1bd105..c634fd17b337 100644 --- a/fs/ncpfs/symlink.c +++ b/fs/ncpfs/symlink.c @@ -27,6 +27,7 @@ #include <linux/fs.h> #include <linux/ncp_fs.h> #include <linux/time.h> +#include <linux/slab.h> #include <linux/mm.h> #include <linux/stat.h> #include "ncplib_kernel.h" diff --git a/fs/nfs/cache_lib.c b/fs/nfs/cache_lib.c index b4ffd0146ea6..84690319e625 100644 --- a/fs/nfs/cache_lib.c +++ b/fs/nfs/cache_lib.c @@ -10,6 +10,7 @@ #include <linux/moduleparam.h> #include <linux/mount.h> #include <linux/namei.h> +#include <linux/slab.h> #include <linux/sunrpc/cache.h> #include <linux/sunrpc/rpc_pipe_fs.h> diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 84761b5bb8e2..a08770a7e857 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -7,6 +7,7 @@ */ #include <linux/nfs4.h> #include <linux/nfs_fs.h> +#include <linux/slab.h> #include 
"nfs4_fs.h" #include "callback.h" #include "delegation.h" diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index a2b8b4df125d..05af212f0edf 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -9,6 +9,7 @@ #include <linux/sunrpc/svc.h> #include <linux/nfs4.h> #include <linux/nfs_fs.h> +#include <linux/slab.h> #include "nfs4_fs.h" #include "callback.h" diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 2274f1737336..acc9c4943b84 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -35,6 +35,7 @@ #include <linux/vfs.h> #include <linux/inet.h> #include <linux/in6.h> +#include <linux/slab.h> #include <net/ipv6.h> #include <linux/nfs_xdr.h> #include <linux/sunrpc/bc_xprt.h> @@ -965,6 +966,8 @@ out_error: static void nfs_server_copy_userdata(struct nfs_server *target, struct nfs_server *source) { target->flags = source->flags; + target->rsize = source->rsize; + target->wsize = source->wsize; target->acregmin = source->acregmin; target->acregmax = source->acregmax; target->acdirmin = source->acdirmin; @@ -1293,7 +1296,8 @@ static int nfs4_init_server(struct nfs_server *server, /* Initialise the client representation from the mount data */ server->flags = data->flags; - server->caps |= NFS_CAP_ATOMIC_OPEN|NFS_CAP_CHANGE_ATTR; + server->caps |= NFS_CAP_ATOMIC_OPEN|NFS_CAP_CHANGE_ATTR| + NFS_CAP_POSIX_LOCK; server->options = data->options; /* Get a client record */ diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 2563bebc4c67..ea61d26e7871 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -10,6 +10,7 @@ #include <linux/kthread.h> #include <linux/module.h> #include <linux/sched.h> +#include <linux/slab.h> #include <linux/smp_lock.h> #include <linux/spinlock.h> @@ -23,6 +24,8 @@ static void nfs_do_free_delegation(struct nfs_delegation *delegation) { + if (delegation->cred) + put_rpccred(delegation->cred); kfree(delegation); } @@ -35,13 +38,7 @@ static void nfs_free_delegation_callback(struct rcu_head *head) static void 
nfs_free_delegation(struct nfs_delegation *delegation) { - struct rpc_cred *cred; - - cred = rcu_dereference(delegation->cred); - rcu_assign_pointer(delegation->cred, NULL); call_rcu(&delegation->rcu, nfs_free_delegation_callback); - if (cred) - put_rpccred(cred); } void nfs_mark_delegation_referenced(struct nfs_delegation *delegation) @@ -128,21 +125,35 @@ again: */ void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res) { - struct nfs_delegation *delegation = NFS_I(inode)->delegation; - struct rpc_cred *oldcred; + struct nfs_delegation *delegation; + struct rpc_cred *oldcred = NULL; - if (delegation == NULL) - return; - memcpy(delegation->stateid.data, res->delegation.data, - sizeof(delegation->stateid.data)); - delegation->type = res->delegation_type; - delegation->maxsize = res->maxsize; - oldcred = delegation->cred; - delegation->cred = get_rpccred(cred); - clear_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags); - NFS_I(inode)->delegation_state = delegation->type; - smp_wmb(); - put_rpccred(oldcred); + rcu_read_lock(); + delegation = rcu_dereference(NFS_I(inode)->delegation); + if (delegation != NULL) { + spin_lock(&delegation->lock); + if (delegation->inode != NULL) { + memcpy(delegation->stateid.data, res->delegation.data, + sizeof(delegation->stateid.data)); + delegation->type = res->delegation_type; + delegation->maxsize = res->maxsize; + oldcred = delegation->cred; + delegation->cred = get_rpccred(cred); + clear_bit(NFS_DELEGATION_NEED_RECLAIM, + &delegation->flags); + NFS_I(inode)->delegation_state = delegation->type; + spin_unlock(&delegation->lock); + put_rpccred(oldcred); + rcu_read_unlock(); + } else { + /* We appear to have raced with a delegation return. 
*/ + spin_unlock(&delegation->lock); + rcu_read_unlock(); + nfs_inode_set_delegation(inode, cred, res); + } + } else { + rcu_read_unlock(); + } } static int nfs_do_return_delegation(struct inode *inode, struct nfs_delegation *delegation, int issync) @@ -165,9 +176,13 @@ static struct inode *nfs_delegation_grab_inode(struct nfs_delegation *delegation return inode; } -static struct nfs_delegation *nfs_detach_delegation_locked(struct nfs_inode *nfsi, const nfs4_stateid *stateid) +static struct nfs_delegation *nfs_detach_delegation_locked(struct nfs_inode *nfsi, + const nfs4_stateid *stateid, + struct nfs_client *clp) { - struct nfs_delegation *delegation = rcu_dereference(nfsi->delegation); + struct nfs_delegation *delegation = + rcu_dereference_protected(nfsi->delegation, + lockdep_is_held(&clp->cl_lock)); if (delegation == NULL) goto nomatch; @@ -194,7 +209,7 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct { struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; struct nfs_inode *nfsi = NFS_I(inode); - struct nfs_delegation *delegation; + struct nfs_delegation *delegation, *old_delegation; struct nfs_delegation *freeme = NULL; int status = 0; @@ -212,10 +227,12 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct spin_lock_init(&delegation->lock); spin_lock(&clp->cl_lock); - if (rcu_dereference(nfsi->delegation) != NULL) { - if (memcmp(&delegation->stateid, &nfsi->delegation->stateid, - sizeof(delegation->stateid)) == 0 && - delegation->type == nfsi->delegation->type) { + old_delegation = rcu_dereference_protected(nfsi->delegation, + lockdep_is_held(&clp->cl_lock)); + if (old_delegation != NULL) { + if (memcmp(&delegation->stateid, &old_delegation->stateid, + sizeof(old_delegation->stateid)) == 0 && + delegation->type == old_delegation->type) { goto out; } /* @@ -225,12 +242,12 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct dfprintk(FILE, "%s: server %s handed out " 
"a duplicate delegation!\n", __func__, clp->cl_hostname); - if (delegation->type <= nfsi->delegation->type) { + if (delegation->type <= old_delegation->type) { freeme = delegation; delegation = NULL; goto out; } - freeme = nfs_detach_delegation_locked(nfsi, NULL); + freeme = nfs_detach_delegation_locked(nfsi, NULL, clp); } list_add_rcu(&delegation->super_list, &clp->cl_delegations); nfsi->delegation_state = delegation->type; @@ -300,7 +317,7 @@ restart: if (inode == NULL) continue; spin_lock(&clp->cl_lock); - delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL); + delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL, clp); spin_unlock(&clp->cl_lock); rcu_read_unlock(); if (delegation != NULL) { @@ -329,9 +346,9 @@ void nfs_inode_return_delegation_noreclaim(struct inode *inode) struct nfs_inode *nfsi = NFS_I(inode); struct nfs_delegation *delegation; - if (rcu_dereference(nfsi->delegation) != NULL) { + if (rcu_access_pointer(nfsi->delegation) != NULL) { spin_lock(&clp->cl_lock); - delegation = nfs_detach_delegation_locked(nfsi, NULL); + delegation = nfs_detach_delegation_locked(nfsi, NULL, clp); spin_unlock(&clp->cl_lock); if (delegation != NULL) nfs_do_return_delegation(inode, delegation, 0); @@ -345,9 +362,9 @@ int nfs_inode_return_delegation(struct inode *inode) struct nfs_delegation *delegation; int err = 0; - if (rcu_dereference(nfsi->delegation) != NULL) { + if (rcu_access_pointer(nfsi->delegation) != NULL) { spin_lock(&clp->cl_lock); - delegation = nfs_detach_delegation_locked(nfsi, NULL); + delegation = nfs_detach_delegation_locked(nfsi, NULL, clp); spin_unlock(&clp->cl_lock); if (delegation != NULL) { nfs_msync_inode(inode); @@ -525,7 +542,7 @@ restart: if (inode == NULL) continue; spin_lock(&clp->cl_lock); - delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL); + delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL, clp); spin_unlock(&clp->cl_lock); rcu_read_unlock(); if (delegation != NULL) diff --git a/fs/nfs/dir.c 
b/fs/nfs/dir.c index c6f2750648f4..a7bb5c694aa3 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -837,6 +837,8 @@ out_zap_parent: /* If we have submounts, don't unhash ! */ if (have_submounts(dentry)) goto out_valid; + if (dentry->d_flags & DCACHE_DISCONNECTED) + goto out_valid; shrink_dcache_parent(dentry); } d_drop(dentry); @@ -1025,12 +1027,12 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry res = NULL; goto out; /* This turned out not to be a regular file */ + case -EISDIR: case -ENOTDIR: goto no_open; case -ELOOP: if (!(nd->intent.open.flags & O_NOFOLLOW)) goto no_open; - /* case -EISDIR: */ /* case -EINVAL: */ default: goto out; @@ -1050,7 +1052,7 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd) struct inode *dir; int openflags, ret = 0; - if (!is_atomic_open(nd)) + if (!is_atomic_open(nd) || d_mountpoint(dentry)) goto no_open; parent = dget_parent(dentry); dir = parent->d_inode; diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 0d289823e856..ad4cd31d6050 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -44,6 +44,7 @@ #include <linux/file.h> #include <linux/pagemap.h> #include <linux/kref.h> +#include <linux/slab.h> #include <linux/nfs_fs.h> #include <linux/nfs_page.h> diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c index 3f0cd4dfddaf..76fd235d0024 100644 --- a/fs/nfs/dns_resolve.c +++ b/fs/nfs/dns_resolve.c @@ -9,6 +9,7 @@ #include <linux/hash.h> #include <linux/string.h> #include <linux/kmod.h> +#include <linux/slab.h> #include <linux/module.h> #include <linux/socket.h> #include <linux/seq_file.h> diff --git a/fs/nfs/file.c b/fs/nfs/file.c index ae8d02294e46..8d965bddb87e 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -24,9 +24,9 @@ #include <linux/nfs_fs.h> #include <linux/nfs_mount.h> #include <linux/mm.h> -#include <linux/slab.h> #include <linux/pagemap.h> #include <linux/aio.h> +#include <linux/gfp.h> #include <asm/uaccess.h> #include <asm/system.h> @@ -491,7 +491,8 @@ 
static int nfs_release_page(struct page *page, gfp_t gfp) { dfprintk(PAGECACHE, "NFS: release_page(%p)\n", page); - if (gfp & __GFP_WAIT) + /* Only do I/O if gfp is a superset of GFP_KERNEL */ + if ((gfp & GFP_KERNEL) == GFP_KERNEL) nfs_wb_page(page->mapping->host, page); /* If PagePrivate() is set, then the page is not freeable */ if (PagePrivate(page)) diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c index 237874f1af23..a6b16ed93229 100644 --- a/fs/nfs/fscache.c +++ b/fs/nfs/fscache.c @@ -17,6 +17,7 @@ #include <linux/nfs_fs_sb.h> #include <linux/in6.h> #include <linux/seq_file.h> +#include <linux/slab.h> #include "internal.h" #include "iostat.h" diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index e358df75a6ad..50a56edca0b5 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -36,6 +36,7 @@ #include <linux/vfs.h> #include <linux/inet.h> #include <linux/nfs_xdr.h> +#include <linux/slab.h> #include <asm/system.h> #include <asm/uaccess.h> @@ -622,10 +623,10 @@ struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_c list_for_each_entry(pos, &nfsi->open_files, list) { if (cred != NULL && pos->cred != cred) continue; - if ((pos->mode & mode) == mode) { - ctx = get_nfs_open_context(pos); - break; - } + if ((pos->mode & (FMODE_READ|FMODE_WRITE)) != mode) + continue; + ctx = get_nfs_open_context(pos); + break; } spin_unlock(&inode->i_lock); return ctx; diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index 40c766782891..7888cf36022d 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -8,6 +8,7 @@ */ #include <linux/dcache.h> +#include <linux/gfp.h> #include <linux/mount.h> #include <linux/namei.h> #include <linux/nfs_fs.h> diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c index 7bc2da8efd4a..81cf14257916 100644 --- a/fs/nfs/nfs2xdr.c +++ b/fs/nfs/nfs2xdr.c @@ -12,7 +12,6 @@ #include <linux/param.h> #include <linux/time.h> #include <linux/mm.h> -#include <linux/slab.h> #include <linux/errno.h> #include <linux/string.h> #include 
<linux/in.h> diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c index bac60515a4b3..d150ae0c5ecd 100644 --- a/fs/nfs/nfs3acl.c +++ b/fs/nfs/nfs3acl.c @@ -1,4 +1,5 @@ #include <linux/fs.h> +#include <linux/gfp.h> #include <linux/nfs.h> #include <linux/nfs3.h> #include <linux/nfs_fs.h> diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 24992f0a29f2..e701002694e5 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -10,6 +10,7 @@ #include <linux/errno.h> #include <linux/string.h> #include <linux/sunrpc/clnt.h> +#include <linux/slab.h> #include <linux/nfs.h> #include <linux/nfs3.h> #include <linux/nfs_fs.h> diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index 5fe5492fbd29..56a86f6ac8b5 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -9,7 +9,6 @@ #include <linux/param.h> #include <linux/time.h> #include <linux/mm.h> -#include <linux/slab.h> #include <linux/errno.h> #include <linux/string.h> #include <linux/in.h> diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c index fa3408f20112..f071d12c613b 100644 --- a/fs/nfs/nfs4namespace.c +++ b/fs/nfs/nfs4namespace.c @@ -11,6 +11,7 @@ #include <linux/mount.h> #include <linux/namei.h> #include <linux/nfs_fs.h> +#include <linux/slab.h> #include <linux/string.h> #include <linux/sunrpc/clnt.h> #include <linux/vfs.h> diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index f9254fb0c9d0..071fcedd517c 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -39,6 +39,7 @@ #include <linux/delay.h> #include <linux/errno.h> #include <linux/string.h> +#include <linux/slab.h> #include <linux/sunrpc/clnt.h> #include <linux/nfs.h> #include <linux/nfs4.h> @@ -1522,6 +1523,8 @@ static int _nfs4_proc_open(struct nfs4_opendata *data) nfs_post_op_update_inode(dir, o_res->dir_attr); } else nfs_refresh_inode(dir, o_res->dir_attr); + if ((o_res->rflags & NFS4_OPEN_RESULT_LOCKTYPE_POSIX) == 0) + server->caps &= ~NFS_CAP_POSIX_LOCK; if(o_res->rflags & NFS4_OPEN_RESULT_CONFIRM) { status = _nfs4_proc_open_confirm(data); if 
(status != 0) @@ -1663,7 +1666,7 @@ static int _nfs4_do_open(struct inode *dir, struct path *path, fmode_t fmode, in status = PTR_ERR(state); if (IS_ERR(state)) goto err_opendata_put; - if ((opendata->o_res.rflags & NFS4_OPEN_RESULT_LOCKTYPE_POSIX) != 0) + if (server->caps & NFS_CAP_POSIX_LOCK) set_bit(NFS_STATE_POSIX_LOCKS, &state->flags); nfs4_opendata_put(opendata); nfs4_put_state_owner(sp); @@ -2067,8 +2070,7 @@ nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags, st case -EDQUOT: case -ENOSPC: case -EROFS: - lookup_instantiate_filp(nd, (struct dentry *)state, NULL); - return 1; + return PTR_ERR(state); default: goto out_drop; } @@ -5216,9 +5218,12 @@ static int nfs41_proc_reclaim_complete(struct nfs_client *clp) msg.rpc_resp = &calldata->res; task_setup_data.callback_data = calldata; task = rpc_run_task(&task_setup_data); - if (IS_ERR(task)) + if (IS_ERR(task)) { status = PTR_ERR(task); + goto out; + } rpc_put_task(task); + return 0; out: dprintk("<-- %s status=%d\n", __func__, status); return status; diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 4d338be492cb..38f3b582e7c2 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -38,7 +38,6 @@ #include <linux/param.h> #include <linux/time.h> #include <linux/mm.h> -#include <linux/slab.h> #include <linux/errno.h> #include <linux/string.h> #include <linux/in.h> @@ -5552,6 +5551,8 @@ static int nfs4_xdr_dec_delegreturn(struct rpc_rqst *rqstp, __be32 *p, struct nf if (status != 0) goto out; status = decode_delegreturn(&xdr); + if (status != 0) + goto out; decode_getfattr(&xdr, res->fattr, res->server, !RPC_IS_ASYNC(rqstp->rq_task)); out: diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index c752d944fe9e..0288be80444f 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -29,7 +29,6 @@ #include <linux/types.h> #include <linux/param.h> -#include <linux/slab.h> #include <linux/time.h> #include <linux/mm.h> #include <linux/errno.h> diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 
6baf9a393466..b4148fc00f9f 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -48,6 +48,7 @@ #include <linux/vfs.h> #include <linux/inet.h> #include <linux/in6.h> +#include <linux/slab.h> #include <net/ipv6.h> #include <linux/netdevice.h> #include <linux/nfs_xdr.h> @@ -2186,6 +2187,7 @@ static int nfs_get_sb(struct file_system_type *fs_type, if (data->version == 4) { error = nfs4_try_mount(flags, dev_name, data, mnt); kfree(data->client_address); + kfree(data->nfs_server.export_path); goto out; } #endif /* CONFIG_NFS_V4 */ @@ -2656,7 +2658,7 @@ static void nfs_fix_devname(const struct path *path, struct vfsmount *mnt) devname = nfs_path(path->mnt->mnt_devname, path->mnt->mnt_root, path->dentry, page, PAGE_SIZE); - if (devname == NULL) + if (IS_ERR(devname)) goto out_freepage; tmp = kstrdup(devname, GFP_KERNEL); if (tmp == NULL) diff --git a/fs/nfs/symlink.c b/fs/nfs/symlink.c index 2ea9e5c27e55..05c9e02f4153 100644 --- a/fs/nfs/symlink.c +++ b/fs/nfs/symlink.c @@ -19,7 +19,6 @@ #include <linux/pagemap.h> #include <linux/stat.h> #include <linux/mm.h> -#include <linux/slab.h> #include <linux/string.h> #include <linux/namei.h> diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 53ff70e23993..3aea3ca98ab7 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -201,6 +201,7 @@ static int nfs_set_page_writeback(struct page *page) struct inode *inode = page->mapping->host; struct nfs_server *nfss = NFS_SERVER(inode); + page_cache_get(page); if (atomic_long_inc_return(&nfss->writeback) > NFS_CONGESTION_ON_THRESH) { set_bdi_congested(&nfss->backing_dev_info, @@ -216,6 +217,7 @@ static void nfs_end_page_writeback(struct page *page) struct nfs_server *nfss = NFS_SERVER(inode); end_page_writeback(page); + page_cache_release(page); if (atomic_long_dec_return(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH) clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC); } @@ -421,6 +423,7 @@ static void nfs_mark_request_dirty(struct nfs_page *req) { 
__set_page_dirty_nobuffers(req->wb_page); + __mark_inode_dirty(req->wb_page->mapping->host, I_DIRTY_DATASYNC); } #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) @@ -660,9 +663,11 @@ static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page, req = nfs_setup_write_request(ctx, page, offset, count); if (IS_ERR(req)) return PTR_ERR(req); + nfs_mark_request_dirty(req); /* Update file length */ nfs_grow_file(page, offset, count); nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes); + nfs_mark_request_dirty(req); nfs_clear_page_tag_locked(req); return 0; } @@ -739,8 +744,6 @@ int nfs_updatepage(struct file *file, struct page *page, status = nfs_writepage_setup(ctx, page, offset, count); if (status < 0) nfs_set_pageerror(page); - else - __set_page_dirty_nobuffers(page); dprintk("NFS: nfs_updatepage returns %d (isize %lld)\n", status, (long long)i_size_read(inode)); @@ -749,13 +752,12 @@ int nfs_updatepage(struct file *file, struct page *page, static void nfs_writepage_release(struct nfs_page *req) { + struct page *page = req->wb_page; - if (PageError(req->wb_page) || !nfs_reschedule_unstable_write(req)) { - nfs_end_page_writeback(req->wb_page); + if (PageError(req->wb_page) || !nfs_reschedule_unstable_write(req)) nfs_inode_remove_request(req); - } else - nfs_end_page_writeback(req->wb_page); nfs_clear_page_tag_locked(req); + nfs_end_page_writeback(page); } static int flush_task_priority(int how) @@ -779,7 +781,6 @@ static int nfs_write_rpcsetup(struct nfs_page *req, int how) { struct inode *inode = req->wb_context->path.dentry->d_inode; - int flags = (how & FLUSH_SYNC) ? 
0 : RPC_TASK_ASYNC; int priority = flush_task_priority(how); struct rpc_task *task; struct rpc_message msg = { @@ -794,9 +795,10 @@ static int nfs_write_rpcsetup(struct nfs_page *req, .callback_ops = call_ops, .callback_data = data, .workqueue = nfsiod_workqueue, - .flags = flags, + .flags = RPC_TASK_ASYNC, .priority = priority, }; + int ret = 0; /* Set up the RPC argument and reply structs * NB: take care not to mess about with data->commit et al. */ @@ -835,10 +837,18 @@ static int nfs_write_rpcsetup(struct nfs_page *req, (unsigned long long)data->args.offset); task = rpc_run_task(&task_setup_data); - if (IS_ERR(task)) - return PTR_ERR(task); + if (IS_ERR(task)) { + ret = PTR_ERR(task); + goto out; + } + if (how & FLUSH_SYNC) { + ret = rpc_wait_for_completion_task(task); + if (ret == 0) + ret = task->tk_status; + } rpc_put_task(task); - return 0; +out: + return ret; } /* If a nfs_flush_* function fails, it should remove reqs from @head and @@ -847,9 +857,11 @@ static int nfs_write_rpcsetup(struct nfs_page *req, */ static void nfs_redirty_request(struct nfs_page *req) { + struct page *page = req->wb_page; + nfs_mark_request_dirty(req); - nfs_end_page_writeback(req->wb_page); nfs_clear_page_tag_locked(req); + nfs_end_page_writeback(page); } /* @@ -1084,16 +1096,15 @@ static void nfs_writeback_release_full(void *calldata) if (nfs_write_need_commit(data)) { memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf)); nfs_mark_request_commit(req); - nfs_end_page_writeback(page); dprintk(" marked for commit\n"); goto next; } dprintk(" OK\n"); remove_request: - nfs_end_page_writeback(page); nfs_inode_remove_request(req); next: nfs_clear_page_tag_locked(req); + nfs_end_page_writeback(page); } nfs_writedata_release(calldata); } @@ -1190,6 +1201,25 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) +static int nfs_commit_set_lock(struct nfs_inode *nfsi, int may_wait) +{ + if 
(!test_and_set_bit(NFS_INO_COMMIT, &nfsi->flags)) + return 1; + if (may_wait && !out_of_line_wait_on_bit_lock(&nfsi->flags, + NFS_INO_COMMIT, nfs_wait_bit_killable, + TASK_KILLABLE)) + return 1; + return 0; +} + +static void nfs_commit_clear_lock(struct nfs_inode *nfsi) +{ + clear_bit(NFS_INO_COMMIT, &nfsi->flags); + smp_mb__after_clear_bit(); + wake_up_bit(&nfsi->flags, NFS_INO_COMMIT); +} + + static void nfs_commitdata_release(void *data) { struct nfs_write_data *wdata = data; @@ -1207,7 +1237,6 @@ static int nfs_commit_rpcsetup(struct list_head *head, { struct nfs_page *first = nfs_list_entry(head->next); struct inode *inode = first->wb_context->path.dentry->d_inode; - int flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC; int priority = flush_task_priority(how); struct rpc_task *task; struct rpc_message msg = { @@ -1222,7 +1251,7 @@ static int nfs_commit_rpcsetup(struct list_head *head, .callback_ops = &nfs_commit_ops, .callback_data = data, .workqueue = nfsiod_workqueue, - .flags = flags, + .flags = RPC_TASK_ASYNC, .priority = priority, }; @@ -1282,6 +1311,7 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how) BDI_RECLAIMABLE); nfs_clear_page_tag_locked(req); } + nfs_commit_clear_lock(NFS_I(inode)); return -ENOMEM; } @@ -1337,6 +1367,7 @@ static void nfs_commit_release(void *calldata) next: nfs_clear_page_tag_locked(req); } + nfs_commit_clear_lock(NFS_I(data->inode)); nfs_commitdata_release(calldata); } @@ -1351,8 +1382,11 @@ static const struct rpc_call_ops nfs_commit_ops = { static int nfs_commit_inode(struct inode *inode, int how) { LIST_HEAD(head); - int res; + int may_wait = how & FLUSH_SYNC; + int res = 0; + if (!nfs_commit_set_lock(NFS_I(inode), may_wait)) + goto out; spin_lock(&inode->i_lock); res = nfs_scan_commit(inode, &head, 0, 0); spin_unlock(&inode->i_lock); @@ -1360,7 +1394,13 @@ static int nfs_commit_inode(struct inode *inode, int how) int error = nfs_commit_list(inode, &head, how); if (error < 0) return error; - } + if 
(may_wait) + wait_on_bit(&NFS_I(inode)->flags, NFS_INO_COMMIT, + nfs_wait_bit_killable, + TASK_KILLABLE); + } else + nfs_commit_clear_lock(NFS_I(inode)); +out: return res; } @@ -1432,6 +1472,7 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page) BUG_ON(!PageLocked(page)); for (;;) { + wait_on_page_writeback(page); req = nfs_page_find_request(page); if (req == NULL) break; @@ -1466,30 +1507,18 @@ int nfs_wb_page(struct inode *inode, struct page *page) .range_start = range_start, .range_end = range_end, }; - struct nfs_page *req; - int need_commit; int ret; while(PagePrivate(page)) { + wait_on_page_writeback(page); if (clear_page_dirty_for_io(page)) { ret = nfs_writepage_locked(page, &wbc); if (ret < 0) goto out_error; } - req = nfs_find_and_lock_request(page); - if (!req) - break; - if (IS_ERR(req)) { - ret = PTR_ERR(req); + ret = sync_inode(inode, &wbc); + if (ret < 0) goto out_error; - } - need_commit = test_bit(PG_CLEAN, &req->wb_flags); - nfs_clear_page_tag_locked(req); - if (need_commit) { - ret = nfs_commit_inode(inode, FLUSH_SYNC); - if (ret < 0) - goto out_error; - } } return 0; out_error: diff --git a/fs/nfs_common/nfsacl.c b/fs/nfs_common/nfsacl.c index 04133aacb1e5..fc1c52571c03 100644 --- a/fs/nfs_common/nfsacl.c +++ b/fs/nfs_common/nfsacl.c @@ -22,6 +22,7 @@ #include <linux/module.h> #include <linux/fs.h> +#include <linux/gfp.h> #include <linux/sunrpc/xdr.h> #include <linux/nfsacl.h> #include <linux/nfs3.h> diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index a0c4016413f1..872a5ef550c7 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -12,6 +12,7 @@ * Copyright (C) 1995, 1996 Olaf Kirch, <okir@monad.swb.de> */ +#include <linux/slab.h> #include <linux/namei.h> #include <linux/module.h> #include <linux/exportfs.h> diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c index f20589d2ae27..6aa5590c3679 100644 --- a/fs/nfsd/nfs2acl.c +++ b/fs/nfsd/nfs2acl.c @@ -7,6 +7,7 @@ #include "nfsd.h" /* FIXME: nfsacl.h is a broken header */ #include 
<linux/nfsacl.h> +#include <linux/gfp.h> #include "cache.h" #include "xdr3.h" #include "vfs.h" diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c index e0c4846bad92..a596e9d987e4 100644 --- a/fs/nfsd/nfs3acl.c +++ b/fs/nfsd/nfs3acl.c @@ -7,6 +7,7 @@ #include "nfsd.h" /* FIXME: nfsacl.h is a broken header */ #include <linux/nfsacl.h> +#include <linux/gfp.h> #include "cache.h" #include "xdr3.h" #include "vfs.h" diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c index 88150685df34..e48052615159 100644 --- a/fs/nfsd/nfs4acl.c +++ b/fs/nfsd/nfs4acl.c @@ -34,6 +34,7 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +#include <linux/slab.h> #include <linux/nfs_fs.h> #include <linux/nfs4_acl.h> diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 4bc22c763de7..7e32bd394e86 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -32,6 +32,7 @@ */ #include <linux/sunrpc/clnt.h> +#include <linux/slab.h> #include "nfsd.h" #include "state.h" diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c index 6e2983b27f3c..c78dbf493424 100644 --- a/fs/nfsd/nfs4idmap.c +++ b/fs/nfsd/nfs4idmap.c @@ -36,6 +36,7 @@ #include <linux/nfsd_idmap.h> #include <linux/seq_file.h> #include <linux/sched.h> +#include <linux/slab.h> /* * Cache entry diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 37514c469846..2ab9e8501bfe 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -33,6 +33,7 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include <linux/file.h> +#include <linux/slab.h> #include "cache.h" #include "xdr4.h" diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index 98fb98e330b4..7a9ae3254a4b 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -32,6 +32,7 @@ */ #include <linux/file.h> +#include <linux/slab.h> #include <linux/namei.h> #include <linux/crypto.h> #include <linux/sched.h> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index c97fddbd17db..6a8fedaa4f55 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -34,6 +34,7 @@ #include <linux/file.h> #include <linux/smp_lock.h> +#include <linux/slab.h> #include <linux/namei.h> #include <linux/swap.h> #include <linux/sunrpc/svcauth_gss.h> diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index c47b4d7bafa7..34ccf815ea8a 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -40,6 +40,7 @@ * at the end of nfs4svc_decode_compoundargs. */ +#include <linux/slab.h> #include <linux/namei.h> #include <linux/statfs.h> #include <linux/utsname.h> @@ -160,10 +161,10 @@ static __be32 *read_buf(struct nfsd4_compoundargs *argp, u32 nbytes) argp->p = page_address(argp->pagelist[0]); argp->pagelist++; if (argp->pagelen < PAGE_SIZE) { - argp->end = p + (argp->pagelen>>2); + argp->end = argp->p + (argp->pagelen>>2); argp->pagelen = 0; } else { - argp->end = p + (PAGE_SIZE>>2); + argp->end = argp->p + (PAGE_SIZE>>2); argp->pagelen -= PAGE_SIZE; } memcpy(((char*)p)+avail, argp->p, (nbytes - avail)); @@ -1425,10 +1426,10 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) argp->p = page_address(argp->pagelist[0]); argp->pagelist++; if (argp->pagelen < PAGE_SIZE) { - argp->end = p + (argp->pagelen>>2); + argp->end = argp->p + (argp->pagelen>>2); argp->pagelen = 0; } else { - argp->end = p + (PAGE_SIZE>>2); + argp->end = argp->p + (PAGE_SIZE>>2); argp->pagelen -= PAGE_SIZE; } } diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index da08560c4818..4666a209678a 100644 --- a/fs/nfsd/nfscache.c +++ 
b/fs/nfsd/nfscache.c @@ -8,6 +8,8 @@ * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> */ +#include <linux/slab.h> + #include "nfsd.h" #include "cache.h" diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 0f0e77f2012f..e3591073098f 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -4,6 +4,7 @@ * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> */ +#include <linux/slab.h> #include <linux/namei.h> #include <linux/ctype.h> diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index a11b0e8678ee..6dd5f1970e01 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -25,6 +25,7 @@ #include <linux/xattr.h> #include <linux/jhash.h> #include <linux/ima.h> +#include <linux/slab.h> #include <asm/uaccess.h> #include <linux/exportfs.h> #include <linux/writeback.h> diff --git a/fs/nilfs2/alloc.c b/fs/nilfs2/alloc.c index 3f959f1879d8..7cfb87e692da 100644 --- a/fs/nilfs2/alloc.c +++ b/fs/nilfs2/alloc.c @@ -26,6 +26,7 @@ #include <linux/buffer_head.h> #include <linux/fs.h> #include <linux/bitops.h> +#include <linux/slab.h> #include "mdt.h" #include "alloc.h" @@ -425,7 +426,7 @@ void nilfs_palloc_abort_alloc_entry(struct inode *inode, bitmap = bitmap_kaddr + bh_offset(req->pr_bitmap_bh); if (!nilfs_clear_bit_atomic(nilfs_mdt_bgl_lock(inode, group), group_offset, bitmap)) - printk(KERN_WARNING "%s: entry numer %llu already freed\n", + printk(KERN_WARNING "%s: entry number %llu already freed\n", __func__, (unsigned long long)req->pr_entry_nr); nilfs_palloc_group_desc_add_entries(inode, group, desc, 1); diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c index 471e269536ae..447ce47a3306 100644 --- a/fs/nilfs2/btnode.c +++ b/fs/nilfs2/btnode.c @@ -27,6 +27,7 @@ #include <linux/buffer_head.h> #include <linux/mm.h> #include <linux/backing-dev.h> +#include <linux/gfp.h> #include "nilfs.h" #include "mdt.h" #include "dat.h" diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c index 7cdd98b8d514..76c38e3e19d2 100644 --- a/fs/nilfs2/btree.c +++ b/fs/nilfs2/btree.c @@ -1879,7 +1879,7 @@ 
static int nilfs_btree_propagate_v(struct nilfs_btree *btree, struct nilfs_btree_path *path, int level, struct buffer_head *bh) { - int maxlevel, ret; + int maxlevel = 0, ret; struct nilfs_btree_node *parent; struct inode *dat = nilfs_bmap_get_dat(&btree->bt_bmap); __u64 ptr; diff --git a/fs/nilfs2/gcinode.c b/fs/nilfs2/gcinode.c index 8880a9e281e7..145f03cd7d3e 100644 --- a/fs/nilfs2/gcinode.c +++ b/fs/nilfs2/gcinode.c @@ -45,6 +45,7 @@ #include <linux/buffer_head.h> #include <linux/mpage.h> #include <linux/hash.h> +#include <linux/slab.h> #include <linux/swap.h> #include "nilfs.h" #include "page.h" diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index 7868cc122ac7..0957b58f909d 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -22,6 +22,7 @@ */ #include <linux/buffer_head.h> +#include <linux/gfp.h> #include <linux/mpage.h> #include <linux/writeback.h> #include <linux/uio.h> diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c index 313d0a21da48..f90a33d9a5b0 100644 --- a/fs/nilfs2/ioctl.c +++ b/fs/nilfs2/ioctl.c @@ -23,6 +23,7 @@ #include <linux/fs.h> #include <linux/wait.h> #include <linux/smp_lock.h> /* lock_kernel(), unlock_kernel() */ +#include <linux/slab.h> #include <linux/capability.h> /* capable() */ #include <linux/uaccess.h> /* copy_from_user(), copy_to_user() */ #include <linux/vmalloc.h> @@ -648,7 +649,7 @@ static int nilfs_ioctl_get_info(struct inode *inode, struct file *filp, long nilfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { struct inode *inode = filp->f_dentry->d_inode; - void __user *argp = (void * __user *)arg; + void __user *argp = (void __user *)arg; switch (cmd) { case NILFS_IOCTL_CHANGE_CPMODE: diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c index 06713ffcc7f2..024be8c35bb6 100644 --- a/fs/nilfs2/mdt.c +++ b/fs/nilfs2/mdt.c @@ -26,6 +26,7 @@ #include <linux/writeback.h> #include <linux/backing-dev.h> #include <linux/swap.h> +#include <linux/slab.h> #include "nilfs.h" #include "segment.h" #include "page.h" diff 
--git a/fs/nilfs2/page.c b/fs/nilfs2/page.c index fc246dba112a..8de3e1e48130 100644 --- a/fs/nilfs2/page.c +++ b/fs/nilfs2/page.c @@ -29,6 +29,7 @@ #include <linux/list.h> #include <linux/highmem.h> #include <linux/pagevec.h> +#include <linux/gfp.h> #include "nilfs.h" #include "page.h" #include "mdt.h" diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c index 017bedc761a0..ba43146f3c30 100644 --- a/fs/nilfs2/recovery.c +++ b/fs/nilfs2/recovery.c @@ -23,6 +23,7 @@ #include <linux/buffer_head.h> #include <linux/blkdev.h> #include <linux/swap.h> +#include <linux/slab.h> #include <linux/crc32.h> #include "nilfs.h" #include "segment.h" diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c index 636eaafd6ea2..17851f77f739 100644 --- a/fs/nilfs2/segbuf.c +++ b/fs/nilfs2/segbuf.c @@ -25,6 +25,7 @@ #include <linux/writeback.h> #include <linux/crc32.h> #include <linux/backing-dev.h> +#include <linux/slab.h> #include "page.h" #include "segbuf.h" @@ -323,14 +324,14 @@ int nilfs_write_logs(struct list_head *logs, struct the_nilfs *nilfs) int nilfs_wait_on_logs(struct list_head *logs) { struct nilfs_segment_buffer *segbuf; - int err; + int err, ret = 0; list_for_each_entry(segbuf, logs, sb_list) { err = nilfs_segbuf_wait(segbuf); - if (err) - return err; + if (err && !ret) + ret = err; } - return 0; + return ret; } /* diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index 69576a95e13f..6a7dbd8451db 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -32,6 +32,7 @@ #include <linux/kthread.h> #include <linux/crc32.h> #include <linux/pagevec.h> +#include <linux/slab.h> #include "nilfs.h" #include "btnode.h" #include "page.h" @@ -1510,6 +1511,12 @@ static int nilfs_segctor_collect(struct nilfs_sc_info *sci, if (mode != SC_LSEG_SR || sci->sc_stage.scnt < NILFS_ST_CPFILE) break; + nilfs_clear_logs(&sci->sc_segbufs); + + err = nilfs_segctor_extend_segments(sci, nilfs, nadd); + if (unlikely(err)) + return err; + if (sci->sc_stage.flags & NILFS_CF_SUFREED) { err = 
nilfs_sufile_cancel_freev(nilfs->ns_sufile, sci->sc_freesegs, @@ -1517,12 +1524,6 @@ static int nilfs_segctor_collect(struct nilfs_sc_info *sci, NULL); WARN_ON(err); /* do not happen */ } - nilfs_clear_logs(&sci->sc_segbufs); - - err = nilfs_segctor_extend_segments(sci, nilfs, nadd); - if (unlikely(err)) - return err; - nadd = min_t(int, nadd << 1, SC_MAX_SEGDELTA); sci->sc_stage = prev_stage; } @@ -1897,8 +1898,7 @@ static void nilfs_segctor_abort_construction(struct nilfs_sc_info *sci, list_splice_tail_init(&sci->sc_write_logs, &logs); ret = nilfs_wait_on_logs(&logs); - if (ret) - nilfs_abort_logs(&logs, NULL, sci->sc_super_root, ret); + nilfs_abort_logs(&logs, NULL, sci->sc_super_root, ret ? : err); list_splice_tail_init(&sci->sc_segbufs, &logs); nilfs_cancel_segusage(&logs, nilfs->ns_sufile); diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 0cdbc5e7655a..48145f505a6a 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -749,6 +749,7 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent, sb->s_export_op = &nilfs_export_ops; sb->s_root = NULL; sb->s_time_gran = 1; + sb->s_bdi = nilfs->ns_bdi; err = load_nilfs(nilfs, sbi); if (err) diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h index e9795f1724d7..1ab974533697 100644 --- a/fs/nilfs2/the_nilfs.h +++ b/fs/nilfs2/the_nilfs.h @@ -29,6 +29,7 @@ #include <linux/fs.h> #include <linux/blkdev.h> #include <linux/backing-dev.h> +#include <linux/slab.h> #include "sb.h" /* the_nilfs struct */ diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index 037e878e03fc..fcc2f064af83 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -18,6 +18,7 @@ #include <linux/dcache.h> #include <linux/fs.h> +#include <linux/gfp.h> #include <linux/init.h> #include <linux/module.h> #include <linux/srcu.h> diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c index 3165d85aada2..0399bcbe09c8 100644 --- a/fs/notify/inode_mark.c +++ b/fs/notify/inode_mark.c @@ -87,7 +87,6 @@ #include 
<linux/kernel.h> #include <linux/module.h> #include <linux/mutex.h> -#include <linux/slab.h> #include <linux/spinlock.h> #include <linux/writeback.h> /* for inode_lock */ diff --git a/fs/notify/inotify/Kconfig b/fs/notify/inotify/Kconfig index 3e56dbffe729..b3a159b21cfd 100644 --- a/fs/notify/inotify/Kconfig +++ b/fs/notify/inotify/Kconfig @@ -15,6 +15,7 @@ config INOTIFY config INOTIFY_USER bool "Inotify support for userspace" + select ANON_INODES select FSNOTIFY default y ---help--- diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c index cfce53cb65d7..c3c2c7ac9020 100644 --- a/fs/ntfs/aops.c +++ b/fs/ntfs/aops.c @@ -23,6 +23,7 @@ #include <linux/errno.h> #include <linux/fs.h> +#include <linux/gfp.h> #include <linux/mm.h> #include <linux/pagemap.h> #include <linux/swap.h> diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c index 50d3b0c258e3..f5094ee224c1 100644 --- a/fs/ntfs/attrib.c +++ b/fs/ntfs/attrib.c @@ -22,6 +22,7 @@ #include <linux/buffer_head.h> #include <linux/sched.h> +#include <linux/slab.h> #include <linux/swap.h> #include <linux/writeback.h> diff --git a/fs/ntfs/compress.c b/fs/ntfs/compress.c index 08f7530e9341..6551c7cbad92 100644 --- a/fs/ntfs/compress.c +++ b/fs/ntfs/compress.c @@ -25,6 +25,7 @@ #include <linux/buffer_head.h> #include <linux/blkdev.h> #include <linux/vmalloc.h> +#include <linux/slab.h> #include "attrib.h" #include "inode.h" diff --git a/fs/ntfs/dir.c b/fs/ntfs/dir.c index 9173e82a45d1..fe44d3feee4a 100644 --- a/fs/ntfs/dir.c +++ b/fs/ntfs/dir.c @@ -21,6 +21,7 @@ */ #include <linux/buffer_head.h> +#include <linux/slab.h> #include "dir.h" #include "aops.h" diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c index b681c71d7069..8804f093ba75 100644 --- a/fs/ntfs/file.c +++ b/fs/ntfs/file.c @@ -20,6 +20,7 @@ */ #include <linux/buffer_head.h> +#include <linux/gfp.h> #include <linux/pagemap.h> #include <linux/pagevec.h> #include <linux/sched.h> diff --git a/fs/ntfs/index.c b/fs/ntfs/index.c index 2194eff49743..096c135691ae 100644 --- 
a/fs/ntfs/index.c +++ b/fs/ntfs/index.c @@ -19,6 +19,8 @@ * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +#include <linux/slab.h> + #include "aops.h" #include "collate.h" #include "debug.h" diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c index 1caa0ef0b2bb..b572b6727181 100644 --- a/fs/ntfs/mft.c +++ b/fs/ntfs/mft.c @@ -21,6 +21,7 @@ */ #include <linux/buffer_head.h> +#include <linux/slab.h> #include <linux/swap.h> #include "attrib.h" diff --git a/fs/ntfs/namei.c b/fs/ntfs/namei.c index 2ca00153b6ec..358273e59ade 100644 --- a/fs/ntfs/namei.c +++ b/fs/ntfs/namei.c @@ -23,6 +23,7 @@ #include <linux/dcache.h> #include <linux/exportfs.h> #include <linux/security.h> +#include <linux/slab.h> #include "attrib.h" #include "debug.h" diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c index 0501974bedd0..e13fc9e8fcdc 100644 --- a/fs/ocfs2/acl.c +++ b/fs/ocfs2/acl.c @@ -21,6 +21,7 @@ #include <linux/init.h> #include <linux/module.h> +#include <linux/slab.h> #include <linux/string.h> #define MLOG_MASK_PREFIX ML_INODE @@ -30,6 +31,8 @@ #include "alloc.h" #include "dlmglue.h" #include "file.h" +#include "inode.h" +#include "journal.h" #include "ocfs2_fs.h" #include "xattr.h" @@ -166,6 +169,60 @@ static struct posix_acl *ocfs2_get_acl(struct inode *inode, int type) } /* + * Helper function to set i_mode in memory and disk. Some call paths + * will not have di_bh or a journal handle to pass, in which case it + * will create it's own. 
+ */ +static int ocfs2_acl_set_mode(struct inode *inode, struct buffer_head *di_bh, + handle_t *handle, umode_t new_mode) +{ + int ret, commit_handle = 0; + struct ocfs2_dinode *di; + + if (di_bh == NULL) { + ret = ocfs2_read_inode_block(inode, &di_bh); + if (ret) { + mlog_errno(ret); + goto out; + } + } else + get_bh(di_bh); + + if (handle == NULL) { + handle = ocfs2_start_trans(OCFS2_SB(inode->i_sb), + OCFS2_INODE_UPDATE_CREDITS); + if (IS_ERR(handle)) { + ret = PTR_ERR(handle); + mlog_errno(ret); + goto out_brelse; + } + + commit_handle = 1; + } + + di = (struct ocfs2_dinode *)di_bh->b_data; + ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh, + OCFS2_JOURNAL_ACCESS_WRITE); + if (ret) { + mlog_errno(ret); + goto out_commit; + } + + inode->i_mode = new_mode; + di->i_mode = cpu_to_le16(inode->i_mode); + + ocfs2_journal_dirty(handle, di_bh); + +out_commit: + if (commit_handle) + ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle); +out_brelse: + brelse(di_bh); +out: + return ret; +} + +/* * Set the access or default ACL of an inode. 
*/ static int ocfs2_set_acl(handle_t *handle, @@ -193,9 +250,14 @@ static int ocfs2_set_acl(handle_t *handle, if (ret < 0) return ret; else { - inode->i_mode = mode; if (ret == 0) acl = NULL; + + ret = ocfs2_acl_set_mode(inode, di_bh, + handle, mode); + if (ret) + return ret; + } } break; @@ -283,6 +345,7 @@ int ocfs2_init_acl(handle_t *handle, struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); struct posix_acl *acl = NULL; int ret = 0; + mode_t mode; if (!S_ISLNK(inode->i_mode)) { if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) { @@ -291,12 +354,17 @@ int ocfs2_init_acl(handle_t *handle, if (IS_ERR(acl)) return PTR_ERR(acl); } - if (!acl) - inode->i_mode &= ~current_umask(); + if (!acl) { + mode = inode->i_mode & ~current_umask(); + ret = ocfs2_acl_set_mode(inode, di_bh, handle, mode); + if (ret) { + mlog_errno(ret); + goto cleanup; + } + } } if ((osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) && acl) { struct posix_acl *clone; - mode_t mode; if (S_ISDIR(inode->i_mode)) { ret = ocfs2_set_acl(handle, inode, di_bh, @@ -313,7 +381,7 @@ int ocfs2_init_acl(handle_t *handle, mode = inode->i_mode; ret = posix_acl_create_masq(clone, &mode); if (ret >= 0) { - inode->i_mode = mode; + ret = ocfs2_acl_set_mode(inode, di_bh, handle, mode); if (ret > 0) { ret = ocfs2_set_acl(handle, inode, di_bh, ACL_TYPE_ACCESS, diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c index 21c808f752d8..f9d5d3ffc75a 100644 --- a/fs/ocfs2/buffer_head_io.c +++ b/fs/ocfs2/buffer_head_io.c @@ -25,7 +25,6 @@ #include <linux/fs.h> #include <linux/types.h> -#include <linux/slab.h> #include <linux/highmem.h> #include <cluster/masklog.h> @@ -407,6 +406,7 @@ int ocfs2_write_super_or_backup(struct ocfs2_super *osb, struct buffer_head *bh) { int ret = 0; + struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data; mlog_entry_void(); @@ -426,6 +426,7 @@ int ocfs2_write_super_or_backup(struct ocfs2_super *osb, get_bh(bh); /* for end_buffer_write_sync() */ bh->b_end_io = end_buffer_write_sync; + 
ocfs2_compute_meta_ecc(osb->sb, bh->b_data, &di->i_check); submit_bh(WRITE, bh); wait_on_buffer(bh); diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 5c9890006708..41d5f1f92d56 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -34,6 +34,7 @@ #include <linux/crc32.h> #include <linux/time.h> #include <linux/debugfs.h> +#include <linux/slab.h> #include "heartbeat.h" #include "tcp.h" diff --git a/fs/ocfs2/cluster/nodemanager.c b/fs/ocfs2/cluster/nodemanager.c index c81142e3ef84..ed0c9f367fed 100644 --- a/fs/ocfs2/cluster/nodemanager.c +++ b/fs/ocfs2/cluster/nodemanager.c @@ -19,6 +19,7 @@ * Boston, MA 021110-1307, USA. */ +#include <linux/slab.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/configfs.h> diff --git a/fs/ocfs2/cluster/quorum.c b/fs/ocfs2/cluster/quorum.c index 639024033fce..cf3e16696216 100644 --- a/fs/ocfs2/cluster/quorum.c +++ b/fs/ocfs2/cluster/quorum.c @@ -44,7 +44,6 @@ * and if they're the last, they fire off the decision. */ #include <linux/kernel.h> -#include <linux/slab.h> #include <linux/workqueue.h> #include <linux/reboot.h> diff --git a/fs/ocfs2/dlm/dlmast.c b/fs/ocfs2/dlm/dlmast.c index dccc439fa087..12d5eb78a11a 100644 --- a/fs/ocfs2/dlm/dlmast.c +++ b/fs/ocfs2/dlm/dlmast.c @@ -28,7 +28,6 @@ #include <linux/module.h> #include <linux/fs.h> #include <linux/types.h> -#include <linux/slab.h> #include <linux/highmem.h> #include <linux/init.h> #include <linux/sysctl.h> @@ -185,9 +184,8 @@ static void dlm_update_lvb(struct dlm_ctxt *dlm, struct dlm_lock_resource *res, BUG_ON(!lksb); /* only updates if this node masters the lockres */ + spin_lock(&res->spinlock); if (res->owner == dlm->node_num) { - - spin_lock(&res->spinlock); /* check the lksb flags for the direction */ if (lksb->flags & DLM_LKSB_GET_LVB) { mlog(0, "getting lvb from lockres for %s node\n", @@ -202,8 +200,8 @@ static void dlm_update_lvb(struct dlm_ctxt *dlm, struct dlm_lock_resource *res, * here. 
In the future we might want to clear it at the time * the put is actually done. */ - spin_unlock(&res->spinlock); } + spin_unlock(&res->spinlock); /* reset any lvb flags on the lksb */ lksb->flags &= ~(DLM_LKSB_PUT_LVB|DLM_LKSB_GET_LVB); diff --git a/fs/ocfs2/dlm/dlmconvert.c b/fs/ocfs2/dlm/dlmconvert.c index f283bce776b4..90803b47cd8c 100644 --- a/fs/ocfs2/dlm/dlmconvert.c +++ b/fs/ocfs2/dlm/dlmconvert.c @@ -28,7 +28,6 @@ #include <linux/module.h> #include <linux/fs.h> #include <linux/types.h> -#include <linux/slab.h> #include <linux/highmem.h> #include <linux/init.h> #include <linux/sysctl.h> diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index a659606dcb95..9289b4357d27 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c @@ -1875,7 +1875,6 @@ int dlm_assert_master_handler(struct o2net_msg *msg, u32 len, void *data, ok: spin_unlock(&res->spinlock); } - spin_unlock(&dlm->spinlock); // mlog(0, "woo! got an assert_master from node %u!\n", // assert->node_idx); @@ -1926,7 +1925,6 @@ ok: /* master is known, detach if not already detached. * ensures that only one assert_master call will happen * on this mle. 
*/ - spin_lock(&dlm->spinlock); spin_lock(&dlm->master_lock); rr = atomic_read(&mle->mle_refs.refcount); @@ -1959,7 +1957,6 @@ ok: __dlm_put_mle(mle); } spin_unlock(&dlm->master_lock); - spin_unlock(&dlm->spinlock); } else if (res) { if (res->owner != assert->node_idx) { mlog(0, "assert_master from %u, but current " @@ -1967,6 +1964,7 @@ ok: res->owner, namelen, name); } } + spin_unlock(&dlm->spinlock); done: ret = 0; diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c index 52ec020ea78b..11a6d1fd1d35 100644 --- a/fs/ocfs2/dlm/dlmthread.c +++ b/fs/ocfs2/dlm/dlmthread.c @@ -28,7 +28,6 @@ #include <linux/module.h> #include <linux/fs.h> #include <linux/types.h> -#include <linux/slab.h> #include <linux/highmem.h> #include <linux/init.h> #include <linux/sysctl.h> diff --git a/fs/ocfs2/dlm/dlmunlock.c b/fs/ocfs2/dlm/dlmunlock.c index 49e29ecd0201..b47c1b92b82b 100644 --- a/fs/ocfs2/dlm/dlmunlock.c +++ b/fs/ocfs2/dlm/dlmunlock.c @@ -28,7 +28,6 @@ #include <linux/module.h> #include <linux/fs.h> #include <linux/types.h> -#include <linux/slab.h> #include <linux/highmem.h> #include <linux/init.h> #include <linux/sysctl.h> diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c index 1b0de157a08c..b83d6107a1f5 100644 --- a/fs/ocfs2/dlmfs/dlmfs.c +++ b/fs/ocfs2/dlmfs/dlmfs.c @@ -112,20 +112,20 @@ MODULE_PARM_DESC(capabilities, DLMFS_CAPABILITIES); * O_RDONLY -> PRMODE level * O_WRONLY -> EXMODE level * - * O_NONBLOCK -> LKM_NOQUEUE + * O_NONBLOCK -> NOQUEUE */ static int dlmfs_decode_open_flags(int open_flags, int *level, int *flags) { if (open_flags & (O_WRONLY|O_RDWR)) - *level = LKM_EXMODE; + *level = DLM_LOCK_EX; else - *level = LKM_PRMODE; + *level = DLM_LOCK_PR; *flags = 0; if (open_flags & O_NONBLOCK) - *flags |= LKM_NOQUEUE; + *flags |= DLM_LKF_NOQUEUE; return 0; } @@ -166,7 +166,7 @@ static int dlmfs_file_open(struct inode *inode, * to be able userspace to be able to distinguish a * valid lock request from one that simply couldn't be * granted. 
*/ - if (flags & LKM_NOQUEUE && status == -EAGAIN) + if (flags & DLM_LKF_NOQUEUE && status == -EAGAIN) status = -ETXTBSY; kfree(fp); goto bail; @@ -193,7 +193,7 @@ static int dlmfs_file_release(struct inode *inode, status = 0; if (fp) { level = fp->fp_lock_level; - if (level != LKM_IVMODE) + if (level != DLM_LOCK_IV) user_dlm_cluster_unlock(&ip->ip_lockres, level); kfree(fp); @@ -262,7 +262,7 @@ static ssize_t dlmfs_file_read(struct file *filp, if ((count + *ppos) > i_size_read(inode)) readlen = i_size_read(inode) - *ppos; else - readlen = count - *ppos; + readlen = count; lvb_buf = kmalloc(readlen, GFP_NOFS); if (!lvb_buf) diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c index c562a7581cf9..09e3fdfa6d33 100644 --- a/fs/ocfs2/extent_map.c +++ b/fs/ocfs2/extent_map.c @@ -24,6 +24,7 @@ #include <linux/fs.h> #include <linux/init.h> +#include <linux/slab.h> #include <linux/types.h> #include <linux/fiemap.h> diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 17947dc8341e..a5fbd9cea968 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -684,6 +684,7 @@ restarted_transaction: if (why == RESTART_META) { mlog(0, "restarting function.\n"); restart_func = 1; + status = 0; } else { BUG_ON(why != RESTART_TRANS); @@ -1981,18 +1982,18 @@ relock: /* communicate with ocfs2_dio_end_io */ ocfs2_iocb_set_rw_locked(iocb, rw_level); - if (direct_io) { - ret = generic_segment_checks(iov, &nr_segs, &ocount, - VERIFY_READ); - if (ret) - goto out_dio; + ret = generic_segment_checks(iov, &nr_segs, &ocount, + VERIFY_READ); + if (ret) + goto out_dio; - count = ocount; - ret = generic_write_checks(file, ppos, &count, - S_ISBLK(inode->i_mode)); - if (ret) - goto out_dio; + count = ocount; + ret = generic_write_checks(file, ppos, &count, + S_ISBLK(inode->i_mode)); + if (ret) + goto out_dio; + if (direct_io) { written = generic_file_direct_write(iocb, iov, &nr_segs, *ppos, ppos, count, ocount); if (written < 0) { @@ -2007,7 +2008,10 @@ relock: goto out_dio; } } else { - written = 
__generic_file_aio_write(iocb, iov, nr_segs, ppos); + current->backing_dev_info = file->f_mapping->backing_dev_info; + written = generic_file_buffered_write(iocb, iov, nr_segs, *ppos, + ppos, count, 0); + current->backing_dev_info = NULL; } out_dio: @@ -2021,9 +2025,9 @@ out_dio: if (ret < 0) written = ret; - if (!ret && (old_size != i_size_read(inode) || - old_clusters != OCFS2_I(inode)->ip_clusters || - has_refcount)) { + if (!ret && ((old_size != i_size_read(inode)) || + (old_clusters != OCFS2_I(inode)->ip_clusters) || + has_refcount)) { ret = jbd2_journal_force_commit(osb->journal->j_journal); if (ret < 0) written = ret; diff --git a/fs/ocfs2/heartbeat.c b/fs/ocfs2/heartbeat.c index c6e7213db868..1aa863dd901f 100644 --- a/fs/ocfs2/heartbeat.c +++ b/fs/ocfs2/heartbeat.c @@ -26,7 +26,6 @@ #include <linux/fs.h> #include <linux/types.h> -#include <linux/slab.h> #include <linux/highmem.h> #define MLOG_MASK_PREFIX ML_SUPER diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index 278a223aae14..af189887201c 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c @@ -25,7 +25,6 @@ #include <linux/fs.h> #include <linux/types.h> -#include <linux/slab.h> #include <linux/highmem.h> #include <linux/pagemap.h> #include <linux/quotaops.h> @@ -559,6 +558,7 @@ static int ocfs2_truncate_for_delete(struct ocfs2_super *osb, handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); if (IS_ERR(handle)) { status = PTR_ERR(handle); + handle = NULL; mlog_errno(status); goto out; } @@ -640,11 +640,13 @@ static int ocfs2_remove_inode(struct inode *inode, goto bail_unlock; } - status = ocfs2_orphan_del(osb, handle, orphan_dir_inode, inode, - orphan_dir_bh); - if (status < 0) { - mlog_errno(status); - goto bail_commit; + if (!(OCFS2_I(inode)->ip_flags & OCFS2_INODE_SKIP_ORPHAN_DIR)) { + status = ocfs2_orphan_del(osb, handle, orphan_dir_inode, inode, + orphan_dir_bh); + if (status < 0) { + mlog_errno(status); + goto bail_commit; + } } /* set the inodes dtime */ @@ -723,38 +725,39 @@ static 
void ocfs2_signal_wipe_completion(struct ocfs2_super *osb, static int ocfs2_wipe_inode(struct inode *inode, struct buffer_head *di_bh) { - int status, orphaned_slot; + int status, orphaned_slot = -1; struct inode *orphan_dir_inode = NULL; struct buffer_head *orphan_dir_bh = NULL; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); - struct ocfs2_dinode *di; + struct ocfs2_dinode *di = (struct ocfs2_dinode *) di_bh->b_data; - di = (struct ocfs2_dinode *) di_bh->b_data; - orphaned_slot = le16_to_cpu(di->i_orphaned_slot); + if (!(OCFS2_I(inode)->ip_flags & OCFS2_INODE_SKIP_ORPHAN_DIR)) { + orphaned_slot = le16_to_cpu(di->i_orphaned_slot); - status = ocfs2_check_orphan_recovery_state(osb, orphaned_slot); - if (status) - return status; + status = ocfs2_check_orphan_recovery_state(osb, orphaned_slot); + if (status) + return status; - orphan_dir_inode = ocfs2_get_system_file_inode(osb, - ORPHAN_DIR_SYSTEM_INODE, - orphaned_slot); - if (!orphan_dir_inode) { - status = -EEXIST; - mlog_errno(status); - goto bail; - } + orphan_dir_inode = ocfs2_get_system_file_inode(osb, + ORPHAN_DIR_SYSTEM_INODE, + orphaned_slot); + if (!orphan_dir_inode) { + status = -EEXIST; + mlog_errno(status); + goto bail; + } - /* Lock the orphan dir. The lock will be held for the entire - * delete_inode operation. We do this now to avoid races with - * recovery completion on other nodes. */ - mutex_lock(&orphan_dir_inode->i_mutex); - status = ocfs2_inode_lock(orphan_dir_inode, &orphan_dir_bh, 1); - if (status < 0) { - mutex_unlock(&orphan_dir_inode->i_mutex); + /* Lock the orphan dir. The lock will be held for the entire + * delete_inode operation. We do this now to avoid races with + * recovery completion on other nodes. 
*/ + mutex_lock(&orphan_dir_inode->i_mutex); + status = ocfs2_inode_lock(orphan_dir_inode, &orphan_dir_bh, 1); + if (status < 0) { + mutex_unlock(&orphan_dir_inode->i_mutex); - mlog_errno(status); - goto bail; + mlog_errno(status); + goto bail; + } } /* we do this while holding the orphan dir lock because we @@ -795,6 +798,9 @@ static int ocfs2_wipe_inode(struct inode *inode, mlog_errno(status); bail_unlock_dir: + if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_SKIP_ORPHAN_DIR) + return status; + ocfs2_inode_unlock(orphan_dir_inode, 1); mutex_unlock(&orphan_dir_inode->i_mutex); brelse(orphan_dir_bh); @@ -890,7 +896,23 @@ static int ocfs2_query_inode_wipe(struct inode *inode, /* Do some basic inode verification... */ di = (struct ocfs2_dinode *) di_bh->b_data; - if (!(di->i_flags & cpu_to_le32(OCFS2_ORPHANED_FL))) { + if (!(di->i_flags & cpu_to_le32(OCFS2_ORPHANED_FL)) && + !(oi->ip_flags & OCFS2_INODE_SKIP_ORPHAN_DIR)) { + /* + * Inodes in the orphan dir must have ORPHANED_FL. The only + * inodes that come back out of the orphan dir are reflink + * targets. A reflink target may be moved out of the orphan + * dir between the time we scan the directory and the time we + * process it. This would lead to HAS_REFCOUNT_FL being set but + * ORPHANED_FL not. + */ + if (di->i_dyn_features & cpu_to_le16(OCFS2_HAS_REFCOUNT_FL)) { + mlog(0, "Reflinked inode %llu is no longer orphaned. " + "it shouldn't be deleted\n", + (unsigned long long)oi->ip_blkno); + goto bail; + } + /* for lack of a better error? 
*/ status = -EEXIST; mlog(ML_ERROR, diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h index ba4fe07b293c..0b28e1921a39 100644 --- a/fs/ocfs2/inode.h +++ b/fs/ocfs2/inode.h @@ -100,6 +100,8 @@ struct ocfs2_inode_info #define OCFS2_INODE_MAYBE_ORPHANED 0x00000020 /* Does someone have the file open O_DIRECT */ #define OCFS2_INODE_OPEN_DIRECT 0x00000040 +/* Tell the inode wipe code it's not in orphan dir */ +#define OCFS2_INODE_SKIP_ORPHAN_DIR 0x00000080 static inline struct ocfs2_inode_info *OCFS2_I(struct inode *inode) { diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c index ca992d91f511..c983715d8d8c 100644 --- a/fs/ocfs2/localalloc.c +++ b/fs/ocfs2/localalloc.c @@ -872,8 +872,10 @@ static int ocfs2_sync_local_to_main(struct ocfs2_super *osb, (unsigned long long)la_start_blk, (unsigned long long)blkno); - status = ocfs2_free_clusters(handle, main_bm_inode, - main_bm_bh, blkno, count); + status = ocfs2_release_clusters(handle, + main_bm_inode, + main_bm_bh, blkno, + count); if (status < 0) { mlog_errno(status); goto bail; @@ -984,8 +986,7 @@ static int ocfs2_local_alloc_reserve_for_window(struct ocfs2_super *osb, } retry_enospc: - (*ac)->ac_bits_wanted = osb->local_alloc_bits; - + (*ac)->ac_bits_wanted = osb->local_alloc_default_bits; status = ocfs2_reserve_cluster_bitmap_bits(osb, *ac); if (status == -ENOSPC) { if (ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_ENOSPC) == @@ -1061,6 +1062,7 @@ retry_enospc: OCFS2_LA_DISABLED) goto bail; + ac->ac_bits_wanted = osb->local_alloc_default_bits; status = ocfs2_claim_clusters(osb, handle, ac, osb->local_alloc_bits, &cluster_off, diff --git a/fs/ocfs2/locks.c b/fs/ocfs2/locks.c index 544ac6245175..b5cb3ede9408 100644 --- a/fs/ocfs2/locks.c +++ b/fs/ocfs2/locks.c @@ -133,7 +133,7 @@ int ocfs2_lock(struct file *file, int cmd, struct file_lock *fl) if (!(fl->fl_flags & FL_POSIX)) return -ENOLCK; - if (__mandatory_lock(inode)) + if (__mandatory_lock(inode) && fl->fl_type != F_UNLCK) return -ENOLCK; return 
ocfs2_plock(osb->cconn, OCFS2_I(inode)->ip_blkno, file, cmd, fl); diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c index 39737613424a..7898bd3a99f5 100644 --- a/fs/ocfs2/mmap.c +++ b/fs/ocfs2/mmap.c @@ -25,7 +25,6 @@ #include <linux/fs.h> #include <linux/types.h> -#include <linux/slab.h> #include <linux/highmem.h> #include <linux/pagemap.h> #include <linux/uio.h> diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index d9cd4e373a53..4cbb18f26c5f 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -84,7 +84,7 @@ static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb, static int ocfs2_orphan_add(struct ocfs2_super *osb, handle_t *handle, struct inode *inode, - struct ocfs2_dinode *fe, + struct buffer_head *fe_bh, char *name, struct ocfs2_dir_lookup_result *lookup, struct inode *orphan_dir_inode); @@ -408,23 +408,28 @@ static int ocfs2_mknod(struct inode *dir, } } - status = ocfs2_add_entry(handle, dentry, inode, - OCFS2_I(inode)->ip_blkno, parent_fe_bh, - &lookup); - if (status < 0) { + /* + * Do this before adding the entry to the directory. We add + * also set d_op after success so that ->d_iput() will cleanup + * the dentry lock even if ocfs2_add_entry() fails below. 
+ */ + status = ocfs2_dentry_attach_lock(dentry, inode, + OCFS2_I(dir)->ip_blkno); + if (status) { mlog_errno(status); goto leave; } + dentry->d_op = &ocfs2_dentry_ops; - status = ocfs2_dentry_attach_lock(dentry, inode, - OCFS2_I(dir)->ip_blkno); - if (status) { + status = ocfs2_add_entry(handle, dentry, inode, + OCFS2_I(inode)->ip_blkno, parent_fe_bh, + &lookup); + if (status < 0) { mlog_errno(status); goto leave; } insert_inode_hash(inode); - dentry->d_op = &ocfs2_dentry_ops; d_instantiate(dentry, inode); status = 0; leave: @@ -445,11 +450,6 @@ leave: ocfs2_free_dir_lookup_result(&lookup); - if ((status < 0) && inode) { - clear_nlink(inode); - iput(inode); - } - if (inode_ac) ocfs2_free_alloc_context(inode_ac); @@ -459,6 +459,17 @@ leave: if (meta_ac) ocfs2_free_alloc_context(meta_ac); + /* + * We should call iput after the i_mutex of the bitmap been + * unlocked in ocfs2_free_alloc_context, or the + * ocfs2_delete_inode will mutex_lock again. + */ + if ((status < 0) && inode) { + OCFS2_I(inode)->ip_flags |= OCFS2_INODE_SKIP_ORPHAN_DIR; + clear_nlink(inode); + iput(inode); + } + mlog_exit(status); return status; @@ -879,7 +890,7 @@ static int ocfs2_unlink(struct inode *dir, fe = (struct ocfs2_dinode *) fe_bh->b_data; if (inode_is_unlinkable(inode)) { - status = ocfs2_orphan_add(osb, handle, inode, fe, orphan_name, + status = ocfs2_orphan_add(osb, handle, inode, fe_bh, orphan_name, &orphan_insert, orphan_dir); if (status < 0) { mlog_errno(status); @@ -1300,7 +1311,7 @@ static int ocfs2_rename(struct inode *old_dir, if (S_ISDIR(new_inode->i_mode) || (ocfs2_read_links_count(newfe) == 1)) { status = ocfs2_orphan_add(osb, handle, new_inode, - newfe, orphan_name, + newfe_bh, orphan_name, &orphan_insert, orphan_dir); if (status < 0) { mlog_errno(status); @@ -1771,22 +1782,27 @@ static int ocfs2_symlink(struct inode *dir, } } - status = ocfs2_add_entry(handle, dentry, inode, - le64_to_cpu(fe->i_blkno), parent_fe_bh, - &lookup); - if (status < 0) { + /* + * Do this before 
adding the entry to the directory. We add + * also set d_op after success so that ->d_iput() will cleanup + * the dentry lock even if ocfs2_add_entry() fails below. + */ + status = ocfs2_dentry_attach_lock(dentry, inode, OCFS2_I(dir)->ip_blkno); + if (status) { mlog_errno(status); goto bail; } + dentry->d_op = &ocfs2_dentry_ops; - status = ocfs2_dentry_attach_lock(dentry, inode, OCFS2_I(dir)->ip_blkno); - if (status) { + status = ocfs2_add_entry(handle, dentry, inode, + le64_to_cpu(fe->i_blkno), parent_fe_bh, + &lookup); + if (status < 0) { mlog_errno(status); goto bail; } insert_inode_hash(inode); - dentry->d_op = &ocfs2_dentry_ops; d_instantiate(dentry, inode); bail: if (status < 0 && did_quota) @@ -1811,6 +1827,7 @@ bail: if (xattr_ac) ocfs2_free_alloc_context(xattr_ac); if ((status < 0) && inode) { + OCFS2_I(inode)->ip_flags |= OCFS2_INODE_SKIP_ORPHAN_DIR; clear_nlink(inode); iput(inode); } @@ -1911,7 +1928,7 @@ leave: static int ocfs2_orphan_add(struct ocfs2_super *osb, handle_t *handle, struct inode *inode, - struct ocfs2_dinode *fe, + struct buffer_head *fe_bh, char *name, struct ocfs2_dir_lookup_result *lookup, struct inode *orphan_dir_inode) @@ -1919,6 +1936,7 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb, struct buffer_head *orphan_dir_bh = NULL; int status = 0; struct ocfs2_dinode *orphan_fe; + struct ocfs2_dinode *fe = (struct ocfs2_dinode *) fe_bh->b_data; mlog_entry("(inode->i_ino = %lu)\n", inode->i_ino); @@ -1959,13 +1977,31 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb, goto leave; } + /* + * We're going to journal the change of i_flags and i_orphaned_slot. + * It's safe anyway, though some callers may duplicate the journaling. + * Journaling within the func just make the logic look more + * straightforward. 
+ */ + status = ocfs2_journal_access_di(handle, + INODE_CACHE(inode), + fe_bh, + OCFS2_JOURNAL_ACCESS_WRITE); + if (status < 0) { + mlog_errno(status); + goto leave; + } + le32_add_cpu(&fe->i_flags, OCFS2_ORPHANED_FL); + OCFS2_I(inode)->ip_flags &= ~OCFS2_INODE_SKIP_ORPHAN_DIR; /* Record which orphan dir our inode now resides * in. delete_inode will use this to determine which orphan * dir to lock. */ fe->i_orphaned_slot = cpu_to_le16(osb->slot_num); + ocfs2_journal_dirty(handle, fe_bh); + mlog(0, "Inode %llu orphaned in slot %d\n", (unsigned long long)OCFS2_I(inode)->ip_blkno, osb->slot_num); @@ -2123,7 +2159,7 @@ int ocfs2_create_inode_in_orphan(struct inode *dir, } di = (struct ocfs2_dinode *)new_di_bh->b_data; - status = ocfs2_orphan_add(osb, handle, inode, di, orphan_name, + status = ocfs2_orphan_add(osb, handle, inode, new_di_bh, orphan_name, &orphan_insert, orphan_dir); if (status < 0) { mlog_errno(status); diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index 1238b491db90..adf5e2ebc2c4 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -763,8 +763,18 @@ static inline unsigned int ocfs2_megabytes_to_clusters(struct super_block *sb, return megs << (20 - OCFS2_SB(sb)->s_clustersize_bits); } -#define ocfs2_set_bit ext2_set_bit -#define ocfs2_clear_bit ext2_clear_bit +static inline void _ocfs2_set_bit(unsigned int bit, unsigned long *bitmap) +{ + ext2_set_bit(bit, bitmap); +} +#define ocfs2_set_bit(bit, addr) _ocfs2_set_bit((bit), (unsigned long *)(addr)) + +static inline void _ocfs2_clear_bit(unsigned int bit, unsigned long *bitmap) +{ + ext2_clear_bit(bit, bitmap); +} +#define ocfs2_clear_bit(bit, addr) _ocfs2_clear_bit((bit), (unsigned long *)(addr)) + #define ocfs2_test_bit ext2_test_bit #define ocfs2_find_next_zero_bit ext2_find_next_zero_bit #define ocfs2_find_next_bit ext2_find_next_bit diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c index 355f41d1d520..ab42a74c7539 100644 --- a/fs/ocfs2/quota_global.c +++ b/fs/ocfs2/quota_global.c @@ 
-3,6 +3,7 @@ */ #include <linux/spinlock.h> #include <linux/fs.h> +#include <linux/slab.h> #include <linux/quota.h> #include <linux/quotaops.h> #include <linux/dqblk_qtree.h> diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c index a6467f3d262e..9ad49305f450 100644 --- a/fs/ocfs2/quota_local.c +++ b/fs/ocfs2/quota_local.c @@ -3,6 +3,7 @@ */ #include <linux/fs.h> +#include <linux/slab.h> #include <linux/quota.h> #include <linux/quotaops.h> #include <linux/module.h> diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index 9e96921dffda..5cbcd0f008fc 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c @@ -37,7 +37,6 @@ #include <linux/bio.h> #include <linux/blkdev.h> -#include <linux/gfp.h> #include <linux/slab.h> #include <linux/writeback.h> #include <linux/pagevec.h> @@ -4075,6 +4074,7 @@ static int ocfs2_complete_reflink(struct inode *s_inode, OCFS2_I(t_inode)->ip_dyn_features = OCFS2_I(s_inode)->ip_dyn_features; spin_unlock(&OCFS2_I(t_inode)->ip_lock); i_size_write(t_inode, size); + t_inode->i_blocks = s_inode->i_blocks; di->i_xattr_inline_size = s_di->i_xattr_inline_size; di->i_clusters = s_di->i_clusters; @@ -4083,6 +4083,9 @@ static int ocfs2_complete_reflink(struct inode *s_inode, di->i_attr = s_di->i_attr; if (preserve) { + t_inode->i_uid = s_inode->i_uid; + t_inode->i_gid = s_inode->i_gid; + t_inode->i_mode = s_inode->i_mode; di->i_uid = s_di->i_uid; di->i_gid = s_di->i_gid; di->i_mode = s_di->i_mode; diff --git a/fs/ocfs2/stack_o2cb.c b/fs/ocfs2/stack_o2cb.c index 7020e1253ffa..0d3049f696c5 100644 --- a/fs/ocfs2/stack_o2cb.c +++ b/fs/ocfs2/stack_o2cb.c @@ -19,6 +19,7 @@ #include <linux/kernel.h> #include <linux/crc32.h> +#include <linux/slab.h> #include <linux/module.h> /* Needed for AOP_TRUNCATED_PAGE in mlog_errno() */ diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c index 5ae8812b2864..2dc57bca0688 100644 --- a/fs/ocfs2/stack_user.c +++ b/fs/ocfs2/stack_user.c @@ -21,6 +21,7 @@ #include <linux/fs.h> #include 
<linux/miscdevice.h> #include <linux/mutex.h> +#include <linux/slab.h> #include <linux/smp_lock.h> #include <linux/reboot.h> #include <asm/uaccess.h> diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index c3c60bc3e072..19ba00f28547 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c @@ -95,13 +95,6 @@ static inline int ocfs2_block_group_set_bits(handle_t *handle, struct buffer_head *group_bh, unsigned int bit_off, unsigned int num_bits); -static inline int ocfs2_block_group_clear_bits(handle_t *handle, - struct inode *alloc_inode, - struct ocfs2_group_desc *bg, - struct buffer_head *group_bh, - unsigned int bit_off, - unsigned int num_bits); - static int ocfs2_relink_block_group(handle_t *handle, struct inode *alloc_inode, struct buffer_head *fe_bh, @@ -152,7 +145,7 @@ static u32 ocfs2_bits_per_group(struct ocfs2_chain_list *cl) #define do_error(fmt, ...) \ do{ \ - if (clean_error) \ + if (resize) \ mlog(ML_ERROR, fmt "\n", ##__VA_ARGS__); \ else \ ocfs2_error(sb, fmt, ##__VA_ARGS__); \ @@ -160,7 +153,7 @@ static u32 ocfs2_bits_per_group(struct ocfs2_chain_list *cl) static int ocfs2_validate_gd_self(struct super_block *sb, struct buffer_head *bh, - int clean_error) + int resize) { struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *)bh->b_data; @@ -211,7 +204,7 @@ static int ocfs2_validate_gd_self(struct super_block *sb, static int ocfs2_validate_gd_parent(struct super_block *sb, struct ocfs2_dinode *di, struct buffer_head *bh, - int clean_error) + int resize) { unsigned int max_bits; struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *)bh->b_data; @@ -233,8 +226,11 @@ static int ocfs2_validate_gd_parent(struct super_block *sb, return -EINVAL; } - if (le16_to_cpu(gd->bg_chain) >= - le16_to_cpu(di->id2.i_chain.cl_next_free_rec)) { + /* In resize, we may meet the case bg_chain == cl_next_free_rec. 
*/ + if ((le16_to_cpu(gd->bg_chain) > + le16_to_cpu(di->id2.i_chain.cl_next_free_rec)) || + ((le16_to_cpu(gd->bg_chain) == + le16_to_cpu(di->id2.i_chain.cl_next_free_rec)) && !resize)) { do_error("Group descriptor #%llu has bad chain %u", (unsigned long long)bh->b_blocknr, le16_to_cpu(gd->bg_chain)); @@ -1975,18 +1971,18 @@ int ocfs2_claim_clusters(struct ocfs2_super *osb, bits_wanted, cluster_start, num_clusters); } -static inline int ocfs2_block_group_clear_bits(handle_t *handle, - struct inode *alloc_inode, - struct ocfs2_group_desc *bg, - struct buffer_head *group_bh, - unsigned int bit_off, - unsigned int num_bits) +static int ocfs2_block_group_clear_bits(handle_t *handle, + struct inode *alloc_inode, + struct ocfs2_group_desc *bg, + struct buffer_head *group_bh, + unsigned int bit_off, + unsigned int num_bits, + void (*undo_fn)(unsigned int bit, + unsigned long *bmap)) { int status; unsigned int tmp; - int journal_type = OCFS2_JOURNAL_ACCESS_WRITE; struct ocfs2_group_desc *undo_bg = NULL; - int cluster_bitmap = 0; mlog_entry_void(); @@ -1996,20 +1992,18 @@ static inline int ocfs2_block_group_clear_bits(handle_t *handle, mlog(0, "off = %u, num = %u\n", bit_off, num_bits); - if (ocfs2_is_cluster_bitmap(alloc_inode)) - journal_type = OCFS2_JOURNAL_ACCESS_UNDO; - + BUG_ON(undo_fn && !ocfs2_is_cluster_bitmap(alloc_inode)); status = ocfs2_journal_access_gd(handle, INODE_CACHE(alloc_inode), - group_bh, journal_type); + group_bh, + undo_fn ? 
+ OCFS2_JOURNAL_ACCESS_UNDO : + OCFS2_JOURNAL_ACCESS_WRITE); if (status < 0) { mlog_errno(status); goto bail; } - if (ocfs2_is_cluster_bitmap(alloc_inode)) - cluster_bitmap = 1; - - if (cluster_bitmap) { + if (undo_fn) { jbd_lock_bh_state(group_bh); undo_bg = (struct ocfs2_group_desc *) bh2jh(group_bh)->b_committed_data; @@ -2020,13 +2014,13 @@ static inline int ocfs2_block_group_clear_bits(handle_t *handle, while(tmp--) { ocfs2_clear_bit((bit_off + tmp), (unsigned long *) bg->bg_bitmap); - if (cluster_bitmap) - ocfs2_set_bit(bit_off + tmp, - (unsigned long *) undo_bg->bg_bitmap); + if (undo_fn) + undo_fn(bit_off + tmp, + (unsigned long *) undo_bg->bg_bitmap); } le16_add_cpu(&bg->bg_free_bits_count, num_bits); - if (cluster_bitmap) + if (undo_fn) jbd_unlock_bh_state(group_bh); status = ocfs2_journal_dirty(handle, group_bh); @@ -2039,12 +2033,14 @@ bail: /* * expects the suballoc inode to already be locked. */ -int ocfs2_free_suballoc_bits(handle_t *handle, - struct inode *alloc_inode, - struct buffer_head *alloc_bh, - unsigned int start_bit, - u64 bg_blkno, - unsigned int count) +static int _ocfs2_free_suballoc_bits(handle_t *handle, + struct inode *alloc_inode, + struct buffer_head *alloc_bh, + unsigned int start_bit, + u64 bg_blkno, + unsigned int count, + void (*undo_fn)(unsigned int bit, + unsigned long *bitmap)) { int status = 0; u32 tmp_used; @@ -2079,7 +2075,7 @@ int ocfs2_free_suballoc_bits(handle_t *handle, status = ocfs2_block_group_clear_bits(handle, alloc_inode, group, group_bh, - start_bit, count); + start_bit, count, undo_fn); if (status < 0) { mlog_errno(status); goto bail; @@ -2110,6 +2106,17 @@ bail: return status; } +int ocfs2_free_suballoc_bits(handle_t *handle, + struct inode *alloc_inode, + struct buffer_head *alloc_bh, + unsigned int start_bit, + u64 bg_blkno, + unsigned int count) +{ + return _ocfs2_free_suballoc_bits(handle, alloc_inode, alloc_bh, + start_bit, bg_blkno, count, NULL); +} + int ocfs2_free_dinode(handle_t *handle, struct inode 
*inode_alloc_inode, struct buffer_head *inode_alloc_bh, @@ -2123,11 +2130,13 @@ int ocfs2_free_dinode(handle_t *handle, inode_alloc_bh, bit, bg_blkno, 1); } -int ocfs2_free_clusters(handle_t *handle, - struct inode *bitmap_inode, - struct buffer_head *bitmap_bh, - u64 start_blk, - unsigned int num_clusters) +static int _ocfs2_free_clusters(handle_t *handle, + struct inode *bitmap_inode, + struct buffer_head *bitmap_bh, + u64 start_blk, + unsigned int num_clusters, + void (*undo_fn)(unsigned int bit, + unsigned long *bitmap)) { int status; u16 bg_start_bit; @@ -2154,9 +2163,9 @@ int ocfs2_free_clusters(handle_t *handle, mlog(0, "bg_blkno = %llu, bg_start_bit = %u\n", (unsigned long long)bg_blkno, bg_start_bit); - status = ocfs2_free_suballoc_bits(handle, bitmap_inode, bitmap_bh, - bg_start_bit, bg_blkno, - num_clusters); + status = _ocfs2_free_suballoc_bits(handle, bitmap_inode, bitmap_bh, + bg_start_bit, bg_blkno, + num_clusters, undo_fn); if (status < 0) { mlog_errno(status); goto out; @@ -2170,6 +2179,32 @@ out: return status; } +int ocfs2_free_clusters(handle_t *handle, + struct inode *bitmap_inode, + struct buffer_head *bitmap_bh, + u64 start_blk, + unsigned int num_clusters) +{ + return _ocfs2_free_clusters(handle, bitmap_inode, bitmap_bh, + start_blk, num_clusters, + _ocfs2_set_bit); +} + +/* + * Give never-used clusters back to the global bitmap. We don't need + * to protect these bits in the undo buffer. 
+ */ +int ocfs2_release_clusters(handle_t *handle, + struct inode *bitmap_inode, + struct buffer_head *bitmap_bh, + u64 start_blk, + unsigned int num_clusters) +{ + return _ocfs2_free_clusters(handle, bitmap_inode, bitmap_bh, + start_blk, num_clusters, + _ocfs2_clear_bit); +} + static inline void ocfs2_debug_bg(struct ocfs2_group_desc *bg) { printk("Block Group:\n"); diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h index fa60723c43e8..e0f46df357e6 100644 --- a/fs/ocfs2/suballoc.h +++ b/fs/ocfs2/suballoc.h @@ -127,6 +127,11 @@ int ocfs2_free_clusters(handle_t *handle, struct buffer_head *bitmap_bh, u64 start_blk, unsigned int num_clusters); +int ocfs2_release_clusters(handle_t *handle, + struct inode *bitmap_inode, + struct buffer_head *bitmap_bh, + u64 start_blk, + unsigned int num_clusters); static inline u64 ocfs2_which_suballoc_group(u64 block, unsigned int bit) { diff --git a/fs/ocfs2/sysfile.c b/fs/ocfs2/sysfile.c index 40e53702948c..bfe7190cdbf1 100644 --- a/fs/ocfs2/sysfile.c +++ b/fs/ocfs2/sysfile.c @@ -25,7 +25,6 @@ #include <linux/fs.h> #include <linux/types.h> -#include <linux/slab.h> #include <linux/highmem.h> #define MLOG_MASK_PREFIX ML_INODE diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index d1b0d386f6d1..3e7773089b96 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -1622,7 +1622,7 @@ static void ocfs2_xa_block_wipe_namevalue(struct ocfs2_xa_loc *loc) /* Now tell xh->xh_entries about it */ for (i = 0; i < count; i++) { offset = le16_to_cpu(xh->xh_entries[i].xe_name_offset); - if (offset < namevalue_offset) + if (offset <= namevalue_offset) le16_add_cpu(&xh->xh_entries[i].xe_name_offset, namevalue_size); } @@ -6528,13 +6528,11 @@ static int ocfs2_create_empty_xattr_block(struct inode *inode, int indexed) { int ret; - struct ocfs2_alloc_context *meta_ac; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); - struct ocfs2_xattr_set_ctxt ctxt = { - .meta_ac = meta_ac, - }; + struct ocfs2_xattr_set_ctxt ctxt; - ret = 
ocfs2_reserve_new_metadata_blocks(osb, 1, &meta_ac); + memset(&ctxt, 0, sizeof(ctxt)); + ret = ocfs2_reserve_new_metadata_blocks(osb, 1, &ctxt.meta_ac); if (ret < 0) { mlog_errno(ret); return ret; @@ -6556,7 +6554,7 @@ static int ocfs2_create_empty_xattr_block(struct inode *inode, ocfs2_commit_trans(osb, ctxt.handle); out: - ocfs2_free_alloc_context(meta_ac); + ocfs2_free_alloc_context(ctxt.meta_ac); return ret; } diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c index 75d9b5ba1d45..c82af6acc2e7 100644 --- a/fs/omfs/inode.c +++ b/fs/omfs/inode.c @@ -6,6 +6,7 @@ #include <linux/version.h> #include <linux/module.h> #include <linux/sched.h> +#include <linux/slab.h> #include <linux/fs.h> #include <linux/vfs.h> #include <linux/parser.h> diff --git a/fs/open.c b/fs/open.c index e17f54454b50..74e5cd9f718e 100644 --- a/fs/open.c +++ b/fs/open.c @@ -10,7 +10,6 @@ #include <linux/fdtable.h> #include <linux/fsnotify.h> #include <linux/module.h> -#include <linux/slab.h> #include <linux/tty.h> #include <linux/namei.h> #include <linux/backing-dev.h> @@ -20,6 +19,7 @@ #include <linux/mount.h> #include <linux/vfs.h> #include <linux/fcntl.h> +#include <linux/slab.h> #include <asm/uaccess.h> #include <linux/fs.h> #include <linux/personality.h> diff --git a/fs/partitions/check.c b/fs/partitions/check.c index e8865c11777f..e238ab23a9e7 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -16,6 +16,7 @@ #include <linux/init.h> #include <linux/module.h> #include <linux/fs.h> +#include <linux/slab.h> #include <linux/kmod.h> #include <linux/ctype.h> #include <linux/genhd.h> diff --git a/fs/partitions/efi.c b/fs/partitions/efi.c index 49cfd5f54238..91babdae7587 100644 --- a/fs/partitions/efi.c +++ b/fs/partitions/efi.c @@ -95,6 +95,7 @@ ************************************************************/ #include <linux/crc32.h> #include <linux/math64.h> +#include <linux/slab.h> #include "check.h" #include "efi.h" diff --git a/fs/partitions/msdos.c b/fs/partitions/msdos.c index 
0028d2ef0662..90be97f1f5a8 100644 --- a/fs/partitions/msdos.c +++ b/fs/partitions/msdos.c @@ -31,14 +31,17 @@ */ #include <asm/unaligned.h> -#define SYS_IND(p) (get_unaligned(&p->sys_ind)) -#define NR_SECTS(p) ({ __le32 __a = get_unaligned(&p->nr_sects); \ - le32_to_cpu(__a); \ - }) +#define SYS_IND(p) get_unaligned(&p->sys_ind) -#define START_SECT(p) ({ __le32 __a = get_unaligned(&p->start_sect); \ - le32_to_cpu(__a); \ - }) +static inline sector_t nr_sects(struct partition *p) +{ + return (sector_t)get_unaligned_le32(&p->nr_sects); +} + +static inline sector_t start_sect(struct partition *p) +{ + return (sector_t)get_unaligned_le32(&p->start_sect); +} static inline int is_extended_partition(struct partition *p) { @@ -104,13 +107,13 @@ static int aix_magic_present(unsigned char *p, struct block_device *bdev) static void parse_extended(struct parsed_partitions *state, struct block_device *bdev, - u32 first_sector, u32 first_size) + sector_t first_sector, sector_t first_size) { struct partition *p; Sector sect; unsigned char *data; - u32 this_sector, this_size; - int sector_size = bdev_logical_block_size(bdev) / 512; + sector_t this_sector, this_size; + sector_t sector_size = bdev_logical_block_size(bdev) / 512; int loopct = 0; /* number of links followed without finding a data partition */ int i; @@ -145,14 +148,14 @@ parse_extended(struct parsed_partitions *state, struct block_device *bdev, * First process the data partition(s) */ for (i=0; i<4; i++, p++) { - u32 offs, size, next; - if (!NR_SECTS(p) || is_extended_partition(p)) + sector_t offs, size, next; + if (!nr_sects(p) || is_extended_partition(p)) continue; /* Check the 3rd and 4th entries - these sometimes contain random garbage */ - offs = START_SECT(p)*sector_size; - size = NR_SECTS(p)*sector_size; + offs = start_sect(p)*sector_size; + size = nr_sects(p)*sector_size; next = this_sector + offs; if (i >= 2) { if (offs + size > this_size) @@ -179,13 +182,13 @@ parse_extended(struct parsed_partitions *state, 
struct block_device *bdev, */ p -= 4; for (i=0; i<4; i++, p++) - if (NR_SECTS(p) && is_extended_partition(p)) + if (nr_sects(p) && is_extended_partition(p)) break; if (i == 4) goto done; /* nothing left to do */ - this_sector = first_sector + START_SECT(p) * sector_size; - this_size = NR_SECTS(p) * sector_size; + this_sector = first_sector + start_sect(p) * sector_size; + this_size = nr_sects(p) * sector_size; put_dev_sector(sect); } done: @@ -197,7 +200,7 @@ done: static void parse_solaris_x86(struct parsed_partitions *state, struct block_device *bdev, - u32 offset, u32 size, int origin) + sector_t offset, sector_t size, int origin) { #ifdef CONFIG_SOLARIS_X86_PARTITION Sector sect; @@ -244,7 +247,7 @@ parse_solaris_x86(struct parsed_partitions *state, struct block_device *bdev, */ static void parse_bsd(struct parsed_partitions *state, struct block_device *bdev, - u32 offset, u32 size, int origin, char *flavour, + sector_t offset, sector_t size, int origin, char *flavour, int max_partitions) { Sector sect; @@ -263,7 +266,7 @@ parse_bsd(struct parsed_partitions *state, struct block_device *bdev, if (le16_to_cpu(l->d_npartitions) < max_partitions) max_partitions = le16_to_cpu(l->d_npartitions); for (p = l->d_partitions; p - l->d_partitions < max_partitions; p++) { - u32 bsd_start, bsd_size; + sector_t bsd_start, bsd_size; if (state->next == state->limit) break; @@ -290,7 +293,7 @@ parse_bsd(struct parsed_partitions *state, struct block_device *bdev, static void parse_freebsd(struct parsed_partitions *state, struct block_device *bdev, - u32 offset, u32 size, int origin) + sector_t offset, sector_t size, int origin) { #ifdef CONFIG_BSD_DISKLABEL parse_bsd(state, bdev, offset, size, origin, @@ -300,7 +303,7 @@ parse_freebsd(struct parsed_partitions *state, struct block_device *bdev, static void parse_netbsd(struct parsed_partitions *state, struct block_device *bdev, - u32 offset, u32 size, int origin) + sector_t offset, sector_t size, int origin) { #ifdef 
CONFIG_BSD_DISKLABEL parse_bsd(state, bdev, offset, size, origin, @@ -310,7 +313,7 @@ parse_netbsd(struct parsed_partitions *state, struct block_device *bdev, static void parse_openbsd(struct parsed_partitions *state, struct block_device *bdev, - u32 offset, u32 size, int origin) + sector_t offset, sector_t size, int origin) { #ifdef CONFIG_BSD_DISKLABEL parse_bsd(state, bdev, offset, size, origin, @@ -324,7 +327,7 @@ parse_openbsd(struct parsed_partitions *state, struct block_device *bdev, */ static void parse_unixware(struct parsed_partitions *state, struct block_device *bdev, - u32 offset, u32 size, int origin) + sector_t offset, sector_t size, int origin) { #ifdef CONFIG_UNIXWARE_DISKLABEL Sector sect; @@ -348,7 +351,8 @@ parse_unixware(struct parsed_partitions *state, struct block_device *bdev, if (p->s_label != UNIXWARE_FS_UNUSED) put_partition(state, state->next++, - START_SECT(p), NR_SECTS(p)); + le32_to_cpu(p->start_sect), + le32_to_cpu(p->nr_sects)); p++; } put_dev_sector(sect); @@ -363,7 +367,7 @@ parse_unixware(struct parsed_partitions *state, struct block_device *bdev, */ static void parse_minix(struct parsed_partitions *state, struct block_device *bdev, - u32 offset, u32 size, int origin) + sector_t offset, sector_t size, int origin) { #ifdef CONFIG_MINIX_SUBPARTITION Sector sect; @@ -390,7 +394,7 @@ parse_minix(struct parsed_partitions *state, struct block_device *bdev, /* add each partition in use */ if (SYS_IND(p) == MINIX_PARTITION) put_partition(state, state->next++, - START_SECT(p), NR_SECTS(p)); + start_sect(p), nr_sects(p)); } printk(" >\n"); } @@ -401,7 +405,7 @@ parse_minix(struct parsed_partitions *state, struct block_device *bdev, static struct { unsigned char id; void (*parse)(struct parsed_partitions *, struct block_device *, - u32, u32, int); + sector_t, sector_t, int); } subtypes[] = { {FREEBSD_PARTITION, parse_freebsd}, {NETBSD_PARTITION, parse_netbsd}, @@ -415,7 +419,7 @@ static struct { int msdos_partition(struct parsed_partitions 
*state, struct block_device *bdev) { - int sector_size = bdev_logical_block_size(bdev) / 512; + sector_t sector_size = bdev_logical_block_size(bdev) / 512; Sector sect; unsigned char *data; struct partition *p; @@ -483,14 +487,21 @@ int msdos_partition(struct parsed_partitions *state, struct block_device *bdev) state->next = 5; for (slot = 1 ; slot <= 4 ; slot++, p++) { - u32 start = START_SECT(p)*sector_size; - u32 size = NR_SECTS(p)*sector_size; + sector_t start = start_sect(p)*sector_size; + sector_t size = nr_sects(p)*sector_size; if (!size) continue; if (is_extended_partition(p)) { - /* prevent someone doing mkfs or mkswap on an - extended partition, but leave room for LILO */ - put_partition(state, slot, start, size == 1 ? 1 : 2); + /* + * prevent someone doing mkfs or mkswap on an + * extended partition, but leave room for LILO + * FIXME: this uses one logical sector for > 512b + * sector, although it may not be enough/proper. + */ + sector_t n = 2; + n = min(size, max(sector_size, n)); + put_partition(state, slot, start, n); + printk(" <"); parse_extended(state, bdev, start, size); printk(" >"); @@ -513,7 +524,7 @@ int msdos_partition(struct parsed_partitions *state, struct block_device *bdev) unsigned char id = SYS_IND(p); int n; - if (!NR_SECTS(p)) + if (!nr_sects(p)) continue; for (n = 0; subtypes[n].parse && id != subtypes[n].id; n++) @@ -521,8 +532,8 @@ int msdos_partition(struct parsed_partitions *state, struct block_device *bdev) if (!subtypes[n].parse) continue; - subtypes[n].parse(state, bdev, START_SECT(p)*sector_size, - NR_SECTS(p)*sector_size, slot); + subtypes[n].parse(state, bdev, start_sect(p)*sector_size, + nr_sects(p)*sector_size, slot); } put_dev_sector(sect); return 1; diff --git a/fs/proc/array.c b/fs/proc/array.c index aa8637b81028..e51f2ec2c5e5 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -68,7 +68,6 @@ #include <linux/hugetlb.h> #include <linux/pagemap.h> #include <linux/swap.h> -#include <linux/slab.h> #include <linux/smp.h> 
#include <linux/signal.h> #include <linux/highmem.h> diff --git a/fs/proc/base.c b/fs/proc/base.c index a7310841c831..8418fcc0a6ab 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -81,6 +81,7 @@ #include <linux/elf.h> #include <linux/pid_namespace.h> #include <linux/fs_struct.h> +#include <linux/slab.h> #include "internal.h" /* NOTE: @@ -442,12 +443,13 @@ static const struct file_operations proc_lstats_operations = { unsigned long badness(struct task_struct *p, unsigned long uptime); static int proc_oom_score(struct task_struct *task, char *buffer) { - unsigned long points; + unsigned long points = 0; struct timespec uptime; do_posix_clock_monotonic_gettime(&uptime); read_lock(&tasklist_lock); - points = badness(task->group_leader, uptime.tv_sec); + if (pid_alive(task)) + points = badness(task, uptime.tv_sec); read_unlock(&tasklist_lock); return sprintf(buffer, "%lu\n", points); } @@ -2907,7 +2909,7 @@ out_no_task: */ static const struct pid_entry tid_base_stuff[] = { DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations), - DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fd_operations), + DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations), REG("environ", S_IRUSR, proc_environ_operations), INF("auxv", S_IRUSR, proc_pid_auxv), ONE("status", S_IRUGO, proc_pid_status), diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 08f4d71dacd7..43c127490606 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -13,6 +13,7 @@ #include <linux/proc_fs.h> #include <linux/stat.h> #include <linux/module.h> +#include <linux/slab.h> #include <linux/mount.h> #include <linux/init.h> #include <linux/idr.h> diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 445a02bcaab3..d35b23238fb1 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -18,6 +18,7 @@ #include <linux/module.h> #include <linux/smp_lock.h> #include <linux/sysctl.h> +#include <linux/slab.h> #include <asm/system.h> #include <asm/uaccess.h> diff 
--git a/fs/proc/kcore.c b/fs/proc/kcore.c index a44a7897fd4d..19979a2ce272 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c @@ -19,6 +19,7 @@ #include <linux/highmem.h> #include <linux/bootmem.h> #include <linux/init.h> +#include <linux/slab.h> #include <asm/uaccess.h> #include <asm/io.h> #include <linux/list.h> @@ -490,7 +491,7 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos) } read_unlock(&kclist_lock); - if (m == NULL) { + if (&m->list == &kclist_head) { if (clear_user(buffer, tsz)) return -EFAULT; } else if (is_vmalloc_or_module_addr((void *)start)) { diff --git a/fs/proc/nommu.c b/fs/proc/nommu.c index 9fe7d7ebe115..b1822dde55c2 100644 --- a/fs/proc/nommu.c +++ b/fs/proc/nommu.c @@ -21,7 +21,6 @@ #include <linux/mmzone.h> #include <linux/pagemap.h> #include <linux/swap.h> -#include <linux/slab.h> #include <linux/smp.h> #include <linux/seq_file.h> #include <linux/hugetlb.h> diff --git a/fs/proc/proc_devtree.c b/fs/proc/proc_devtree.c index f8650dce74fb..ce94801f48ca 100644 --- a/fs/proc/proc_devtree.c +++ b/fs/proc/proc_devtree.c @@ -12,6 +12,7 @@ #include <linux/string.h> #include <linux/of.h> #include <linux/module.h> +#include <linux/slab.h> #include <asm/prom.h> #include <asm/uaccess.h> #include "internal.h" diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c index 04d1270f1c38..9020ac15baaa 100644 --- a/fs/proc/proc_net.c +++ b/fs/proc/proc_net.c @@ -14,6 +14,7 @@ #include <linux/time.h> #include <linux/proc_fs.h> #include <linux/stat.h> +#include <linux/slab.h> #include <linux/init.h> #include <linux/sched.h> #include <linux/module.h> diff --git a/fs/proc/stat.c b/fs/proc/stat.c index b9b7aad2003d..bf31b03fc275 100644 --- a/fs/proc/stat.c +++ b/fs/proc/stat.c @@ -1,6 +1,5 @@ #include <linux/cpumask.h> #include <linux/fs.h> -#include <linux/gfp.h> #include <linux/init.h> #include <linux/interrupt.h> #include <linux/kernel_stat.h> diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 183f8ff5f400..070553427dd5 
100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -4,6 +4,7 @@ #include <linux/seq_file.h> #include <linux/highmem.h> #include <linux/ptrace.h> +#include <linux/slab.h> #include <linux/pagemap.h> #include <linux/mempolicy.h> #include <linux/swap.h> @@ -406,6 +407,7 @@ static int show_smap(struct seq_file *m, void *v) memset(&mss, 0, sizeof mss); mss.vma = vma; + /* mmap_sem is held in m_start */ if (vma->vm_mm && !is_vm_hugetlb_page(vma)) walk_page_range(vma->vm_start, vma->vm_end, &smaps_walk); @@ -552,7 +554,8 @@ const struct file_operations proc_clear_refs_operations = { }; struct pagemapread { - u64 __user *out, *end; + int pos, len; + u64 *buffer; }; #define PM_ENTRY_BYTES sizeof(u64) @@ -575,10 +578,8 @@ struct pagemapread { static int add_to_pagemap(unsigned long addr, u64 pfn, struct pagemapread *pm) { - if (put_user(pfn, pm->out)) - return -EFAULT; - pm->out++; - if (pm->out >= pm->end) + pm->buffer[pm->pos++] = pfn; + if (pm->pos >= pm->len) return PM_END_OF_BUFFER; return 0; } @@ -661,31 +662,18 @@ static u64 huge_pte_to_pagemap_entry(pte_t pte, int offset) return pme; } -static int pagemap_hugetlb_range(pte_t *pte, unsigned long addr, - unsigned long end, struct mm_walk *walk) +/* This function walks within one hugetlb entry in the single call */ +static int pagemap_hugetlb_range(pte_t *pte, unsigned long hmask, + unsigned long addr, unsigned long end, + struct mm_walk *walk) { - struct vm_area_struct *vma; struct pagemapread *pm = walk->private; - struct hstate *hs = NULL; int err = 0; + u64 pfn; - vma = find_vma(walk->mm, addr); - if (vma) - hs = hstate_vma(vma); for (; addr != end; addr += PAGE_SIZE) { - u64 pfn = PM_NOT_PRESENT; - - if (vma && (addr >= vma->vm_end)) { - vma = find_vma(walk->mm, addr); - if (vma) - hs = hstate_vma(vma); - } - - if (vma && (vma->vm_start <= addr) && is_vm_hugetlb_page(vma)) { - /* calculate pfn of the "raw" page in the hugepage. 
*/ - int offset = (addr & ~huge_page_mask(hs)) >> PAGE_SHIFT; - pfn = huge_pte_to_pagemap_entry(*pte, offset); - } + int offset = (addr & ~hmask) >> PAGE_SHIFT; + pfn = huge_pte_to_pagemap_entry(*pte, offset); err = add_to_pagemap(addr, pfn, pm); if (err) return err; @@ -720,21 +708,20 @@ static int pagemap_hugetlb_range(pte_t *pte, unsigned long addr, * determine which areas of memory are actually mapped and llseek to * skip over unmapped regions. */ +#define PAGEMAP_WALK_SIZE (PMD_SIZE) static ssize_t pagemap_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode); - struct page **pages, *page; - unsigned long uaddr, uend; struct mm_struct *mm; struct pagemapread pm; - int pagecount; int ret = -ESRCH; struct mm_walk pagemap_walk = {}; unsigned long src; unsigned long svpfn; unsigned long start_vaddr; unsigned long end_vaddr; + int copied = 0; if (!task) goto out; @@ -757,35 +744,12 @@ static ssize_t pagemap_read(struct file *file, char __user *buf, if (!mm) goto out_task; - - uaddr = (unsigned long)buf & PAGE_MASK; - uend = (unsigned long)(buf + count); - pagecount = (PAGE_ALIGN(uend) - uaddr) / PAGE_SIZE; - ret = 0; - if (pagecount == 0) - goto out_mm; - pages = kcalloc(pagecount, sizeof(struct page *), GFP_KERNEL); + pm.len = PM_ENTRY_BYTES * (PAGEMAP_WALK_SIZE >> PAGE_SHIFT); + pm.buffer = kmalloc(pm.len, GFP_TEMPORARY); ret = -ENOMEM; - if (!pages) + if (!pm.buffer) goto out_mm; - down_read(&current->mm->mmap_sem); - ret = get_user_pages(current, current->mm, uaddr, pagecount, - 1, 0, pages, NULL); - up_read(&current->mm->mmap_sem); - - if (ret < 0) - goto out_free; - - if (ret != pagecount) { - pagecount = ret; - ret = -EFAULT; - goto out_pages; - } - - pm.out = (u64 __user *)buf; - pm.end = (u64 __user *)(buf + count); - pagemap_walk.pmd_entry = pagemap_pte_range; pagemap_walk.pte_hole = pagemap_pte_hole; pagemap_walk.hugetlb_entry = pagemap_hugetlb_range; @@ -807,23 +771,36 @@
static ssize_t pagemap_read(struct file *file, char __user *buf, * user buffer is tracked in "pm", and the walk * will stop when we hit the end of the buffer. */ - ret = walk_page_range(start_vaddr, end_vaddr, &pagemap_walk); - if (ret == PM_END_OF_BUFFER) - ret = 0; - /* don't need mmap_sem for these, but this looks cleaner */ - *ppos += (char __user *)pm.out - buf; - if (!ret) - ret = (char __user *)pm.out - buf; - -out_pages: - for (; pagecount; pagecount--) { - page = pages[pagecount-1]; - if (!PageReserved(page)) - SetPageDirty(page); - page_cache_release(page); + ret = 0; + while (count && (start_vaddr < end_vaddr)) { + int len; + unsigned long end; + + pm.pos = 0; + end = start_vaddr + PAGEMAP_WALK_SIZE; + /* overflow ? */ + if (end < start_vaddr || end > end_vaddr) + end = end_vaddr; + down_read(&mm->mmap_sem); + ret = walk_page_range(start_vaddr, end, &pagemap_walk); + up_read(&mm->mmap_sem); + start_vaddr = end; + + len = min(count, PM_ENTRY_BYTES * pm.pos); + if (copy_to_user(buf, pm.buffer, len)) { + ret = -EFAULT; + goto out_free; + } + copied += len; + buf += len; + count -= len; } + *ppos += copied; + if (!ret || ret == PM_END_OF_BUFFER) + ret = copied; + out_free: - kfree(pages); + kfree(pm.buffer); out_mm: mmput(mm); out_task: diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c index 5d9fd64ef81a..46d4b5d72bd3 100644 --- a/fs/proc/task_nommu.c +++ b/fs/proc/task_nommu.c @@ -5,6 +5,7 @@ #include <linux/fs_struct.h> #include <linux/mount.h> #include <linux/ptrace.h> +#include <linux/slab.h> #include <linux/seq_file.h> #include "internal.h" diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index 0872afa58d39..9fbc99ec799a 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c @@ -12,6 +12,7 @@ #include <linux/user.h> #include <linux/elf.h> #include <linux/elfcore.h> +#include <linux/slab.h> #include <linux/highmem.h> #include <linux/bootmem.h> #include <linux/init.h> diff --git a/fs/quota/Kconfig b/fs/quota/Kconfig index dad7fb247ddc..3e21b1e2ad3a 
100644 --- a/fs/quota/Kconfig +++ b/fs/quota/Kconfig @@ -33,6 +33,14 @@ config PRINT_QUOTA_WARNING Note that this behavior is currently deprecated and may go away in future. Please use notification via netlink socket instead. +config QUOTA_DEBUG + bool "Additional quota sanity checks" + depends on QUOTA + default n + help + If you say Y here, quota subsystem will perform some additional + sanity checks of quota internal structures. If unsure, say N. + # Generic support for tree structured quota files. Selected when needed. config QUOTA_TREE tristate diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index e0b870f4749f..788b5802a7ce 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -80,8 +80,6 @@ #include <asm/uaccess.h> -#define __DQUOT_PARANOIA - /* * There are three quota SMP locks. dq_list_lock protects all lists with quotas * and quota formats, dqstats structure containing statistics about the lists @@ -695,7 +693,7 @@ void dqput(struct dquot *dquot) if (!dquot) return; -#ifdef __DQUOT_PARANOIA +#ifdef CONFIG_QUOTA_DEBUG if (!atomic_read(&dquot->dq_count)) { printk("VFS: dqput: trying to free free dquot\n"); printk("VFS: device %s, dquot of %s %d\n", @@ -748,7 +746,7 @@ we_slept: goto we_slept; } atomic_dec(&dquot->dq_count); -#ifdef __DQUOT_PARANOIA +#ifdef CONFIG_QUOTA_DEBUG /* sanity check */ BUG_ON(!list_empty(&dquot->dq_free)); #endif @@ -845,7 +843,7 @@ we_slept: dquot = NULL; goto out; } -#ifdef __DQUOT_PARANOIA +#ifdef CONFIG_QUOTA_DEBUG BUG_ON(!dquot->dq_sb); /* Has somebody invalidated entry under us? 
*/ #endif out: @@ -874,14 +872,18 @@ static int dqinit_needed(struct inode *inode, int type) static void add_dquot_ref(struct super_block *sb, int type) { struct inode *inode, *old_inode = NULL; +#ifdef CONFIG_QUOTA_DEBUG int reserved = 0; +#endif spin_lock(&inode_lock); list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE|I_NEW)) continue; +#ifdef CONFIG_QUOTA_DEBUG if (unlikely(inode_get_rsv_space(inode) > 0)) reserved = 1; +#endif if (!atomic_read(&inode->i_writecount)) continue; if (!dqinit_needed(inode, type)) @@ -903,11 +905,13 @@ static void add_dquot_ref(struct super_block *sb, int type) spin_unlock(&inode_lock); iput(old_inode); +#ifdef CONFIG_QUOTA_DEBUG if (reserved) { printk(KERN_WARNING "VFS (%s): Writes happened before quota" " was turned on thus quota information is probably " "inconsistent. Please run quotacheck(8).\n", sb->s_id); } +#endif } /* @@ -934,7 +938,7 @@ static int remove_inode_dquot_ref(struct inode *inode, int type, inode->i_dquot[type] = NULL; if (dquot) { if (dqput_blocks(dquot)) { -#ifdef __DQUOT_PARANOIA +#ifdef CONFIG_QUOTA_DEBUG if (atomic_read(&dquot->dq_count) != 1) printk(KERN_WARNING "VFS: Adding dquot with dq_count %d to dispose list.\n", atomic_read(&dquot->dq_count)); #endif @@ -2322,34 +2326,34 @@ static int do_set_dqblk(struct dquot *dquot, struct if_dqblk *di) if (di->dqb_valid & QIF_SPACE) { dm->dqb_curspace = di->dqb_curspace - dm->dqb_rsvspace; check_blim = 1; - __set_bit(DQ_LASTSET_B + QIF_SPACE_B, &dquot->dq_flags); + set_bit(DQ_LASTSET_B + QIF_SPACE_B, &dquot->dq_flags); } if (di->dqb_valid & QIF_BLIMITS) { dm->dqb_bsoftlimit = qbtos(di->dqb_bsoftlimit); dm->dqb_bhardlimit = qbtos(di->dqb_bhardlimit); check_blim = 1; - __set_bit(DQ_LASTSET_B + QIF_BLIMITS_B, &dquot->dq_flags); + set_bit(DQ_LASTSET_B + QIF_BLIMITS_B, &dquot->dq_flags); } if (di->dqb_valid & QIF_INODES) { dm->dqb_curinodes = di->dqb_curinodes; check_ilim = 1; - __set_bit(DQ_LASTSET_B + 
QIF_INODES_B, &dquot->dq_flags); + set_bit(DQ_LASTSET_B + QIF_INODES_B, &dquot->dq_flags); } if (di->dqb_valid & QIF_ILIMITS) { dm->dqb_isoftlimit = di->dqb_isoftlimit; dm->dqb_ihardlimit = di->dqb_ihardlimit; check_ilim = 1; - __set_bit(DQ_LASTSET_B + QIF_ILIMITS_B, &dquot->dq_flags); + set_bit(DQ_LASTSET_B + QIF_ILIMITS_B, &dquot->dq_flags); } if (di->dqb_valid & QIF_BTIME) { dm->dqb_btime = di->dqb_btime; check_blim = 1; - __set_bit(DQ_LASTSET_B + QIF_BTIME_B, &dquot->dq_flags); + set_bit(DQ_LASTSET_B + QIF_BTIME_B, &dquot->dq_flags); } if (di->dqb_valid & QIF_ITIME) { dm->dqb_itime = di->dqb_itime; check_ilim = 1; - __set_bit(DQ_LASTSET_B + QIF_ITIME_B, &dquot->dq_flags); + set_bit(DQ_LASTSET_B + QIF_ITIME_B, &dquot->dq_flags); } if (check_blim) { diff --git a/fs/quota/netlink.c b/fs/quota/netlink.c index 2663ed90fb03..d67908b407d9 100644 --- a/fs/quota/netlink.c +++ b/fs/quota/netlink.c @@ -5,6 +5,7 @@ #include <linux/kernel.h> #include <linux/quotaops.h> #include <linux/sched.h> +#include <linux/slab.h> #include <net/netlink.h> #include <net/genetlink.h> diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c index 1739a4aba25f..5ea4ad81a429 100644 --- a/fs/ramfs/file-nommu.c +++ b/fs/ramfs/file-nommu.c @@ -21,6 +21,7 @@ #include <linux/pagevec.h> #include <linux/mman.h> #include <linux/sched.h> +#include <linux/slab.h> #include <asm/uaccess.h> #include "internal.h" diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c index a6090aa1a7c1..c94853473ca9 100644 --- a/fs/ramfs/inode.c +++ b/fs/ramfs/inode.c @@ -35,6 +35,7 @@ #include <linux/sched.h> #include <linux/parser.h> #include <linux/magic.h> +#include <linux/slab.h> #include <asm/uaccess.h> #include "internal.h" diff --git a/fs/read_write.c b/fs/read_write.c index b7f4a1f94d48..113386d6fd2d 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -258,6 +258,7 @@ ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *pp init_sync_kiocb(&kiocb, filp); kiocb.ki_pos = *ppos; kiocb.ki_left = 
len; + kiocb.ki_nbytes = len; for (;;) { ret = filp->f_op->aio_read(&kiocb, &iov, 1, kiocb.ki_pos); @@ -313,6 +314,7 @@ ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, lof init_sync_kiocb(&kiocb, filp); kiocb.ki_pos = *ppos; kiocb.ki_left = len; + kiocb.ki_nbytes = len; for (;;) { ret = filp->f_op->aio_write(&kiocb, &iov, 1, kiocb.ki_pos); diff --git a/fs/reiserfs/dir.c b/fs/reiserfs/dir.c index c094f58c7448..07930449a958 100644 --- a/fs/reiserfs/dir.c +++ b/fs/reiserfs/dir.c @@ -8,6 +8,7 @@ #include <linux/reiserfs_fs.h> #include <linux/stat.h> #include <linux/buffer_head.h> +#include <linux/slab.h> #include <asm/uaccess.h> extern const struct reiserfs_key MIN_KEY; @@ -45,8 +46,6 @@ static inline bool is_privroot_deh(struct dentry *dir, struct reiserfs_de_head *deh) { struct dentry *privroot = REISERFS_SB(dir->d_sb)->priv_root; - if (reiserfs_expose_privroot(dir->d_sb)) - return 0; return (dir == dir->d_parent && privroot->d_inode && deh->deh_objectid == INODE_PKEY(privroot->d_inode)->k_objectid); } diff --git a/fs/reiserfs/fix_node.c b/fs/reiserfs/fix_node.c index 6591cb21edf6..1e4250bc3a6f 100644 --- a/fs/reiserfs/fix_node.c +++ b/fs/reiserfs/fix_node.c @@ -35,6 +35,7 @@ **/ #include <linux/time.h> +#include <linux/slab.h> #include <linux/string.h> #include <linux/reiserfs_fs.h> #include <linux/buffer_head.h> diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index d1da94b82d8f..dc2c65e04853 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -11,6 +11,7 @@ #include <linux/smp_lock.h> #include <linux/pagemap.h> #include <linux/highmem.h> +#include <linux/slab.h> #include <asm/uaccess.h> #include <asm/unaligned.h> #include <linux/buffer_head.h> diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index ba98546fabbd..19fbc810e8e7 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -50,6 +50,7 @@ #include <linux/blkdev.h> #include <linux/backing-dev.h> #include <linux/uaccess.h> +#include <linux/slab.h> 
#include <asm/system.h> @@ -2217,6 +2218,15 @@ static int journal_read_transaction(struct super_block *sb, brelse(d_bh); return 1; } + + if (bdev_read_only(sb->s_bdev)) { + reiserfs_warning(sb, "clm-2076", + "device is readonly, unable to replay log"); + brelse(c_bh); + brelse(d_bh); + return -EROFS; + } + trans_id = get_desc_trans_id(desc); /* now we know we've got a good transaction, and it was inside the valid time ranges */ log_blocks = kmalloc(get_desc_trans_len(desc) * @@ -2459,12 +2469,6 @@ static int journal_read(struct super_block *sb) goto start_log_replay; } - if (continue_replay && bdev_read_only(sb->s_bdev)) { - reiserfs_warning(sb, "clm-2076", - "device is readonly, unable to replay log"); - return -1; - } - /* ok, there are transactions that need to be replayed. start with the first log block, find ** all the valid transactions, and pick out the oldest. */ diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c index 96e4cbbfaa18..d0c43cb99ffc 100644 --- a/fs/reiserfs/namei.c +++ b/fs/reiserfs/namei.c @@ -13,6 +13,7 @@ #include <linux/time.h> #include <linux/bitops.h> +#include <linux/slab.h> #include <linux/reiserfs_fs.h> #include <linux/reiserfs_acl.h> #include <linux/reiserfs_xattr.h> diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 04bf5d791bda..59125fb36d42 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -12,6 +12,7 @@ */ #include <linux/module.h> +#include <linux/slab.h> #include <linux/vmalloc.h> #include <linux/time.h> #include <asm/uaccess.h> @@ -1618,10 +1619,8 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) save_mount_options(s, data); sbi = kzalloc(sizeof(struct reiserfs_sb_info), GFP_KERNEL); - if (!sbi) { - errval = -ENOMEM; - goto error_alloc; - } + if (!sbi) + return -ENOMEM; s->s_fs_info = sbi; /* Set default values for options: non-aggressive tails, RO on errors */ REISERFS_SB(s)->s_mount_opt |= (1 << REISERFS_SMALLTAIL); @@ -1878,12 +1877,12 @@ static int 
reiserfs_fill_super(struct super_block *s, void *data, int silent) return (0); error: - reiserfs_write_unlock(s); -error_alloc: if (jinit_done) { /* kill the commit thread, free journal ram */ journal_release_error(NULL, s); } + reiserfs_write_unlock(s); + reiserfs_free_bitmap_cache(s); if (SB_BUFFER_WITH_SB(s)) brelse(SB_BUFFER_WITH_SB(s)); diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index 37d034ca7d99..e7cc00e636dc 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -38,6 +38,7 @@ #include <linux/dcache.h> #include <linux/namei.h> #include <linux/errno.h> +#include <linux/gfp.h> #include <linux/fs.h> #include <linux/file.h> #include <linux/pagemap.h> @@ -553,7 +554,7 @@ reiserfs_xattr_set_handle(struct reiserfs_transaction_handle *th, if (!err && new_size < i_size_read(dentry->d_inode)) { struct iattr newattrs = { .ia_ctime = current_fs_time(inode->i_sb), - .ia_size = buffer_size, + .ia_size = new_size, .ia_valid = ATTR_SIZE | ATTR_CTIME, }; @@ -972,21 +973,13 @@ int reiserfs_permission(struct inode *inode, int mask) return generic_permission(inode, mask, NULL); } -/* This will catch lookups from the fs root to .reiserfs_priv */ -static int -xattr_lookup_poison(struct dentry *dentry, struct qstr *q1, struct qstr *name) +static int xattr_hide_revalidate(struct dentry *dentry, struct nameidata *nd) { - struct dentry *priv_root = REISERFS_SB(dentry->d_sb)->priv_root; - if (container_of(q1, struct dentry, d_name) == priv_root) - return -ENOENT; - if (q1->len == name->len && - !memcmp(q1->name, name->name, name->len)) - return 0; - return 1; + return -EPERM; } static const struct dentry_operations xattr_lookup_poison_ops = { - .d_compare = xattr_lookup_poison, + .d_revalidate = xattr_hide_revalidate, }; int reiserfs_lookup_privroot(struct super_block *s) @@ -1000,8 +993,7 @@ int reiserfs_lookup_privroot(struct super_block *s) strlen(PRIVROOT_NAME)); if (!IS_ERR(dentry)) { REISERFS_SB(s)->priv_root = dentry; - if (!reiserfs_expose_privroot(s)) - 
s->s_root->d_op = &xattr_lookup_poison_ops; + dentry->d_op = &xattr_lookup_poison_ops; if (dentry->d_inode) dentry->d_inode->i_flags |= S_PRIVATE; } else diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c index dd20a7883f0f..9cdb759645a9 100644 --- a/fs/reiserfs/xattr_acl.c +++ b/fs/reiserfs/xattr_acl.c @@ -5,6 +5,7 @@ #include <linux/errno.h> #include <linux/pagemap.h> #include <linux/xattr.h> +#include <linux/slab.h> #include <linux/posix_acl_xattr.h> #include <linux/reiserfs_xattr.h> #include <linux/reiserfs_acl.h> diff --git a/fs/reiserfs/xattr_security.c b/fs/reiserfs/xattr_security.c index d8b5bfcbdd30..7271a477c041 100644 --- a/fs/reiserfs/xattr_security.c +++ b/fs/reiserfs/xattr_security.c @@ -3,6 +3,7 @@ #include <linux/fs.h> #include <linux/pagemap.h> #include <linux/xattr.h> +#include <linux/slab.h> #include <linux/reiserfs_xattr.h> #include <linux/security.h> #include <asm/uaccess.h> @@ -76,7 +77,7 @@ int reiserfs_security_init(struct inode *dir, struct inode *inode, return error; } - if (sec->length) { + if (sec->length && reiserfs_xattrs_initialized(inode->i_sb)) { blocks = reiserfs_xattr_jcreate_nblocks(inode) + reiserfs_xattr_nblocks(inode, sec->length); /* We don't want to count the directories twice if we have diff --git a/fs/signalfd.c b/fs/signalfd.c index 1dabe4ee02fe..f329849ce3c0 100644 --- a/fs/signalfd.c +++ b/fs/signalfd.c @@ -22,6 +22,7 @@ #include <linux/init.h> #include <linux/fs.h> #include <linux/sched.h> +#include <linux/slab.h> #include <linux/kernel.h> #include <linux/signal.h> #include <linux/list.h> diff --git a/fs/smbfs/file.c b/fs/smbfs/file.c index 92d5e8ffb639..dbf6548bbf06 100644 --- a/fs/smbfs/file.c +++ b/fs/smbfs/file.c @@ -13,7 +13,6 @@ #include <linux/fcntl.h> #include <linux/stat.h> #include <linux/mm.h> -#include <linux/slab.h> #include <linux/pagemap.h> #include <linux/smp_lock.h> #include <linux/net.h> diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c index 1c4c8f089970..dfa1d67f8fca 100644 --- 
a/fs/smbfs/inode.c +++ b/fs/smbfs/inode.c @@ -479,6 +479,7 @@ smb_put_super(struct super_block *sb) if (server->conn_pid) kill_pid(server->conn_pid, SIGTERM, 1); + bdi_destroy(&server->bdi); kfree(server->ops); smb_unload_nls(server); sb->s_fs_info = NULL; @@ -525,6 +526,11 @@ static int smb_fill_super(struct super_block *sb, void *raw_data, int silent) if (!server) goto out_no_server; sb->s_fs_info = server; + + if (bdi_setup_and_register(&server->bdi, "smbfs", BDI_CAP_MAP_COPY)) + goto out_bdi; + + sb->s_bdi = &server->bdi; server->super_block = sb; server->mnt = NULL; @@ -624,6 +630,8 @@ out_no_smbiod: out_bad_option: kfree(mem); out_no_mem: + bdi_destroy(&server->bdi); +out_bdi: if (!server->mnt) printk(KERN_ERR "smb_fill_super: allocation failure\n"); sb->s_fs_info = NULL; diff --git a/fs/smbfs/smbiod.c b/fs/smbfs/smbiod.c index 6bd9b691a463..0e39a924f10a 100644 --- a/fs/smbfs/smbiod.c +++ b/fs/smbfs/smbiod.c @@ -12,7 +12,6 @@ #include <linux/string.h> #include <linux/stat.h> #include <linux/errno.h> -#include <linux/slab.h> #include <linux/init.h> #include <linux/file.h> #include <linux/dcache.h> diff --git a/fs/smbfs/symlink.c b/fs/smbfs/symlink.c index 00b2909bd469..54350b59046b 100644 --- a/fs/smbfs/symlink.c +++ b/fs/smbfs/symlink.c @@ -15,6 +15,7 @@ #include <linux/pagemap.h> #include <linux/net.h> #include <linux/namei.h> +#include <linux/slab.h> #include <asm/uaccess.h> #include <asm/system.h> diff --git a/fs/splice.c b/fs/splice.c index 39208663aaf1..9313b6124a2e 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -30,6 +30,7 @@ #include <linux/syscalls.h> #include <linux/uio.h> #include <linux/security.h> +#include <linux/gfp.h> /* * Attempt to steal a page from a pipe buffer. 
This should perhaps go into diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c index 1cb0d81b164b..653c030eb840 100644 --- a/fs/squashfs/block.c +++ b/fs/squashfs/block.c @@ -87,9 +87,8 @@ int squashfs_read_data(struct super_block *sb, void **buffer, u64 index, u64 cur_index = index >> msblk->devblksize_log2; int bytes, compressed, b = 0, k = 0, page = 0, avail; - - bh = kcalloc((msblk->block_size >> msblk->devblksize_log2) + 1, - sizeof(*bh), GFP_KERNEL); + bh = kcalloc(((srclength + msblk->devblksize - 1) + >> msblk->devblksize_log2) + 1, sizeof(*bh), GFP_KERNEL); if (bh == NULL) return -ENOMEM; diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c index 3550aec2f655..48b6f4a385a6 100644 --- a/fs/squashfs/super.c +++ b/fs/squashfs/super.c @@ -275,7 +275,8 @@ allocate_root: err = squashfs_read_inode(root, root_inode); if (err) { - iget_failed(root); + make_bad_inode(root); + iput(root); goto failed_mount; } insert_inode_hash(root); @@ -353,6 +354,7 @@ static void squashfs_put_super(struct super_block *sb) kfree(sbi->id_table); kfree(sbi->fragment_index); kfree(sbi->meta_index); + kfree(sbi->inode_lookup_table); kfree(sb->s_fs_info); sb->s_fs_info = NULL; } diff --git a/fs/squashfs/symlink.c b/fs/squashfs/symlink.c index e80be2022a7f..32b911f4ee39 100644 --- a/fs/squashfs/symlink.c +++ b/fs/squashfs/symlink.c @@ -33,7 +33,6 @@ #include <linux/fs.h> #include <linux/vfs.h> #include <linux/kernel.h> -#include <linux/slab.h> #include <linux/string.h> #include <linux/pagemap.h> diff --git a/fs/squashfs/zlib_wrapper.c b/fs/squashfs/zlib_wrapper.c index 4dd70e04333b..7a603874e483 100644 --- a/fs/squashfs/zlib_wrapper.c +++ b/fs/squashfs/zlib_wrapper.c @@ -24,6 +24,7 @@ #include <linux/mutex.h> #include <linux/buffer_head.h> +#include <linux/slab.h> #include <linux/zlib.h> #include "squashfs_fs.h" @@ -127,8 +128,9 @@ static int zlib_uncompress(struct squashfs_sb_info *msblk, void **buffer, goto release_mutex; } + length = stream->total_out; 
mutex_unlock(&msblk->read_data_mutex); - return stream->total_out; + return length; release_mutex: mutex_unlock(&msblk->read_data_mutex); diff --git a/fs/super.c b/fs/super.c index f35ac6022109..1527e6a0ee35 100644 --- a/fs/super.c +++ b/fs/super.c @@ -37,6 +37,7 @@ #include <linux/kobject.h> #include <linux/mutex.h> #include <linux/file.h> +#include <linux/backing-dev.h> #include <asm/uaccess.h> #include "internal.h" @@ -693,6 +694,7 @@ int set_anon_super(struct super_block *s, void *data) return -EMFILE; } s->s_dev = MKDEV(0, dev & MINORMASK); + s->s_bdi = &noop_backing_dev_info; return 0; } @@ -954,10 +956,11 @@ vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void if (error < 0) goto out_free_secdata; BUG_ON(!mnt->mnt_sb); + WARN_ON(!mnt->mnt_sb->s_bdi); - error = security_sb_kern_mount(mnt->mnt_sb, flags, secdata); - if (error) - goto out_sb; + error = security_sb_kern_mount(mnt->mnt_sb, flags, secdata); + if (error) + goto out_sb; /* * filesystems should never set s_maxbytes larger than MAX_LFS_FILESIZE diff --git a/fs/sync.c b/fs/sync.c index f557d71cb097..92b228176f7c 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -5,6 +5,7 @@ #include <linux/kernel.h> #include <linux/file.h> #include <linux/fs.h> +#include <linux/slab.h> #include <linux/module.h> #include <linux/sched.h> #include <linux/writeback.h> @@ -13,6 +14,7 @@ #include <linux/pagemap.h> #include <linux/quotaops.h> #include <linux/buffer_head.h> +#include <linux/backing-dev.h> #include "internal.h" #define VALID_FLAGS (SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE| \ @@ -31,7 +33,7 @@ static int __sync_filesystem(struct super_block *sb, int wait) * This should be safe, as we require bdi backing to actually * write out data in the first place */ - if (!sb->s_bdi) + if (!sb->s_bdi || sb->s_bdi == &noop_backing_dev_info) return 0; if (sb->s_qcop && sb->s_qcop->quota_sync) diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c index 082daaecac1b..a4a0a9419711 100644 --- 
a/fs/sysfs/inode.c +++ b/fs/sysfs/inode.c @@ -18,6 +18,7 @@ #include <linux/capability.h> #include <linux/errno.h> #include <linux/sched.h> +#include <linux/slab.h> #include <linux/xattr.h> #include <linux/security.h> #include "sysfs.h" diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c index 0cb10884a2fc..776137828dca 100644 --- a/fs/sysfs/mount.c +++ b/fs/sysfs/mount.c @@ -18,6 +18,7 @@ #include <linux/init.h> #include <linux/module.h> #include <linux/magic.h> +#include <linux/slab.h> #include "sysfs.h" diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c index 1b9a3a1e8a17..b93ec51fa7ac 100644 --- a/fs/sysfs/symlink.c +++ b/fs/sysfs/symlink.c @@ -11,6 +11,7 @@ */ #include <linux/fs.h> +#include <linux/gfp.h> #include <linux/mount.h> #include <linux/module.h> #include <linux/kobject.h> diff --git a/fs/timerfd.c b/fs/timerfd.c index 1bfc95ad5f71..98158de91d24 100644 --- a/fs/timerfd.c +++ b/fs/timerfd.c @@ -14,6 +14,7 @@ #include <linux/fs.h> #include <linux/sched.h> #include <linux/kernel.h> +#include <linux/slab.h> #include <linux/list.h> #include <linux/spinlock.h> #include <linux/time.h> diff --git a/fs/ubifs/commit.c b/fs/ubifs/commit.c index 4775af401167..37fa7ed062d8 100644 --- a/fs/ubifs/commit.c +++ b/fs/ubifs/commit.c @@ -45,6 +45,7 @@ #include <linux/freezer.h> #include <linux/kthread.h> +#include <linux/slab.h> #include "ubifs.h" /** diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c index 90492327b383..c2a68baa782f 100644 --- a/fs/ubifs/debug.c +++ b/fs/ubifs/debug.c @@ -34,6 +34,7 @@ #include <linux/moduleparam.h> #include <linux/debugfs.h> #include <linux/math64.h> +#include <linux/slab.h> #ifdef CONFIG_UBIFS_FS_DEBUG diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index e26c02ab6cd5..5692cf72b807 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -52,6 +52,7 @@ #include "ubifs.h" #include <linux/mount.h> #include <linux/namei.h> +#include <linux/slab.h> static int read_block(struct inode *inode, void *addr, unsigned int block, struct ubifs_data_node 
*dn) diff --git a/fs/ubifs/gc.c b/fs/ubifs/gc.c index e5a3d8e96bb7..918d1582ca05 100644 --- a/fs/ubifs/gc.c +++ b/fs/ubifs/gc.c @@ -53,6 +53,7 @@ * good, and GC takes extra care when moving them. */ +#include <linux/slab.h> #include <linux/pagemap.h> #include <linux/list_sort.h> #include "ubifs.h" diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c index e589fedaf1ef..77d5cf4a7547 100644 --- a/fs/ubifs/io.c +++ b/fs/ubifs/io.c @@ -51,6 +51,7 @@ */ #include <linux/crc32.h> +#include <linux/slab.h> #include "ubifs.h" /** diff --git a/fs/ubifs/lpt.c b/fs/ubifs/lpt.c index b2792e84d245..ad7f67b827ea 100644 --- a/fs/ubifs/lpt.c +++ b/fs/ubifs/lpt.c @@ -46,6 +46,7 @@ #include "ubifs.h" #include <linux/crc16.h> #include <linux/math64.h> +#include <linux/slab.h> /** * do_calc_lpt_geom - calculate sizes for the LPT area. diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c index 8cbfb8248025..13cb7a4237bf 100644 --- a/fs/ubifs/lpt_commit.c +++ b/fs/ubifs/lpt_commit.c @@ -26,6 +26,7 @@ */ #include <linux/crc16.h> +#include <linux/slab.h> #include "ubifs.h" /** diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c index 868a55ee080f..109c6ea03bb5 100644 --- a/fs/ubifs/recovery.c +++ b/fs/ubifs/recovery.c @@ -31,6 +31,7 @@ */ #include <linux/crc32.h> +#include <linux/slab.h> #include "ubifs.h" /** diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c index 57085e43320f..96cb62c8a9dd 100644 --- a/fs/ubifs/sb.c +++ b/fs/ubifs/sb.c @@ -27,6 +27,7 @@ */ #include "ubifs.h" +#include <linux/slab.h> #include <linux/random.h> #include <linux/math64.h> diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c index e5b1a7d00fa0..2194915220e5 100644 --- a/fs/ubifs/tnc.c +++ b/fs/ubifs/tnc.c @@ -31,6 +31,7 @@ */ #include <linux/crc32.h> +#include <linux/slab.h> #include "ubifs.h" /* diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index b2d976366a46..bd2542dad014 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h @@ -28,6 +28,7 @@ #include <linux/fs.h> #include <linux/err.h> #include <linux/sched.h> +#include 
<linux/slab.h> #include <linux/vmalloc.h> #include <linux/spinlock.h> #include <linux/mutex.h> diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c index 195830f47569..c74400f88fe0 100644 --- a/fs/ubifs/xattr.c +++ b/fs/ubifs/xattr.c @@ -56,6 +56,7 @@ */ #include "ubifs.h" +#include <linux/slab.h> #include <linux/xattr.h> #include <linux/posix_acl_xattr.h> diff --git a/fs/udf/balloc.c b/fs/udf/balloc.c index 19626e2491c4..9a9378b4eb5a 100644 --- a/fs/udf/balloc.c +++ b/fs/udf/balloc.c @@ -125,9 +125,8 @@ static void udf_bitmap_free_blocks(struct super_block *sb, mutex_lock(&sbi->s_alloc_mutex); partmap = &sbi->s_partmaps[bloc->partitionReferenceNum]; - if (bloc->logicalBlockNum < 0 || - (bloc->logicalBlockNum + count) > - partmap->s_partition_len) { + if (bloc->logicalBlockNum + count < count || + (bloc->logicalBlockNum + count) > partmap->s_partition_len) { udf_debug("%d < %d || %d + %d > %d\n", bloc->logicalBlockNum, 0, bloc->logicalBlockNum, count, partmap->s_partition_len); @@ -393,9 +392,8 @@ static void udf_table_free_blocks(struct super_block *sb, mutex_lock(&sbi->s_alloc_mutex); partmap = &sbi->s_partmaps[bloc->partitionReferenceNum]; - if (bloc->logicalBlockNum < 0 || - (bloc->logicalBlockNum + count) > - partmap->s_partition_len) { + if (bloc->logicalBlockNum + count < count || + (bloc->logicalBlockNum + count) > partmap->s_partition_len) { udf_debug("%d < %d || %d + %d > %d\n", bloc->logicalBlockNum, 0, bloc->logicalBlockNum, count, partmap->s_partition_len); diff --git a/fs/udf/file.c b/fs/udf/file.c index 1eb06774ed90..4b6a46ccbf46 100644 --- a/fs/udf/file.c +++ b/fs/udf/file.c @@ -218,7 +218,7 @@ const struct file_operations udf_file_operations = { .llseek = generic_file_llseek, }; -static int udf_setattr(struct dentry *dentry, struct iattr *iattr) +int udf_setattr(struct dentry *dentry, struct iattr *iattr) { struct inode *inode = dentry->d_inode; int error; diff --git a/fs/udf/inode.c b/fs/udf/inode.c index bb863fe579ac..8a3fbd177cab 100644 --- 
a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -1314,7 +1314,7 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh) break; case ICBTAG_FILE_TYPE_SYMLINK: inode->i_data.a_ops = &udf_symlink_aops; - inode->i_op = &page_symlink_inode_operations; + inode->i_op = &udf_symlink_inode_operations; inode->i_mode = S_IFLNK | S_IRWXUGO; break; case ICBTAG_FILE_TYPE_MAIN: diff --git a/fs/udf/namei.c b/fs/udf/namei.c index db423ab078b1..75816025f95f 100644 --- a/fs/udf/namei.c +++ b/fs/udf/namei.c @@ -925,7 +925,7 @@ static int udf_symlink(struct inode *dir, struct dentry *dentry, iinfo = UDF_I(inode); inode->i_mode = S_IFLNK | S_IRWXUGO; inode->i_data.a_ops = &udf_symlink_aops; - inode->i_op = &page_symlink_inode_operations; + inode->i_op = &udf_symlink_inode_operations; if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB) { struct kernel_lb_addr eloc; @@ -1393,6 +1393,7 @@ const struct export_operations udf_export_ops = { const struct inode_operations udf_dir_inode_operations = { .lookup = udf_lookup, .create = udf_create, + .setattr = udf_setattr, .link = udf_link, .unlink = udf_unlink, .symlink = udf_symlink, @@ -1401,3 +1402,9 @@ const struct inode_operations udf_dir_inode_operations = { .mknod = udf_mknod, .rename = udf_rename, }; +const struct inode_operations udf_symlink_inode_operations = { + .readlink = generic_readlink, + .follow_link = page_follow_link_light, + .put_link = page_put_link, + .setattr = udf_setattr, +}; diff --git a/fs/udf/partition.c b/fs/udf/partition.c index 4b540ee632d5..745eb209be0c 100644 --- a/fs/udf/partition.c +++ b/fs/udf/partition.c @@ -24,7 +24,6 @@ #include <linux/fs.h> #include <linux/string.h> -#include <linux/slab.h> #include <linux/buffer_head.h> uint32_t udf_get_pblock(struct super_block *sb, uint32_t block, diff --git a/fs/udf/symlink.c b/fs/udf/symlink.c index 852e91845688..16064787d2b7 100644 --- a/fs/udf/symlink.c +++ b/fs/udf/symlink.c @@ -26,7 +26,6 @@ #include <linux/time.h> #include <linux/mm.h> #include 
<linux/stat.h> -#include <linux/slab.h> #include <linux/pagemap.h> #include <linux/smp_lock.h> #include <linux/buffer_head.h> diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h index 4223ac855da9..702a1148e702 100644 --- a/fs/udf/udfdecl.h +++ b/fs/udf/udfdecl.h @@ -76,6 +76,7 @@ extern const struct inode_operations udf_dir_inode_operations; extern const struct file_operations udf_dir_operations; extern const struct inode_operations udf_file_inode_operations; extern const struct file_operations udf_file_operations; +extern const struct inode_operations udf_symlink_inode_operations; extern const struct address_space_operations udf_aops; extern const struct address_space_operations udf_adinicb_aops; extern const struct address_space_operations udf_symlink_aops; @@ -131,7 +132,7 @@ extern int udf_write_fi(struct inode *inode, struct fileIdentDesc *, /* file.c */ extern int udf_ioctl(struct inode *, struct file *, unsigned int, unsigned long); - +extern int udf_setattr(struct dentry *dentry, struct iattr *iattr); /* inode.c */ extern struct inode *udf_iget(struct super_block *, struct kernel_lb_addr *); extern int udf_sync_inode(struct inode *); diff --git a/fs/udf/unicode.c b/fs/udf/unicode.c index cefa8c8913e6..d03a90b6ad69 100644 --- a/fs/udf/unicode.c +++ b/fs/udf/unicode.c @@ -24,6 +24,7 @@ #include <linux/string.h> /* for memset */ #include <linux/nls.h> #include <linux/crc-itu-t.h> +#include <linux/slab.h> #include "udf_sb.h" diff --git a/fs/xattr_acl.c b/fs/xattr_acl.c index 05ac0fe9c4d3..8d5a506c82eb 100644 --- a/fs/xattr_acl.c +++ b/fs/xattr_acl.c @@ -6,9 +6,9 @@ */ #include <linux/module.h> -#include <linux/slab.h> #include <linux/fs.h> #include <linux/posix_acl_xattr.h> +#include <linux/gfp.h> /* diff --git a/fs/xfs/linux-2.6/kmem.c b/fs/xfs/linux-2.6/kmem.c index bc7405585def..666c9db48eb6 100644 --- a/fs/xfs/linux-2.6/kmem.c +++ b/fs/xfs/linux-2.6/kmem.c @@ -17,6 +17,7 @@ */ #include <linux/mm.h> #include <linux/highmem.h> +#include <linux/slab.h> #include 
<linux/swap.h> #include <linux/blkdev.h> #include <linux/backing-dev.h> diff --git a/fs/xfs/linux-2.6/xfs_acl.c b/fs/xfs/linux-2.6/xfs_acl.c index bf85bbe4a9ae..a7bc925c4d60 100644 --- a/fs/xfs/linux-2.6/xfs_acl.c +++ b/fs/xfs/linux-2.6/xfs_acl.c @@ -22,6 +22,7 @@ #include "xfs_inode.h" #include "xfs_vnodeops.h" #include "xfs_trace.h" +#include <linux/slab.h> #include <linux/xattr.h> #include <linux/posix_acl_xattr.h> diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index 99628508cb11..0f8b9968a803 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c @@ -40,6 +40,7 @@ #include "xfs_vnodeops.h" #include "xfs_trace.h" #include "xfs_bmap.h" +#include <linux/gfp.h> #include <linux/mpage.h> #include <linux/pagevec.h> #include <linux/writeback.h> diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index bd111b7e1daa..44c2b0ef9a41 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -18,7 +18,7 @@ #include "xfs.h" #include <linux/stddef.h> #include <linux/errno.h> -#include <linux/slab.h> +#include <linux/gfp.h> #include <linux/pagemap.h> #include <linux/init.h> #include <linux/vmalloc.h> diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c index 4ea1ee18aded..7b26cc2fd284 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl.c +++ b/fs/xfs/linux-2.6/xfs_ioctl.c @@ -58,6 +58,7 @@ #include <linux/mount.h> #include <linux/namei.h> #include <linux/pagemap.h> +#include <linux/slab.h> #include <linux/exportfs.h> /* diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c index 0bf6d61f0528..593c05b4df8d 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl32.c +++ b/fs/xfs/linux-2.6/xfs_ioctl32.c @@ -18,6 +18,7 @@ #include <linux/compat.h> #include <linux/ioctl.h> #include <linux/mount.h> +#include <linux/slab.h> #include <asm/uaccess.h> #include "xfs.h" #include "xfs_fs.h" diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c index 61a99608731e..e65a7937f3a4 100644 
--- a/fs/xfs/linux-2.6/xfs_iops.c +++ b/fs/xfs/linux-2.6/xfs_iops.c @@ -56,6 +56,7 @@ #include <linux/security.h> #include <linux/falloc.h> #include <linux/fiemap.h> +#include <linux/slab.h> /* * Bring the timestamps in the XFS inode uptodate. diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 71345a370d9f..29f1edca76de 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -61,6 +61,7 @@ #include <linux/namei.h> #include <linux/init.h> +#include <linux/slab.h> #include <linux/mount.h> #include <linux/mempool.h> #include <linux/writeback.h> @@ -1208,6 +1209,7 @@ xfs_fs_put_super( xfs_unmountfs(mp); xfs_freesb(mp); + xfs_inode_shrinker_unregister(mp); xfs_icsb_destroy_counters(mp); xfs_close_devices(mp); xfs_dmops_put(mp); @@ -1621,6 +1623,8 @@ xfs_fs_fill_super( if (error) goto fail_vnrele; + xfs_inode_shrinker_register(mp); + kfree(mtpt); return 0; @@ -1866,6 +1870,7 @@ init_xfs_fs(void) goto out_cleanup_procfs; vfs_initquota(); + xfs_inode_shrinker_init(); error = register_filesystem(&xfs_fs_type); if (error) @@ -1893,6 +1898,7 @@ exit_xfs_fs(void) { vfs_exitquota(); unregister_filesystem(&xfs_fs_type); + xfs_inode_shrinker_destroy(); xfs_sysctl_unregister(); xfs_cleanup_procfs(); xfs_buf_terminate(); diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index 05cd85317f6f..a427c638d909 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -95,7 +95,8 @@ xfs_inode_ag_walk( struct xfs_perag *pag, int flags), int flags, int tag, - int exclusive) + int exclusive, + int *nr_to_scan) { uint32_t first_index; int last_error = 0; @@ -134,7 +135,7 @@ restart: if (error == EFSCORRUPTED) break; - } while (1); + } while ((*nr_to_scan)--); if (skipped) { delay(1); @@ -150,12 +151,15 @@ xfs_inode_ag_iterator( struct xfs_perag *pag, int flags), int flags, int tag, - int exclusive) + int exclusive, + int *nr_to_scan) { int error = 0; int last_error = 0; xfs_agnumber_t ag; + int nr; + nr 
= nr_to_scan ? *nr_to_scan : INT_MAX; for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) { struct xfs_perag *pag; @@ -165,14 +169,18 @@ xfs_inode_ag_iterator( continue; } error = xfs_inode_ag_walk(mp, pag, execute, flags, tag, - exclusive); + exclusive, &nr); xfs_perag_put(pag); if (error) { last_error = error; if (error == EFSCORRUPTED) break; } + if (nr <= 0) + break; } + if (nr_to_scan) + *nr_to_scan = nr; return XFS_ERROR(last_error); } @@ -291,7 +299,7 @@ xfs_sync_data( ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0); error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags, - XFS_ICI_NO_TAG, 0); + XFS_ICI_NO_TAG, 0, NULL); if (error) return XFS_ERROR(error); @@ -310,7 +318,7 @@ xfs_sync_attr( ASSERT((flags & ~SYNC_WAIT) == 0); return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags, - XFS_ICI_NO_TAG, 0); + XFS_ICI_NO_TAG, 0, NULL); } STATIC int @@ -673,6 +681,7 @@ __xfs_inode_set_reclaim_tag( radix_tree_tag_set(&pag->pag_ici_root, XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino), XFS_ICI_RECLAIM_TAG); + pag->pag_ici_reclaimable++; } /* @@ -705,6 +714,7 @@ __xfs_inode_clear_reclaim_tag( { radix_tree_tag_clear(&pag->pag_ici_root, XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG); + pag->pag_ici_reclaimable--; } /* @@ -820,10 +830,10 @@ xfs_reclaim_inode( * call into reclaim to find it in a clean state instead of waiting for * it now. We also don't return errors here - if the error is transient * then the next reclaim pass will flush the inode, and if the error - * is permanent then the next sync reclaim will relcaim the inode and + * is permanent then the next sync reclaim will reclaim the inode and * pass on the error. 
*/ - if (error && !XFS_FORCED_SHUTDOWN(ip->i_mount)) { + if (error && error != EAGAIN && !XFS_FORCED_SHUTDOWN(ip->i_mount)) { xfs_fs_cmn_err(CE_WARN, ip->i_mount, "inode 0x%llx background reclaim flush failed with %d", (long long)ip->i_ino, error); @@ -854,5 +864,93 @@ xfs_reclaim_inodes( int mode) { return xfs_inode_ag_iterator(mp, xfs_reclaim_inode, mode, - XFS_ICI_RECLAIM_TAG, 1); + XFS_ICI_RECLAIM_TAG, 1, NULL); +} + +/* + * Shrinker infrastructure. + * + * This is all far more complex than it needs to be. It adds a global list of + * mounts because the shrinkers can only call a global context. We need to make + * the shrinkers pass a context to avoid the need for global state. + */ +static LIST_HEAD(xfs_mount_list); +static struct rw_semaphore xfs_mount_list_lock; + +static int +xfs_reclaim_inode_shrink( + int nr_to_scan, + gfp_t gfp_mask) +{ + struct xfs_mount *mp; + struct xfs_perag *pag; + xfs_agnumber_t ag; + int reclaimable = 0; + + if (nr_to_scan) { + if (!(gfp_mask & __GFP_FS)) + return -1; + + down_read(&xfs_mount_list_lock); + list_for_each_entry(mp, &xfs_mount_list, m_mplist) { + xfs_inode_ag_iterator(mp, xfs_reclaim_inode, 0, + XFS_ICI_RECLAIM_TAG, 1, &nr_to_scan); + if (nr_to_scan <= 0) + break; + } + up_read(&xfs_mount_list_lock); + } + + down_read(&xfs_mount_list_lock); + list_for_each_entry(mp, &xfs_mount_list, m_mplist) { + for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) { + + pag = xfs_perag_get(mp, ag); + if (!pag->pag_ici_init) { + xfs_perag_put(pag); + continue; + } + reclaimable += pag->pag_ici_reclaimable; + xfs_perag_put(pag); + } + } + up_read(&xfs_mount_list_lock); + return reclaimable; +} + +static struct shrinker xfs_inode_shrinker = { + .shrink = xfs_reclaim_inode_shrink, + .seeks = DEFAULT_SEEKS, +}; + +void __init +xfs_inode_shrinker_init(void) +{ + init_rwsem(&xfs_mount_list_lock); + register_shrinker(&xfs_inode_shrinker); +} + +void +xfs_inode_shrinker_destroy(void) +{ + ASSERT(list_empty(&xfs_mount_list)); + 
unregister_shrinker(&xfs_inode_shrinker); +} + +void +xfs_inode_shrinker_register( + struct xfs_mount *mp) +{ + down_write(&xfs_mount_list_lock); + list_add_tail(&mp->m_mplist, &xfs_mount_list); + up_write(&xfs_mount_list_lock); +} + +void +xfs_inode_shrinker_unregister( + struct xfs_mount *mp) +{ + down_write(&xfs_mount_list_lock); + list_del(&mp->m_mplist); + up_write(&xfs_mount_list_lock); } diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h index d480c346cabb..cdcbaaca9880 100644 --- a/fs/xfs/linux-2.6/xfs_sync.h +++ b/fs/xfs/linux-2.6/xfs_sync.h @@ -53,6 +53,11 @@ void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag, int xfs_sync_inode_valid(struct xfs_inode *ip, struct xfs_perag *pag); int xfs_inode_ag_iterator(struct xfs_mount *mp, int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags), - int flags, int tag, int write_lock); + int flags, int tag, int write_lock, int *nr_to_scan); + +void xfs_inode_shrinker_init(void); +void xfs_inode_shrinker_destroy(void); +void xfs_inode_shrinker_register(struct xfs_mount *mp); +void xfs_inode_shrinker_unregister(struct xfs_mount *mp); #endif diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c index 5d0ee8d492db..50bee07d6b0e 100644 --- a/fs/xfs/quota/xfs_qm_syscalls.c +++ b/fs/xfs/quota/xfs_qm_syscalls.c @@ -891,7 +891,8 @@ xfs_qm_dqrele_all_inodes( uint flags) { ASSERT(mp->m_quotainfo); - xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags, XFS_ICI_NO_TAG, 0); + xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags, + XFS_ICI_NO_TAG, 0, NULL); } /*------------------------------------------------------------------------*/ diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h index b1a5a1ff88ea..abb8222b88c9 100644 --- a/fs/xfs/xfs_ag.h +++ b/fs/xfs/xfs_ag.h @@ -223,6 +223,7 @@ typedef struct xfs_perag { int pag_ici_init; /* incore inode cache initialised */ rwlock_t pag_ici_lock; /* incore inode lock */ struct radix_tree_root pag_ici_root; /* incore 
inode cache root */ + int pag_ici_reclaimable; /* reclaimable inodes */ #endif int pagb_count; /* pagb slots in use */ xfs_perag_busy_t pagb_list[XFS_PAGB_NUM_SLOTS]; /* unstable blocks */ diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c index cd27c9d6c71f..5bba29a07812 100644 --- a/fs/xfs/xfs_dfrag.c +++ b/fs/xfs/xfs_dfrag.c @@ -177,16 +177,26 @@ xfs_swap_extents_check_format( XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) > tip->i_df.if_ext_max) return EINVAL; - /* Check root block of temp in btree form to max in target */ + /* + * If we are in a btree format, check that the temp root block will fit + * in the target and that it has enough extents to be in btree format + * in the target. + * + * Note that we have to be careful to allow btree->extent conversions + * (a common defrag case) which will occur when the temp inode is in + * extent format... + */ if (tip->i_d.di_format == XFS_DINODE_FMT_BTREE && - XFS_IFORK_BOFF(ip) && - tip->i_df.if_broot_bytes > XFS_IFORK_BOFF(ip)) + ((XFS_IFORK_BOFF(ip) && + tip->i_df.if_broot_bytes > XFS_IFORK_BOFF(ip)) || + XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) <= ip->i_df.if_ext_max)) return EINVAL; - /* Check root block of target in btree form to max in temp */ + /* Reciprocal target->temp btree format checks */ if (ip->i_d.di_format == XFS_DINODE_FMT_BTREE && - XFS_IFORK_BOFF(tip) && - ip->i_df.if_broot_bytes > XFS_IFORK_BOFF(tip)) + ((XFS_IFORK_BOFF(tip) && + ip->i_df.if_broot_bytes > XFS_IFORK_BOFF(tip)) || + XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) <= tip->i_df.if_ext_max)) return EINVAL; return 0; diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index e8fba92d7cd9..2be019136287 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -745,9 +745,16 @@ xfs_log_move_tail(xfs_mount_t *mp, /* * Determine if we have a transaction that has gone to disk - * that needs to be covered. Log activity needs to be idle (no AIL and - * nothing in the iclogs). And, we need to be in the right state indicating - * something has gone out. 
+ that needs to be covered. To begin the transition to the idle state + * firstly the log needs to be idle (no AIL and nothing in the iclogs). + * If we are then in a state where covering is needed, the caller is informed + * that dummy transactions are required to move the log into the idle state. + * + * Because this is called as part of the sync process, we should also indicate + * that dummy transactions should be issued in anything but the covered or + * idle states. This ensures that the log tail is accurately reflected in + * the log at the end of the sync, hence if a crash occurs avoids replay + * of transactions where the metadata is already on disk. */ int xfs_log_need_covered(xfs_mount_t *mp) @@ -759,17 +766,24 @@ xfs_log_need_covered(xfs_mount_t *mp) return 0; spin_lock(&log->l_icloglock); - if (((log->l_covered_state == XLOG_STATE_COVER_NEED) || - (log->l_covered_state == XLOG_STATE_COVER_NEED2)) - && !xfs_trans_ail_tail(log->l_ailp) - && xlog_iclogs_empty(log)) { - if (log->l_covered_state == XLOG_STATE_COVER_NEED) - log->l_covered_state = XLOG_STATE_COVER_DONE; - else { - ASSERT(log->l_covered_state == XLOG_STATE_COVER_NEED2); - log->l_covered_state = XLOG_STATE_COVER_DONE2; + switch (log->l_covered_state) { + case XLOG_STATE_COVER_DONE: + case XLOG_STATE_COVER_DONE2: + case XLOG_STATE_COVER_IDLE: + break; + case XLOG_STATE_COVER_NEED: + case XLOG_STATE_COVER_NEED2: + if (!xfs_trans_ail_tail(log->l_ailp) && + xlog_iclogs_empty(log)) { + if (log->l_covered_state == XLOG_STATE_COVER_NEED) + log->l_covered_state = XLOG_STATE_COVER_DONE; + else + log->l_covered_state = XLOG_STATE_COVER_DONE2; } + /* FALLTHRU */ + default: needed = 1; + break; } spin_unlock(&log->l_icloglock); return needed; diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 4fa0bc7b983e..9ff48a16a7ee 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -259,6 +259,7 @@ typedef struct xfs_mount { wait_queue_head_t m_wait_single_sync_task; __int64_t m_update_flags; /* sb
flags we need to update on the next remount,rw */ + struct list_head m_mplist; /* inode shrinker mount list */ } xfs_mount_t; /* |