summaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/cifs/Kconfig8
-rw-r--r--fs/cifs/cifsacl.c48
-rw-r--r--fs/cifs/cifsfs.c2
-rw-r--r--fs/cifs/cifsproto.h6
-rw-r--r--fs/cifs/connect.c5
-rw-r--r--fs/cifs/dns_resolve.c2
-rw-r--r--fs/cifs/file.c4
-rw-r--r--fs/cifs/fscache.c12
-rw-r--r--fs/cifs/inode.c36
-rw-r--r--fs/cifs/readdir.c29
-rw-r--r--fs/cifs/xattr.c55
-rw-r--r--fs/compat.c28
-rw-r--r--fs/exec.c36
-rw-r--r--fs/nfs/dir.c6
-rw-r--r--fs/ocfs2/cluster/heartbeat.c14
-rw-r--r--fs/ocfs2/dcache.c1
-rw-r--r--fs/ocfs2/dlm/dlmdomain.c2
-rw-r--r--fs/ocfs2/ocfs2.h6
-rw-r--r--fs/ocfs2/stack_user.c2
-rw-r--r--fs/proc/base.c2
-rw-r--r--fs/reiserfs/xattr_acl.c6
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.c94
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c35
-rw-r--r--fs/xfs/xfs_bmap.c85
-rw-r--r--fs/xfs/xfs_bmap.h5
-rw-r--r--fs/xfs/xfs_dfrag.c13
-rw-r--r--fs/xfs/xfs_error.c3
-rw-r--r--fs/xfs/xfs_error.h5
-rw-r--r--fs/xfs/xfs_inode_item.c31
29 files changed, 383 insertions, 198 deletions
diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig
index 0ed213970ced..ee45648b0d1a 100644
--- a/fs/cifs/Kconfig
+++ b/fs/cifs/Kconfig
@@ -4,6 +4,7 @@ config CIFS
select NLS
select CRYPTO
select CRYPTO_MD5
+ select CRYPTO_HMAC
select CRYPTO_ARC4
help
This is the client VFS module for the Common Internet File System
@@ -143,6 +144,13 @@ config CIFS_FSCACHE
to be cached locally on disk through the general filesystem cache
manager. If unsure, say N.
+config CIFS_ACL
+ bool "Provide CIFS ACL support (EXPERIMENTAL)"
+ depends on EXPERIMENTAL && CIFS_XATTR
+ help
+ Allows to fetch CIFS/NTFS ACL from the server. The DACL blob
+ is handed over to the application/caller.
+
config CIFS_EXPERIMENTAL
bool "CIFS Experimental Features (EXPERIMENTAL)"
depends on CIFS && EXPERIMENTAL
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
index c9b4792ae825..c6ebea088ac7 100644
--- a/fs/cifs/cifsacl.c
+++ b/fs/cifs/cifsacl.c
@@ -560,7 +560,7 @@ static struct cifs_ntsd *get_cifs_acl_by_fid(struct cifs_sb_info *cifs_sb,
struct tcon_link *tlink = cifs_sb_tlink(cifs_sb);
if (IS_ERR(tlink))
- return NULL;
+ return ERR_CAST(tlink);
xid = GetXid();
rc = CIFSSMBGetCIFSACL(xid, tlink_tcon(tlink), fid, &pntsd, pacllen);
@@ -568,7 +568,9 @@ static struct cifs_ntsd *get_cifs_acl_by_fid(struct cifs_sb_info *cifs_sb,
cifs_put_tlink(tlink);
- cFYI(1, "GetCIFSACL rc = %d ACL len %d", rc, *pacllen);
+ cFYI(1, "%s: rc = %d ACL len %d", __func__, rc, *pacllen);
+ if (rc)
+ return ERR_PTR(rc);
return pntsd;
}
@@ -583,7 +585,7 @@ static struct cifs_ntsd *get_cifs_acl_by_path(struct cifs_sb_info *cifs_sb,
struct tcon_link *tlink = cifs_sb_tlink(cifs_sb);
if (IS_ERR(tlink))
- return NULL;
+ return ERR_CAST(tlink);
tcon = tlink_tcon(tlink);
xid = GetXid();
@@ -591,23 +593,22 @@ static struct cifs_ntsd *get_cifs_acl_by_path(struct cifs_sb_info *cifs_sb,
rc = CIFSSMBOpen(xid, tcon, path, FILE_OPEN, READ_CONTROL, 0,
&fid, &oplock, NULL, cifs_sb->local_nls,
cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
- if (rc) {
- cERROR(1, "Unable to open file to get ACL");
- goto out;
+ if (!rc) {
+ rc = CIFSSMBGetCIFSACL(xid, tcon, fid, &pntsd, pacllen);
+ CIFSSMBClose(xid, tcon, fid);
}
- rc = CIFSSMBGetCIFSACL(xid, tcon, fid, &pntsd, pacllen);
- cFYI(1, "GetCIFSACL rc = %d ACL len %d", rc, *pacllen);
-
- CIFSSMBClose(xid, tcon, fid);
- out:
cifs_put_tlink(tlink);
FreeXid(xid);
+
+ cFYI(1, "%s: rc = %d ACL len %d", __func__, rc, *pacllen);
+ if (rc)
+ return ERR_PTR(rc);
return pntsd;
}
/* Retrieve an ACL from the server */
-static struct cifs_ntsd *get_cifs_acl(struct cifs_sb_info *cifs_sb,
+struct cifs_ntsd *get_cifs_acl(struct cifs_sb_info *cifs_sb,
struct inode *inode, const char *path,
u32 *pacllen)
{
@@ -695,7 +696,7 @@ static int set_cifs_acl(struct cifs_ntsd *pnntsd, __u32 acllen,
}
/* Translate the CIFS ACL (simlar to NTFS ACL) for a file into mode bits */
-void
+int
cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb, struct cifs_fattr *fattr,
struct inode *inode, const char *path, const __u16 *pfid)
{
@@ -711,17 +712,21 @@ cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb, struct cifs_fattr *fattr,
pntsd = get_cifs_acl(cifs_sb, inode, path, &acllen);
/* if we can retrieve the ACL, now parse Access Control Entries, ACEs */
- if (pntsd)
+ if (IS_ERR(pntsd)) {
+ rc = PTR_ERR(pntsd);
+ cERROR(1, "%s: error %d getting sec desc", __func__, rc);
+ } else {
rc = parse_sec_desc(pntsd, acllen, fattr);
- if (rc)
- cFYI(1, "parse sec desc failed rc = %d", rc);
+ kfree(pntsd);
+ if (rc)
+ cERROR(1, "parse sec desc failed rc = %d", rc);
+ }
- kfree(pntsd);
- return;
+ return rc;
}
/* Convert mode bits to an ACL so we can update the ACL on the server */
-int mode_to_acl(struct inode *inode, const char *path, __u64 nmode)
+int mode_to_cifs_acl(struct inode *inode, const char *path, __u64 nmode)
{
int rc = 0;
__u32 secdesclen = 0;
@@ -736,7 +741,10 @@ int mode_to_acl(struct inode *inode, const char *path, __u64 nmode)
/* Add three ACEs for owner, group, everyone getting rid of
other ACEs as chmod disables ACEs and set the security descriptor */
- if (pntsd) {
+ if (IS_ERR(pntsd)) {
+ rc = PTR_ERR(pntsd);
+ cERROR(1, "%s: error %d getting sec desc", __func__, rc);
+ } else {
/* allocate memory for the smb header,
set security descriptor request security descriptor
parameters, and secuirty descriptor itself */
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 9c3789762ab7..76c8a906a63e 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -458,6 +458,8 @@ cifs_show_options(struct seq_file *s, struct vfsmount *m)
seq_printf(s, ",acl");
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MF_SYMLINKS)
seq_printf(s, ",mfsymlinks");
+ if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_FSCACHE)
+ seq_printf(s, ",fsc");
seq_printf(s, ",rsize=%d", cifs_sb->rsize);
seq_printf(s, ",wsize=%d", cifs_sb->wsize);
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 7ed69b6b5fe6..db961dc4fd3d 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -130,10 +130,12 @@ extern int cifs_get_file_info_unix(struct file *filp);
extern int cifs_get_inode_info_unix(struct inode **pinode,
const unsigned char *search_path,
struct super_block *sb, int xid);
-extern void cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb,
+extern int cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb,
struct cifs_fattr *fattr, struct inode *inode,
const char *path, const __u16 *pfid);
-extern int mode_to_acl(struct inode *inode, const char *path, __u64);
+extern int mode_to_cifs_acl(struct inode *inode, const char *path, __u64);
+extern struct cifs_ntsd *get_cifs_acl(struct cifs_sb_info *, struct inode *,
+ const char *, u32 *);
extern int cifs_mount(struct super_block *, struct cifs_sb_info *, char *,
const char *);
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 251a17c03545..32fa4d9b5dbc 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -1352,6 +1352,11 @@ cifs_parse_mount_options(char *options, const char *devname,
"supported. Instead set "
"/proc/fs/cifs/LookupCacheEnabled to 0\n");
} else if (strnicmp(data, "fsc", 3) == 0) {
+#ifndef CONFIG_CIFS_FSCACHE
+ cERROR(1, "FS-Cache support needs CONFIG_CIFS_FSCACHE"
+ "kernel config option set");
+ return 1;
+#endif
vol->fsc = true;
} else if (strnicmp(data, "mfsymlinks", 10) == 0) {
vol->mfsymlinks = true;
diff --git a/fs/cifs/dns_resolve.c b/fs/cifs/dns_resolve.c
index 0eb87026cad3..548f06230a6d 100644
--- a/fs/cifs/dns_resolve.c
+++ b/fs/cifs/dns_resolve.c
@@ -66,7 +66,7 @@ dns_resolve_server_name_to_ip(const char *unc, char **ip_addr)
/* Search for server name delimiter */
sep = memchr(hostname, '\\', len);
if (sep)
- len = sep - unc;
+ len = sep - hostname;
else
cFYI(1, "%s: probably server name is whole unc: %s",
__func__, unc);
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 06c3e83fa387..b857ce5db775 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -2271,8 +2271,10 @@ void cifs_oplock_break_get(struct cifsFileInfo *cfile)
void cifs_oplock_break_put(struct cifsFileInfo *cfile)
{
+ struct super_block *sb = cfile->dentry->d_sb;
+
cifsFileInfo_put(cfile);
- cifs_sb_deactive(cfile->dentry->d_sb);
+ cifs_sb_deactive(sb);
}
const struct address_space_operations cifs_addr_ops = {
diff --git a/fs/cifs/fscache.c b/fs/cifs/fscache.c
index a2ad94efcfe6..297a43d0ff7f 100644
--- a/fs/cifs/fscache.c
+++ b/fs/cifs/fscache.c
@@ -2,7 +2,7 @@
* fs/cifs/fscache.c - CIFS filesystem cache interface
*
* Copyright (c) 2010 Novell, Inc.
- * Author(s): Suresh Jayaraman (sjayaraman@suse.de>
+ * Author(s): Suresh Jayaraman <sjayaraman@suse.de>
*
* This library is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published
@@ -67,10 +67,12 @@ static void cifs_fscache_enable_inode_cookie(struct inode *inode)
if (cifsi->fscache)
return;
- cifsi->fscache = fscache_acquire_cookie(tcon->fscache,
+ if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_FSCACHE) {
+ cifsi->fscache = fscache_acquire_cookie(tcon->fscache,
&cifs_fscache_inode_object_def, cifsi);
- cFYI(1, "CIFS: got FH cookie (0x%p/0x%p)", tcon->fscache,
+ cFYI(1, "CIFS: got FH cookie (0x%p/0x%p)", tcon->fscache,
cifsi->fscache);
+ }
}
void cifs_fscache_release_inode_cookie(struct inode *inode)
@@ -101,10 +103,8 @@ void cifs_fscache_set_inode_cookie(struct inode *inode, struct file *filp)
{
if ((filp->f_flags & O_ACCMODE) != O_RDONLY)
cifs_fscache_disable_inode_cookie(inode);
- else {
+ else
cifs_fscache_enable_inode_cookie(inode);
- cFYI(1, "CIFS: fscache inode cookie set");
- }
}
void cifs_fscache_reset_inode_cookie(struct inode *inode)
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index ef3a55bf86b6..28cb6e735943 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -689,8 +689,13 @@ int cifs_get_inode_info(struct inode **pinode,
#ifdef CONFIG_CIFS_EXPERIMENTAL
/* fill in 0777 bits from ACL */
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) {
- cFYI(1, "Getting mode bits from ACL");
- cifs_acl_to_fattr(cifs_sb, &fattr, *pinode, full_path, pfid);
+ rc = cifs_acl_to_fattr(cifs_sb, &fattr, *pinode, full_path,
+ pfid);
+ if (rc) {
+ cFYI(1, "%s: Getting ACL failed with error: %d",
+ __func__, rc);
+ goto cgii_exit;
+ }
}
#endif
@@ -881,8 +886,10 @@ struct inode *cifs_root_iget(struct super_block *sb, unsigned long ino)
rc = cifs_get_inode_info(&inode, full_path, NULL, sb,
xid, NULL);
- if (!inode)
- return ERR_PTR(rc);
+ if (!inode) {
+ inode = ERR_PTR(rc);
+ goto out;
+ }
#ifdef CONFIG_CIFS_FSCACHE
/* populate tcon->resource_id */
@@ -898,13 +905,11 @@ struct inode *cifs_root_iget(struct super_block *sb, unsigned long ino)
inode->i_uid = cifs_sb->mnt_uid;
inode->i_gid = cifs_sb->mnt_gid;
} else if (rc) {
- kfree(full_path);
- _FreeXid(xid);
iget_failed(inode);
- return ERR_PTR(rc);
+ inode = ERR_PTR(rc);
}
-
+out:
kfree(full_path);
/* can not call macro FreeXid here since in a void func
* TODO: This is no longer true
@@ -1670,7 +1675,9 @@ cifs_inode_needs_reval(struct inode *inode)
return false;
}
-/* check invalid_mapping flag and zap the cache if it's set */
+/*
+ * Zap the cache. Called when invalid_mapping flag is set.
+ */
static void
cifs_invalidate_mapping(struct inode *inode)
{
@@ -2115,9 +2122,14 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs)
if (attrs->ia_valid & ATTR_MODE) {
rc = 0;
#ifdef CONFIG_CIFS_EXPERIMENTAL
- if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL)
- rc = mode_to_acl(inode, full_path, mode);
- else
+ if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) {
+ rc = mode_to_cifs_acl(inode, full_path, mode);
+ if (rc) {
+ cFYI(1, "%s: Setting ACL failed with error: %d",
+ __func__, rc);
+ goto cifs_setattr_exit;
+ }
+ } else
#endif
if (((mode & S_IWUGO) == 0) &&
(cifsInode->cifsAttrs & ATTR_READONLY) == 0) {
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index ef7bb7b50f58..32d300e8f20e 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -226,26 +226,29 @@ static int initiate_cifs_search(const int xid, struct file *file)
char *full_path = NULL;
struct cifsFileInfo *cifsFile;
struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
- struct tcon_link *tlink;
+ struct tcon_link *tlink = NULL;
struct cifsTconInfo *pTcon;
- tlink = cifs_sb_tlink(cifs_sb);
- if (IS_ERR(tlink))
- return PTR_ERR(tlink);
- pTcon = tlink_tcon(tlink);
-
- if (file->private_data == NULL)
- file->private_data =
- kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
if (file->private_data == NULL) {
- rc = -ENOMEM;
- goto error_exit;
+ tlink = cifs_sb_tlink(cifs_sb);
+ if (IS_ERR(tlink))
+ return PTR_ERR(tlink);
+
+ cifsFile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
+ if (cifsFile == NULL) {
+ rc = -ENOMEM;
+ goto error_exit;
+ }
+ file->private_data = cifsFile;
+ cifsFile->tlink = cifs_get_tlink(tlink);
+ pTcon = tlink_tcon(tlink);
+ } else {
+ cifsFile = file->private_data;
+ pTcon = tlink_tcon(cifsFile->tlink);
}
- cifsFile = file->private_data;
cifsFile->invalidHandle = true;
cifsFile->srch_inf.endOfSearch = false;
- cifsFile->tlink = cifs_get_tlink(tlink);
full_path = build_path_from_dentry(file->f_path.dentry);
if (full_path == NULL) {
diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c
index a264b744bb41..eae2a1491608 100644
--- a/fs/cifs/xattr.c
+++ b/fs/cifs/xattr.c
@@ -30,10 +30,11 @@
#define MAX_EA_VALUE_SIZE 65535
#define CIFS_XATTR_DOS_ATTRIB "user.DosAttrib"
+#define CIFS_XATTR_CIFS_ACL "system.cifs_acl"
#define CIFS_XATTR_USER_PREFIX "user."
#define CIFS_XATTR_SYSTEM_PREFIX "system."
#define CIFS_XATTR_OS2_PREFIX "os2."
-#define CIFS_XATTR_SECURITY_PREFIX ".security"
+#define CIFS_XATTR_SECURITY_PREFIX "security."
#define CIFS_XATTR_TRUSTED_PREFIX "trusted."
#define XATTR_TRUSTED_PREFIX_LEN 8
#define XATTR_SECURITY_PREFIX_LEN 9
@@ -277,29 +278,8 @@ ssize_t cifs_getxattr(struct dentry *direntry, const char *ea_name,
cifs_sb->local_nls,
cifs_sb->mnt_cifs_flags &
CIFS_MOUNT_MAP_SPECIAL_CHR);
-#ifdef CONFIG_CIFS_EXPERIMENTAL
- else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) {
- __u16 fid;
- int oplock = 0;
- struct cifs_ntsd *pacl = NULL;
- __u32 buflen = 0;
- if (experimEnabled)
- rc = CIFSSMBOpen(xid, pTcon, full_path,
- FILE_OPEN, GENERIC_READ, 0, &fid,
- &oplock, NULL, cifs_sb->local_nls,
- cifs_sb->mnt_cifs_flags &
- CIFS_MOUNT_MAP_SPECIAL_CHR);
- /* else rc is EOPNOTSUPP from above */
-
- if (rc == 0) {
- rc = CIFSSMBGetCIFSACL(xid, pTcon, fid, &pacl,
- &buflen);
- CIFSSMBClose(xid, pTcon, fid);
- }
- }
-#endif /* EXPERIMENTAL */
#else
- cFYI(1, "query POSIX ACL not supported yet");
+ cFYI(1, "Query POSIX ACL not supported yet");
#endif /* CONFIG_CIFS_POSIX */
} else if (strncmp(ea_name, POSIX_ACL_XATTR_DEFAULT,
strlen(POSIX_ACL_XATTR_DEFAULT)) == 0) {
@@ -311,8 +291,33 @@ ssize_t cifs_getxattr(struct dentry *direntry, const char *ea_name,
cifs_sb->mnt_cifs_flags &
CIFS_MOUNT_MAP_SPECIAL_CHR);
#else
- cFYI(1, "query POSIX default ACL not supported yet");
-#endif
+ cFYI(1, "Query POSIX default ACL not supported yet");
+#endif /* CONFIG_CIFS_POSIX */
+ } else if (strncmp(ea_name, CIFS_XATTR_CIFS_ACL,
+ strlen(CIFS_XATTR_CIFS_ACL)) == 0) {
+#ifdef CONFIG_CIFS_ACL
+ u32 acllen;
+ struct cifs_ntsd *pacl;
+
+ pacl = get_cifs_acl(cifs_sb, direntry->d_inode,
+ full_path, &acllen);
+ if (IS_ERR(pacl)) {
+ rc = PTR_ERR(pacl);
+ cERROR(1, "%s: error %zd getting sec desc",
+ __func__, rc);
+ } else {
+ if (ea_value) {
+ if (acllen > buf_size)
+ acllen = -ERANGE;
+ else
+ memcpy(ea_value, pacl, acllen);
+ }
+ rc = acllen;
+ kfree(pacl);
+ }
+#else
+ cFYI(1, "Query CIFS ACL not supported yet");
+#endif /* CONFIG_CIFS_ACL */
} else if (strncmp(ea_name,
CIFS_XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) == 0) {
cFYI(1, "Trusted xattr namespace not supported yet");
diff --git a/fs/compat.c b/fs/compat.c
index c580c322fa6b..eb1740ac8c0a 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1350,6 +1350,10 @@ static int compat_count(compat_uptr_t __user *argv, int max)
argv++;
if (i++ >= max)
return -E2BIG;
+
+ if (fatal_signal_pending(current))
+ return -ERESTARTNOHAND;
+ cond_resched();
}
}
return i;
@@ -1391,6 +1395,12 @@ static int compat_copy_strings(int argc, compat_uptr_t __user *argv,
while (len > 0) {
int offset, bytes_to_copy;
+ if (fatal_signal_pending(current)) {
+ ret = -ERESTARTNOHAND;
+ goto out;
+ }
+ cond_resched();
+
offset = pos % PAGE_SIZE;
if (offset == 0)
offset = PAGE_SIZE;
@@ -1407,18 +1417,8 @@ static int compat_copy_strings(int argc, compat_uptr_t __user *argv,
if (!kmapped_page || kpos != (pos & PAGE_MASK)) {
struct page *page;
-#ifdef CONFIG_STACK_GROWSUP
- ret = expand_stack_downwards(bprm->vma, pos);
- if (ret < 0) {
- /* We've exceed the stack rlimit. */
- ret = -E2BIG;
- goto out;
- }
-#endif
- ret = get_user_pages(current, bprm->mm, pos,
- 1, 1, 1, &page, NULL);
- if (ret <= 0) {
- /* We've exceed the stack rlimit. */
+ page = get_arg_page(bprm, pos, 1);
+ if (!page) {
ret = -E2BIG;
goto out;
}
@@ -1539,8 +1539,10 @@ int compat_do_execve(char * filename,
return retval;
out:
- if (bprm->mm)
+ if (bprm->mm) {
+ acct_arg_size(bprm, 0);
mmput(bprm->mm);
+ }
out_file:
if (bprm->file) {
diff --git a/fs/exec.c b/fs/exec.c
index 99d33a1371e9..d68c378a3137 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -164,7 +164,26 @@ out:
#ifdef CONFIG_MMU
-static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
+void acct_arg_size(struct linux_binprm *bprm, unsigned long pages)
+{
+ struct mm_struct *mm = current->mm;
+ long diff = (long)(pages - bprm->vma_pages);
+
+ if (!mm || !diff)
+ return;
+
+ bprm->vma_pages = pages;
+
+#ifdef SPLIT_RSS_COUNTING
+ add_mm_counter(mm, MM_ANONPAGES, diff);
+#else
+ spin_lock(&mm->page_table_lock);
+ add_mm_counter(mm, MM_ANONPAGES, diff);
+ spin_unlock(&mm->page_table_lock);
+#endif
+}
+
+struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
int write)
{
struct page *page;
@@ -186,6 +205,8 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
unsigned long size = bprm->vma->vm_end - bprm->vma->vm_start;
struct rlimit *rlim;
+ acct_arg_size(bprm, size / PAGE_SIZE);
+
/*
* We've historically supported up to 32 pages (ARG_MAX)
* of argument strings even with small stacks
@@ -276,7 +297,11 @@ static bool valid_arg_len(struct linux_binprm *bprm, long len)
#else
-static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
+void acct_arg_size(struct linux_binprm *bprm, unsigned long pages)
+{
+}
+
+struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
int write)
{
struct page *page;
@@ -1003,6 +1028,7 @@ int flush_old_exec(struct linux_binprm * bprm)
/*
* Release all of the old mmap stuff
*/
+ acct_arg_size(bprm, 0);
retval = exec_mmap(bprm->mm);
if (retval)
goto out;
@@ -1426,8 +1452,10 @@ int do_execve(const char * filename,
return retval;
out:
- if (bprm->mm)
- mmput (bprm->mm);
+ if (bprm->mm) {
+ acct_arg_size(bprm, 0);
+ mmput(bprm->mm);
+ }
out_file:
if (bprm->file) {
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 8ea4a4180a87..f0a384e2ae63 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -395,13 +395,9 @@ int xdr_decode(nfs_readdir_descriptor_t *desc, struct nfs_entry *entry, struct x
static
int nfs_same_file(struct dentry *dentry, struct nfs_entry *entry)
{
- struct nfs_inode *node;
if (dentry->d_inode == NULL)
goto different;
- node = NFS_I(dentry->d_inode);
- if (node->fh.size != entry->fh->size)
- goto different;
- if (strncmp(node->fh.data, entry->fh->data, node->fh.size) != 0)
+ if (nfs_compare_fh(entry->fh, NFS_FH(dentry->d_inode)) != 0)
goto different;
return 1;
different:
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index 52c7557f3e25..9f26ac9be2a4 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -1964,8 +1964,10 @@ static struct config_item *o2hb_heartbeat_group_make_item(struct config_group *g
if (reg == NULL)
return ERR_PTR(-ENOMEM);
- if (strlen(name) > O2HB_MAX_REGION_NAME_LEN)
- return ERR_PTR(-ENAMETOOLONG);
+ if (strlen(name) > O2HB_MAX_REGION_NAME_LEN) {
+ ret = -ENAMETOOLONG;
+ goto free;
+ }
spin_lock(&o2hb_live_lock);
reg->hr_region_num = 0;
@@ -1974,7 +1976,8 @@ static struct config_item *o2hb_heartbeat_group_make_item(struct config_group *g
O2NM_MAX_REGIONS);
if (reg->hr_region_num >= O2NM_MAX_REGIONS) {
spin_unlock(&o2hb_live_lock);
- return ERR_PTR(-EFBIG);
+ ret = -EFBIG;
+ goto free;
}
set_bit(reg->hr_region_num, o2hb_region_bitmap);
}
@@ -1986,10 +1989,13 @@ static struct config_item *o2hb_heartbeat_group_make_item(struct config_group *g
ret = o2hb_debug_region_init(reg, o2hb_debug_dir);
if (ret) {
config_item_put(&reg->hr_item);
- return ERR_PTR(ret);
+ goto free;
}
return &reg->hr_item;
+free:
+ kfree(reg);
+ return ERR_PTR(ret);
}
static void o2hb_heartbeat_group_drop_item(struct config_group *group,
diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c
index edaded48e7e9..895532ac4d98 100644
--- a/fs/ocfs2/dcache.c
+++ b/fs/ocfs2/dcache.c
@@ -476,7 +476,6 @@ static void ocfs2_dentry_iput(struct dentry *dentry, struct inode *inode)
out:
iput(inode);
- ocfs2_dentry_attach_gen(dentry);
}
/*
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index 58a93b953735..cc2aaa96cfe5 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -959,7 +959,7 @@ static int dlm_match_regions(struct dlm_ctxt *dlm,
r += O2HB_MAX_REGION_NAME_LEN;
}
- local = kmalloc(sizeof(qr->qr_regions), GFP_KERNEL);
+ local = kmalloc(sizeof(qr->qr_regions), GFP_ATOMIC);
if (!local) {
status = -ENOMEM;
goto bail;
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 1efea3615589..70dd3b1798f1 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -159,9 +159,9 @@ struct ocfs2_lock_res {
char l_name[OCFS2_LOCK_ID_MAX_LEN];
unsigned int l_ro_holders;
unsigned int l_ex_holders;
- char l_level;
- char l_requested;
- char l_blocking;
+ signed char l_level;
+ signed char l_requested;
+ signed char l_blocking;
/* Data packed - type enum ocfs2_lock_type */
unsigned char l_type;
diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c
index 252e7c82f929..a5ebe421195f 100644
--- a/fs/ocfs2/stack_user.c
+++ b/fs/ocfs2/stack_user.c
@@ -190,7 +190,7 @@ static struct ocfs2_live_connection *ocfs2_connection_find(const char *name)
return c;
}
- return c;
+ return NULL;
}
/*
diff --git a/fs/proc/base.c b/fs/proc/base.c
index f3d02ca461ec..182845147fe4 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1574,7 +1574,7 @@ static int do_proc_readlink(struct path *path, char __user *buffer, int buflen)
if (!tmp)
return -ENOMEM;
- pathname = d_path_with_unreachable(path, tmp, PAGE_SIZE);
+ pathname = d_path(path, tmp, PAGE_SIZE);
len = PTR_ERR(pathname);
if (IS_ERR(pathname))
goto out;
diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c
index 536d697a8a28..90d2fcb67a31 100644
--- a/fs/reiserfs/xattr_acl.c
+++ b/fs/reiserfs/xattr_acl.c
@@ -472,7 +472,9 @@ int reiserfs_acl_chmod(struct inode *inode)
struct reiserfs_transaction_handle th;
size_t size = reiserfs_xattr_nblocks(inode,
reiserfs_acl_size(clone->a_count));
- reiserfs_write_lock(inode->i_sb);
+ int depth;
+
+ depth = reiserfs_write_lock_once(inode->i_sb);
error = journal_begin(&th, inode->i_sb, size * 2);
if (!error) {
int error2;
@@ -482,7 +484,7 @@ int reiserfs_acl_chmod(struct inode *inode)
if (error2)
error = error2;
}
- reiserfs_write_unlock(inode->i_sb);
+ reiserfs_write_unlock_once(inode->i_sb, depth);
}
posix_acl_release(clone);
return error;
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 7d287afccde5..691f61223ed6 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -934,7 +934,6 @@ xfs_aops_discard_page(
struct xfs_inode *ip = XFS_I(inode);
struct buffer_head *bh, *head;
loff_t offset = page_offset(page);
- ssize_t len = 1 << inode->i_blkbits;
if (!xfs_is_delayed_page(page, IO_DELAY))
goto out_invalidate;
@@ -949,58 +948,14 @@ xfs_aops_discard_page(
xfs_ilock(ip, XFS_ILOCK_EXCL);
bh = head = page_buffers(page);
do {
- int done;
- xfs_fileoff_t offset_fsb;
- xfs_bmbt_irec_t imap;
- int nimaps = 1;
int error;
- xfs_fsblock_t firstblock;
- xfs_bmap_free_t flist;
+ xfs_fileoff_t start_fsb;
if (!buffer_delay(bh))
goto next_buffer;
- offset_fsb = XFS_B_TO_FSBT(ip->i_mount, offset);
-
- /*
- * Map the range first and check that it is a delalloc extent
- * before trying to unmap the range. Otherwise we will be
- * trying to remove a real extent (which requires a
- * transaction) or a hole, which is probably a bad idea...
- */
- error = xfs_bmapi(NULL, ip, offset_fsb, 1,
- XFS_BMAPI_ENTIRE, NULL, 0, &imap,
- &nimaps, NULL);
-
- if (error) {
- /* something screwed, just bail */
- if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
- xfs_fs_cmn_err(CE_ALERT, ip->i_mount,
- "page discard failed delalloc mapping lookup.");
- }
- break;
- }
- if (!nimaps) {
- /* nothing there */
- goto next_buffer;
- }
- if (imap.br_startblock != DELAYSTARTBLOCK) {
- /* been converted, ignore */
- goto next_buffer;
- }
- WARN_ON(imap.br_blockcount == 0);
-
- /*
- * Note: while we initialise the firstblock/flist pair, they
- * should never be used because blocks should never be
- * allocated or freed for a delalloc extent and hence we need
- * don't cancel or finish them after the xfs_bunmapi() call.
- */
- xfs_bmap_init(&flist, &firstblock);
- error = xfs_bunmapi(NULL, ip, offset_fsb, 1, 0, 1, &firstblock,
- &flist, &done);
-
- ASSERT(!flist.xbf_count && !flist.xbf_first);
+ start_fsb = XFS_B_TO_FSBT(ip->i_mount, offset);
+ error = xfs_bmap_punch_delalloc_range(ip, start_fsb, 1);
if (error) {
/* something screwed, just bail */
if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
@@ -1010,7 +965,7 @@ xfs_aops_discard_page(
break;
}
next_buffer:
- offset += len;
+ offset += 1 << inode->i_blkbits;
} while ((bh = bh->b_this_page) != head);
@@ -1505,11 +1460,42 @@ xfs_vm_write_failed(
struct inode *inode = mapping->host;
if (to > inode->i_size) {
- struct iattr ia = {
- .ia_valid = ATTR_SIZE | ATTR_FORCE,
- .ia_size = inode->i_size,
- };
- xfs_setattr(XFS_I(inode), &ia, XFS_ATTR_NOLOCK);
+ /*
+ * punch out the delalloc blocks we have already allocated. We
+ * don't call xfs_setattr() to do this as we may be in the
+ * middle of a multi-iovec write and so the vfs inode->i_size
+ * will not match the xfs ip->i_size and so it will zero too
+ * much. Hence we jus truncate the page cache to zero what is
+ * necessary and punch the delalloc blocks directly.
+ */
+ struct xfs_inode *ip = XFS_I(inode);
+ xfs_fileoff_t start_fsb;
+ xfs_fileoff_t end_fsb;
+ int error;
+
+ truncate_pagecache(inode, to, inode->i_size);
+
+ /*
+ * Check if there are any blocks that are outside of i_size
+ * that need to be trimmed back.
+ */
+ start_fsb = XFS_B_TO_FSB(ip->i_mount, inode->i_size) + 1;
+ end_fsb = XFS_B_TO_FSB(ip->i_mount, to);
+ if (end_fsb <= start_fsb)
+ return;
+
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
+ error = xfs_bmap_punch_delalloc_range(ip, start_fsb,
+ end_fsb - start_fsb);
+ if (error) {
+ /* something screwed, just bail */
+ if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
+ xfs_fs_cmn_err(CE_ALERT, ip->i_mount,
+ "xfs_vm_write_failed: unable to clean up ino %lld",
+ ip->i_ino);
+ }
+ }
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
}
}
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index aa1d353def29..4c5deb6e9e31 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -488,29 +488,16 @@ found:
spin_unlock(&pag->pag_buf_lock);
xfs_perag_put(pag);
- /* Attempt to get the semaphore without sleeping,
- * if this does not work then we need to drop the
- * spinlock and do a hard attempt on the semaphore.
- */
- if (down_trylock(&bp->b_sema)) {
+ if (xfs_buf_cond_lock(bp)) {
+ /* failed, so wait for the lock if requested. */
if (!(flags & XBF_TRYLOCK)) {
- /* wait for buffer ownership */
xfs_buf_lock(bp);
XFS_STATS_INC(xb_get_locked_waited);
} else {
- /* We asked for a trylock and failed, no need
- * to look at file offset and length here, we
- * know that this buffer at least overlaps our
- * buffer and is locked, therefore our buffer
- * either does not exist, or is this buffer.
- */
xfs_buf_rele(bp);
XFS_STATS_INC(xb_busy_locked);
return NULL;
}
- } else {
- /* trylock worked */
- XB_SET_OWNER(bp);
}
if (bp->b_flags & XBF_STALE) {
@@ -876,10 +863,18 @@ xfs_buf_rele(
*/
/*
- * Locks a buffer object, if it is not already locked.
- * Note that this in no way locks the underlying pages, so it is only
- * useful for synchronizing concurrent use of buffer objects, not for
- * synchronizing independent access to the underlying pages.
+ * Locks a buffer object, if it is not already locked. Note that this in
+ * no way locks the underlying pages, so it is only useful for
+ * synchronizing concurrent use of buffer objects, not for synchronizing
+ * independent access to the underlying pages.
+ *
+ * If we come across a stale, pinned, locked buffer, we know that we are
+ * being asked to lock a buffer that has been reallocated. Because it is
+ * pinned, we know that the log has not been pushed to disk and hence it
+ * will still be locked. Rather than continuing to have trylock attempts
+ * fail until someone else pushes the log, push it ourselves before
+ * returning. This means that the xfsaild will not get stuck trying
+ * to push on stale inode buffers.
*/
int
xfs_buf_cond_lock(
@@ -890,6 +885,8 @@ xfs_buf_cond_lock(
locked = down_trylock(&bp->b_sema) == 0;
if (locked)
XB_SET_OWNER(bp);
+ else if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE))
+ xfs_log_force(bp->b_target->bt_mount, 0);
trace_xfs_buf_cond_lock(bp, _RET_IP_);
return locked ? 0 : -EBUSY;
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 8abd12e32e13..4111cd3966c7 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -5471,8 +5471,13 @@ xfs_getbmap(
if (error)
goto out_unlock_iolock;
}
-
- ASSERT(ip->i_delayed_blks == 0);
+ /*
+ * even after flushing the inode, there can still be delalloc
+ * blocks on the inode beyond EOF due to speculative
+ * preallocation. These are not removed until the release
+ * function is called or the inode is inactivated. Hence we
+ * cannot assert here that ip->i_delayed_blks == 0.
+ */
}
lock = xfs_ilock_map_shared(ip);
@@ -6070,3 +6075,79 @@ xfs_bmap_disk_count_leaves(
*count += xfs_bmbt_disk_get_blockcount(frp);
}
}
+
+/*
+ * dead simple method of punching delalyed allocation blocks from a range in
+ * the inode. Walks a block at a time so will be slow, but is only executed in
+ * rare error cases so the overhead is not critical. This will alays punch out
+ * both the start and end blocks, even if the ranges only partially overlap
+ * them, so it is up to the caller to ensure that partial blocks are not
+ * passed in.
+ */
+int
+xfs_bmap_punch_delalloc_range(
+ struct xfs_inode *ip,
+ xfs_fileoff_t start_fsb,
+ xfs_fileoff_t length)
+{
+ xfs_fileoff_t remaining = length;
+ int error = 0;
+
+ ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
+
+ do {
+ int done;
+ xfs_bmbt_irec_t imap;
+ int nimaps = 1;
+ xfs_fsblock_t firstblock;
+ xfs_bmap_free_t flist;
+
+ /*
+ * Map the range first and check that it is a delalloc extent
+ * before trying to unmap the range. Otherwise we will be
+ * trying to remove a real extent (which requires a
+ * transaction) or a hole, which is probably a bad idea...
+ */
+ error = xfs_bmapi(NULL, ip, start_fsb, 1,
+ XFS_BMAPI_ENTIRE, NULL, 0, &imap,
+ &nimaps, NULL);
+
+ if (error) {
+ /* something screwed, just bail */
+ if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
+ xfs_fs_cmn_err(CE_ALERT, ip->i_mount,
+ "Failed delalloc mapping lookup ino %lld fsb %lld.",
+ ip->i_ino, start_fsb);
+ }
+ break;
+ }
+ if (!nimaps) {
+ /* nothing there */
+ goto next_block;
+ }
+ if (imap.br_startblock != DELAYSTARTBLOCK) {
+ /* been converted, ignore */
+ goto next_block;
+ }
+ WARN_ON(imap.br_blockcount == 0);
+
+ /*
+ * Note: while we initialise the firstblock/flist pair, they
+ * should never be used because blocks should never be
+ * allocated or freed for a delalloc extent and hence we need
+ * don't cancel or finish them after the xfs_bunmapi() call.
+ */
+ xfs_bmap_init(&flist, &firstblock);
+ error = xfs_bunmapi(NULL, ip, start_fsb, 1, 0, 1, &firstblock,
+ &flist, &done);
+ if (error)
+ break;
+
+ ASSERT(!flist.xbf_count && !flist.xbf_first);
+next_block:
+ start_fsb++;
+ remaining--;
+ } while(remaining > 0);
+
+ return error;
+}
diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h
index 71ec9b6ecdfc..3651191daea1 100644
--- a/fs/xfs/xfs_bmap.h
+++ b/fs/xfs/xfs_bmap.h
@@ -394,6 +394,11 @@ xfs_bmap_count_blocks(
int whichfork,
int *count);
+int
+xfs_bmap_punch_delalloc_range(
+ struct xfs_inode *ip,
+ xfs_fileoff_t start_fsb,
+ xfs_fileoff_t length);
#endif /* __KERNEL__ */
#endif /* __XFS_BMAP_H__ */
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index 3b9582c60a22..e60490bc00a6 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -377,6 +377,19 @@ xfs_swap_extents(
ip->i_d.di_format = tip->i_d.di_format;
tip->i_d.di_format = tmp;
+ /*
+ * The extents in the source inode could still contain speculative
+ * preallocation beyond EOF (e.g. the file is open but not modified
+ * while defrag is in progress). In that case, we need to copy over the
+ * number of delalloc blocks the data fork in the source inode is
+ * tracking beyond EOF so that when the fork is truncated away when the
+ * temporary inode is unlinked we don't underrun the i_delayed_blks
+ * counter on that inode.
+ */
+ ASSERT(tip->i_delayed_blks == 0);
+ tip->i_delayed_blks = ip->i_delayed_blks;
+ ip->i_delayed_blks = 0;
+
ilf_fields = XFS_ILOG_CORE;
switch(ip->i_d.di_format) {
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c
index ed9990267661..c78cc6a3d87c 100644
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -58,6 +58,7 @@ xfs_error_trap(int e)
int xfs_etest[XFS_NUM_INJECT_ERROR];
int64_t xfs_etest_fsid[XFS_NUM_INJECT_ERROR];
char * xfs_etest_fsname[XFS_NUM_INJECT_ERROR];
+int xfs_error_test_active;
int
xfs_error_test(int error_tag, int *fsidp, char *expression,
@@ -108,6 +109,7 @@ xfs_errortag_add(int error_tag, xfs_mount_t *mp)
len = strlen(mp->m_fsname);
xfs_etest_fsname[i] = kmem_alloc(len + 1, KM_SLEEP);
strcpy(xfs_etest_fsname[i], mp->m_fsname);
+ xfs_error_test_active++;
return 0;
}
}
@@ -137,6 +139,7 @@ xfs_errortag_clearall(xfs_mount_t *mp, int loud)
xfs_etest_fsid[i] = 0LL;
kmem_free(xfs_etest_fsname[i]);
xfs_etest_fsname[i] = NULL;
+ xfs_error_test_active--;
}
}
diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h
index c2c1a072bb82..f338847f80b8 100644
--- a/fs/xfs/xfs_error.h
+++ b/fs/xfs/xfs_error.h
@@ -127,13 +127,14 @@ extern void xfs_corruption_error(const char *tag, int level,
#define XFS_RANDOM_BMAPIFORMAT XFS_RANDOM_DEFAULT
#ifdef DEBUG
+extern int xfs_error_test_active;
extern int xfs_error_test(int, int *, char *, int, char *, unsigned long);
#define XFS_NUM_INJECT_ERROR 10
#define XFS_TEST_ERROR(expr, mp, tag, rf) \
- ((expr) || \
+ ((expr) || (xfs_error_test_active && \
xfs_error_test((tag), (mp)->m_fixedfsid, "expr", __LINE__, __FILE__, \
- (rf)))
+ (rf))))
extern int xfs_errortag_add(int error_tag, xfs_mount_t *mp);
extern int xfs_errortag_clearall(xfs_mount_t *mp, int loud);
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index c7ac020705df..7c8d30c453c3 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -657,18 +657,37 @@ xfs_inode_item_unlock(
}
/*
- * This is called to find out where the oldest active copy of the
- * inode log item in the on disk log resides now that the last log
- * write of it completed at the given lsn. Since we always re-log
- * all dirty data in an inode, the latest copy in the on disk log
- * is the only one that matters. Therefore, simply return the
- * given lsn.
+ * This is called to find out where the oldest active copy of the inode log
+ * item in the on disk log resides now that the last log write of it completed
+ * at the given lsn. Since we always re-log all dirty data in an inode, the
+ * latest copy in the on disk log is the only one that matters. Therefore,
+ * simply return the given lsn.
+ *
+ * If the inode has been marked stale because the cluster is being freed, we
+ * don't want to (re-)insert this inode into the AIL. There is a race condition
+ * where the cluster buffer may be unpinned before the inode is inserted into
+ * the AIL during transaction committed processing. If the buffer is unpinned
+ * before the inode item has been committed and inserted, then it is possible
+ * for the buffer to be written and IO completions before the inode is inserted
+ * into the AIL. In that case, we'd be inserting a clean, stale inode into the
+ * AIL which will never get removed. It will, however, get reclaimed which
+ * triggers an assert in xfs_inode_free() complaining about freein an inode
+ * still in the AIL.
+ *
+ * To avoid this, return a lower LSN than the one passed in so that the
+ * transaction committed code will not move the inode forward in the AIL but
+ * will still unpin it properly.
*/
STATIC xfs_lsn_t
xfs_inode_item_committed(
struct xfs_log_item *lip,
xfs_lsn_t lsn)
{
+ struct xfs_inode_log_item *iip = INODE_ITEM(lip);
+ struct xfs_inode *ip = iip->ili_inode;
+
+ if (xfs_iflags_test(ip, XFS_ISTALE))
+ return lsn - 1;
return lsn;
}