summaryrefslogtreecommitdiffstats
path: root/include
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2011-01-07 08:56:33 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2011-01-07 08:56:33 -0800
commitb4a45f5fe8078bfc10837dbd5b98735058bc4698 (patch)
treedf6f13a27610a3ec7eb4a661448cd779a8f84c79 /include
parent01539ba2a706ab7d35fc0667dff919ade7f87d63 (diff)
parentb3e19d924b6eaf2ca7d22cba99a517c5171007b6 (diff)
downloadlinux-b4a45f5fe8078bfc10837dbd5b98735058bc4698.tar.bz2
Merge branch 'vfs-scale-working' of git://git.kernel.org/pub/scm/linux/kernel/git/npiggin/linux-npiggin
* 'vfs-scale-working' of git://git.kernel.org/pub/scm/linux/kernel/git/npiggin/linux-npiggin: (57 commits) fs: scale mntget/mntput fs: rename vfsmount counter helpers fs: implement faster dentry memcmp fs: prefetch inode data in dcache lookup fs: improve scalability of pseudo filesystems fs: dcache per-inode inode alias locking fs: dcache per-bucket dcache hash locking bit_spinlock: add required includes kernel: add bl_list xfs: provide simple rcu-walk ACL implementation btrfs: provide simple rcu-walk ACL implementation ext2,3,4: provide simple rcu-walk ACL implementation fs: provide simple rcu-walk generic_check_acl implementation fs: provide rcu-walk aware permission i_ops fs: rcu-walk aware d_revalidate method fs: cache optimise dentry and inode for rcu-walk fs: dcache reduce branches in lookup path fs: dcache remove d_mounted fs: fs_struct use seqlock fs: rcu-walk for path lookup ...
Diffstat (limited to 'include')
-rw-r--r--include/linux/bit_spinlock.h4
-rw-r--r--include/linux/coda_linux.h2
-rw-r--r--include/linux/dcache.h243
-rw-r--r--include/linux/fs.h63
-rw-r--r--include/linux/fs_struct.h3
-rw-r--r--include/linux/fsnotify.h2
-rw-r--r--include/linux/fsnotify_backend.h11
-rw-r--r--include/linux/generic_acl.h2
-rw-r--r--include/linux/list_bl.h144
-rw-r--r--include/linux/mount.h53
-rw-r--r--include/linux/namei.h16
-rw-r--r--include/linux/ncp_fs.h4
-rw-r--r--include/linux/nfs_fs.h2
-rw-r--r--include/linux/path.h2
-rw-r--r--include/linux/posix_acl.h19
-rw-r--r--include/linux/rculist_bl.h127
-rw-r--r--include/linux/reiserfs_xattr.h2
-rw-r--r--include/linux/security.h8
-rw-r--r--include/linux/seqlock.h80
-rw-r--r--include/linux/slab.h2
20 files changed, 586 insertions, 203 deletions
diff --git a/include/linux/bit_spinlock.h b/include/linux/bit_spinlock.h
index 7113a32a86ea..e612575a2596 100644
--- a/include/linux/bit_spinlock.h
+++ b/include/linux/bit_spinlock.h
@@ -1,6 +1,10 @@
#ifndef __LINUX_BIT_SPINLOCK_H
#define __LINUX_BIT_SPINLOCK_H
+#include <linux/kernel.h>
+#include <linux/preempt.h>
+#include <asm/atomic.h>
+
/*
* bit-based spin_lock()
*
diff --git a/include/linux/coda_linux.h b/include/linux/coda_linux.h
index 2e914d0771b9..4ccc59c1ea82 100644
--- a/include/linux/coda_linux.h
+++ b/include/linux/coda_linux.h
@@ -37,7 +37,7 @@ extern const struct file_operations coda_ioctl_operations;
/* operations shared over more than one file */
int coda_open(struct inode *i, struct file *f);
int coda_release(struct inode *i, struct file *f);
-int coda_permission(struct inode *inode, int mask);
+int coda_permission(struct inode *inode, int mask, unsigned int flags);
int coda_revalidate_inode(struct dentry *);
int coda_getattr(struct vfsmount *, struct dentry *, struct kstat *);
int coda_setattr(struct dentry *, struct iattr *);
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 6a4aea30aa09..bd07758943e0 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -4,7 +4,9 @@
#include <asm/atomic.h>
#include <linux/list.h>
#include <linux/rculist.h>
+#include <linux/rculist_bl.h>
#include <linux/spinlock.h>
+#include <linux/seqlock.h>
#include <linux/cache.h>
#include <linux/rcupdate.h>
@@ -45,6 +47,27 @@ struct dentry_stat_t {
};
extern struct dentry_stat_t dentry_stat;
+/*
+ * Compare 2 name strings, return 0 if they match, otherwise non-zero.
+ * The strings are both count bytes long, and count is non-zero.
+ */
+static inline int dentry_cmp(const unsigned char *cs, size_t scount,
+ const unsigned char *ct, size_t tcount)
+{
+ int ret;
+ if (scount != tcount)
+ return 1;
+ do {
+ ret = (*cs != *ct);
+ if (ret)
+ break;
+ cs++;
+ ct++;
+ tcount--;
+ } while (tcount);
+ return ret;
+}
+
/* Name hashing routines. Initial hash value */
/* Hash courtesy of the R5 hash in reiserfs modulo sign bits */
#define init_name_hash() 0
@@ -81,25 +104,33 @@ full_name_hash(const unsigned char *name, unsigned int len)
* large memory footprint increase).
*/
#ifdef CONFIG_64BIT
-#define DNAME_INLINE_LEN_MIN 32 /* 192 bytes */
+# define DNAME_INLINE_LEN 32 /* 192 bytes */
#else
-#define DNAME_INLINE_LEN_MIN 40 /* 128 bytes */
+# ifdef CONFIG_SMP
+# define DNAME_INLINE_LEN 36 /* 128 bytes */
+# else
+# define DNAME_INLINE_LEN 40 /* 128 bytes */
+# endif
#endif
struct dentry {
- atomic_t d_count;
+ /* RCU lookup touched fields */
unsigned int d_flags; /* protected by d_lock */
- spinlock_t d_lock; /* per dentry lock */
- int d_mounted;
- struct inode *d_inode; /* Where the name belongs to - NULL is
- * negative */
- /*
- * The next three fields are touched by __d_lookup. Place them here
- * so they all fit in a cache line.
- */
- struct hlist_node d_hash; /* lookup hash list */
+ seqcount_t d_seq; /* per dentry seqlock */
+ struct hlist_bl_node d_hash; /* lookup hash list */
struct dentry *d_parent; /* parent directory */
struct qstr d_name;
+ struct inode *d_inode; /* Where the name belongs to - NULL is
+ * negative */
+ unsigned char d_iname[DNAME_INLINE_LEN]; /* small names */
+
+ /* Ref lookup also touches following */
+ unsigned int d_count; /* protected by d_lock */
+ spinlock_t d_lock; /* per dentry lock */
+ const struct dentry_operations *d_op;
+ struct super_block *d_sb; /* The root of the dentry tree */
+ unsigned long d_time; /* used by d_revalidate */
+ void *d_fsdata; /* fs-specific data */
struct list_head d_lru; /* LRU list */
/*
@@ -111,12 +142,6 @@ struct dentry {
} d_u;
struct list_head d_subdirs; /* our children */
struct list_head d_alias; /* inode alias list */
- unsigned long d_time; /* used by d_revalidate */
- const struct dentry_operations *d_op;
- struct super_block *d_sb; /* The root of the dentry tree */
- void *d_fsdata; /* fs-specific data */
-
- unsigned char d_iname[DNAME_INLINE_LEN_MIN]; /* small names */
};
/*
@@ -133,96 +158,61 @@ enum dentry_d_lock_class
struct dentry_operations {
int (*d_revalidate)(struct dentry *, struct nameidata *);
- int (*d_hash) (struct dentry *, struct qstr *);
- int (*d_compare) (struct dentry *, struct qstr *, struct qstr *);
- int (*d_delete)(struct dentry *);
+ int (*d_hash)(const struct dentry *, const struct inode *,
+ struct qstr *);
+ int (*d_compare)(const struct dentry *, const struct inode *,
+ const struct dentry *, const struct inode *,
+ unsigned int, const char *, const struct qstr *);
+ int (*d_delete)(const struct dentry *);
void (*d_release)(struct dentry *);
void (*d_iput)(struct dentry *, struct inode *);
char *(*d_dname)(struct dentry *, char *, int);
-};
-
-/* the dentry parameter passed to d_hash and d_compare is the parent
- * directory of the entries to be compared. It is used in case these
- * functions need any directory specific information for determining
- * equivalency classes. Using the dentry itself might not work, as it
- * might be a negative dentry which has no information associated with
- * it */
+} ____cacheline_aligned;
/*
-locking rules:
- big lock dcache_lock d_lock may block
-d_revalidate: no no no yes
-d_hash no no no yes
-d_compare: no yes yes no
-d_delete: no yes no no
-d_release: no no no yes
-d_iput: no no no yes
+ * Locking rules for dentry_operations callbacks are to be found in
+ * Documentation/filesystems/Locking. Keep it updated!
+ *
+ * FUrther descriptions are found in Documentation/filesystems/vfs.txt.
+ * Keep it updated too!
*/
/* d_flags entries */
#define DCACHE_AUTOFS_PENDING 0x0001 /* autofs: "under construction" */
-#define DCACHE_NFSFS_RENAMED 0x0002 /* this dentry has been "silly
- * renamed" and has to be
- * deleted on the last dput()
- */
-#define DCACHE_DISCONNECTED 0x0004
- /* This dentry is possibly not currently connected to the dcache tree,
- * in which case its parent will either be itself, or will have this
- * flag as well. nfsd will not use a dentry with this bit set, but will
- * first endeavour to clear the bit either by discovering that it is
- * connected, or by performing lookup operations. Any filesystem which
- * supports nfsd_operations MUST have a lookup function which, if it finds
- * a directory inode with a DCACHE_DISCONNECTED dentry, will d_move
- * that dentry into place and return that dentry rather than the passed one,
- * typically using d_splice_alias.
- */
+#define DCACHE_NFSFS_RENAMED 0x0002
+ /* this dentry has been "silly renamed" and has to be deleted on the last
+ * dput() */
+
+#define DCACHE_DISCONNECTED 0x0004
+ /* This dentry is possibly not currently connected to the dcache tree, in
+ * which case its parent will either be itself, or will have this flag as
+ * well. nfsd will not use a dentry with this bit set, but will first
+ * endeavour to clear the bit either by discovering that it is connected,
+ * or by performing lookup operations. Any filesystem which supports
+ * nfsd_operations MUST have a lookup function which, if it finds a
+ * directory inode with a DCACHE_DISCONNECTED dentry, will d_move that
+ * dentry into place and return that dentry rather than the passed one,
+ * typically using d_splice_alias. */
#define DCACHE_REFERENCED 0x0008 /* Recently used, don't discard. */
#define DCACHE_UNHASHED 0x0010
-
-#define DCACHE_INOTIFY_PARENT_WATCHED 0x0020 /* Parent inode is watched by inotify */
+#define DCACHE_INOTIFY_PARENT_WATCHED 0x0020
+ /* Parent inode is watched by inotify */
#define DCACHE_COOKIE 0x0040 /* For use by dcookie subsystem */
-
-#define DCACHE_FSNOTIFY_PARENT_WATCHED 0x0080 /* Parent inode is watched by some fsnotify listener */
+#define DCACHE_FSNOTIFY_PARENT_WATCHED 0x0080
+ /* Parent inode is watched by some fsnotify listener */
#define DCACHE_CANT_MOUNT 0x0100
+#define DCACHE_GENOCIDE 0x0200
+#define DCACHE_MOUNTED 0x0400 /* is a mountpoint */
-extern spinlock_t dcache_lock;
-extern seqlock_t rename_lock;
-
-/**
- * d_drop - drop a dentry
- * @dentry: dentry to drop
- *
- * d_drop() unhashes the entry from the parent dentry hashes, so that it won't
- * be found through a VFS lookup any more. Note that this is different from
- * deleting the dentry - d_delete will try to mark the dentry negative if
- * possible, giving a successful _negative_ lookup, while d_drop will
- * just make the cache lookup fail.
- *
- * d_drop() is used mainly for stuff that wants to invalidate a dentry for some
- * reason (NFS timeouts or autofs deletes).
- *
- * __d_drop requires dentry->d_lock.
- */
-
-static inline void __d_drop(struct dentry *dentry)
-{
- if (!(dentry->d_flags & DCACHE_UNHASHED)) {
- dentry->d_flags |= DCACHE_UNHASHED;
- hlist_del_rcu(&dentry->d_hash);
- }
-}
+#define DCACHE_OP_HASH 0x1000
+#define DCACHE_OP_COMPARE 0x2000
+#define DCACHE_OP_REVALIDATE 0x4000
+#define DCACHE_OP_DELETE 0x8000
-static inline void d_drop(struct dentry *dentry)
-{
- spin_lock(&dcache_lock);
- spin_lock(&dentry->d_lock);
- __d_drop(dentry);
- spin_unlock(&dentry->d_lock);
- spin_unlock(&dcache_lock);
-}
+extern seqlock_t rename_lock;
static inline int dname_external(struct dentry *dentry)
{
@@ -235,10 +225,14 @@ static inline int dname_external(struct dentry *dentry)
extern void d_instantiate(struct dentry *, struct inode *);
extern struct dentry * d_instantiate_unique(struct dentry *, struct inode *);
extern struct dentry * d_materialise_unique(struct dentry *, struct inode *);
+extern void __d_drop(struct dentry *dentry);
+extern void d_drop(struct dentry *dentry);
extern void d_delete(struct dentry *);
+extern void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op);
/* allocate/de-allocate */
extern struct dentry * d_alloc(struct dentry *, const struct qstr *);
+extern struct dentry * d_alloc_pseudo(struct super_block *, const struct qstr *);
extern struct dentry * d_splice_alias(struct inode *, struct dentry *);
extern struct dentry * d_add_ci(struct dentry *, struct inode *, struct qstr *);
extern struct dentry * d_obtain_alias(struct inode *);
@@ -296,14 +290,40 @@ static inline struct dentry *d_add_unique(struct dentry *entry, struct inode *in
return res;
}
+extern void dentry_update_name_case(struct dentry *, struct qstr *);
+
/* used for rename() and baskets */
extern void d_move(struct dentry *, struct dentry *);
extern struct dentry *d_ancestor(struct dentry *, struct dentry *);
/* appendix may either be NULL or be used for transname suffixes */
-extern struct dentry * d_lookup(struct dentry *, struct qstr *);
-extern struct dentry * __d_lookup(struct dentry *, struct qstr *);
-extern struct dentry * d_hash_and_lookup(struct dentry *, struct qstr *);
+extern struct dentry *d_lookup(struct dentry *, struct qstr *);
+extern struct dentry *d_hash_and_lookup(struct dentry *, struct qstr *);
+extern struct dentry *__d_lookup(struct dentry *, struct qstr *);
+extern struct dentry *__d_lookup_rcu(struct dentry *parent, struct qstr *name,
+ unsigned *seq, struct inode **inode);
+
+/**
+ * __d_rcu_to_refcount - take a refcount on dentry if sequence check is ok
+ * @dentry: dentry to take a ref on
+ * @seq: seqcount to verify against
+ * @Returns: 0 on failure, else 1.
+ *
+ * __d_rcu_to_refcount operates on a dentry,seq pair that was returned
+ * by __d_lookup_rcu, to get a reference on an rcu-walk dentry.
+ */
+static inline int __d_rcu_to_refcount(struct dentry *dentry, unsigned seq)
+{
+ int ret = 0;
+
+ assert_spin_locked(&dentry->d_lock);
+ if (!read_seqcount_retry(&dentry->d_seq, seq)) {
+ ret = 1;
+ dentry->d_count++;
+ }
+
+ return ret;
+}
/* validate "insecure" dentry pointer */
extern int d_validate(struct dentry *, struct dentry *);
@@ -316,34 +336,37 @@ extern char *dynamic_dname(struct dentry *, char *, int, const char *, ...);
extern char *__d_path(const struct path *path, struct path *root, char *, int);
extern char *d_path(const struct path *, char *, int);
extern char *d_path_with_unreachable(const struct path *, char *, int);
-extern char *__dentry_path(struct dentry *, char *, int);
+extern char *dentry_path_raw(struct dentry *, char *, int);
extern char *dentry_path(struct dentry *, char *, int);
/* Allocation counts.. */
/**
- * dget, dget_locked - get a reference to a dentry
+ * dget, dget_dlock - get a reference to a dentry
* @dentry: dentry to get a reference to
*
* Given a dentry or %NULL pointer increment the reference count
* if appropriate and return the dentry. A dentry will not be
- * destroyed when it has references. dget() should never be
- * called for dentries with zero reference counter. For these cases
- * (preferably none, functions in dcache.c are sufficient for normal
- * needs and they take necessary precautions) you should hold dcache_lock
- * and call dget_locked() instead of dget().
+ * destroyed when it has references.
*/
-
+static inline struct dentry *dget_dlock(struct dentry *dentry)
+{
+ if (dentry)
+ dentry->d_count++;
+ return dentry;
+}
+
static inline struct dentry *dget(struct dentry *dentry)
{
if (dentry) {
- BUG_ON(!atomic_read(&dentry->d_count));
- atomic_inc(&dentry->d_count);
+ spin_lock(&dentry->d_lock);
+ dget_dlock(dentry);
+ spin_unlock(&dentry->d_lock);
}
return dentry;
}
-extern struct dentry * dget_locked(struct dentry *);
+extern struct dentry *dget_parent(struct dentry *dentry);
/**
* d_unhashed - is dentry hashed
@@ -374,21 +397,11 @@ static inline void dont_mount(struct dentry *dentry)
spin_unlock(&dentry->d_lock);
}
-static inline struct dentry *dget_parent(struct dentry *dentry)
-{
- struct dentry *ret;
-
- spin_lock(&dentry->d_lock);
- ret = dget(dentry->d_parent);
- spin_unlock(&dentry->d_lock);
- return ret;
-}
-
extern void dput(struct dentry *);
static inline int d_mountpoint(struct dentry *dentry)
{
- return dentry->d_mounted;
+ return dentry->d_flags & DCACHE_MOUNTED;
}
extern struct vfsmount *lookup_mnt(struct path *);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 090f0eacde29..baf3e556ff0e 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -392,6 +392,7 @@ struct inodes_stat_t {
#include <linux/capability.h>
#include <linux/semaphore.h>
#include <linux/fiemap.h>
+#include <linux/rculist_bl.h>
#include <asm/atomic.h>
#include <asm/byteorder.h>
@@ -733,16 +734,31 @@ struct posix_acl;
#define ACL_NOT_CACHED ((void *)(-1))
struct inode {
+ /* RCU path lookup touches following: */
+ umode_t i_mode;
+ uid_t i_uid;
+ gid_t i_gid;
+ const struct inode_operations *i_op;
+ struct super_block *i_sb;
+
+ spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */
+ unsigned int i_flags;
+ struct mutex i_mutex;
+
+ unsigned long i_state;
+ unsigned long dirtied_when; /* jiffies of first dirtying */
+
struct hlist_node i_hash;
struct list_head i_wb_list; /* backing dev IO list */
struct list_head i_lru; /* inode LRU list */
struct list_head i_sb_list;
- struct list_head i_dentry;
+ union {
+ struct list_head i_dentry;
+ struct rcu_head i_rcu;
+ };
unsigned long i_ino;
atomic_t i_count;
unsigned int i_nlink;
- uid_t i_uid;
- gid_t i_gid;
dev_t i_rdev;
unsigned int i_blkbits;
u64 i_version;
@@ -755,13 +771,8 @@ struct inode {
struct timespec i_ctime;
blkcnt_t i_blocks;
unsigned short i_bytes;
- umode_t i_mode;
- spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */
- struct mutex i_mutex;
struct rw_semaphore i_alloc_sem;
- const struct inode_operations *i_op;
const struct file_operations *i_fop; /* former ->i_op->default_file_ops */
- struct super_block *i_sb;
struct file_lock *i_flock;
struct address_space *i_mapping;
struct address_space i_data;
@@ -782,11 +793,6 @@ struct inode {
struct hlist_head i_fsnotify_marks;
#endif
- unsigned long i_state;
- unsigned long dirtied_when; /* jiffies of first dirtying */
-
- unsigned int i_flags;
-
#ifdef CONFIG_IMA
/* protected by i_lock */
unsigned int i_readcount; /* struct files open RO */
@@ -1372,13 +1378,13 @@ struct super_block {
const struct xattr_handler **s_xattr;
struct list_head s_inodes; /* all inodes */
- struct hlist_head s_anon; /* anonymous dentries for (nfs) exporting */
+ struct hlist_bl_head s_anon; /* anonymous dentries for (nfs) exporting */
#ifdef CONFIG_SMP
struct list_head __percpu *s_files;
#else
struct list_head s_files;
#endif
- /* s_dentry_lru and s_nr_dentry_unused are protected by dcache_lock */
+ /* s_dentry_lru, s_nr_dentry_unused protected by dcache.c lru locks */
struct list_head s_dentry_lru; /* unused dentry lru */
int s_nr_dentry_unused; /* # of dentry on lru */
@@ -1545,9 +1551,18 @@ struct file_operations {
int (*setlease)(struct file *, long, struct file_lock **);
};
+#define IPERM_FLAG_RCU 0x0001
+
struct inode_operations {
- int (*create) (struct inode *,struct dentry *,int, struct nameidata *);
struct dentry * (*lookup) (struct inode *,struct dentry *, struct nameidata *);
+ void * (*follow_link) (struct dentry *, struct nameidata *);
+ int (*permission) (struct inode *, int, unsigned int);
+ int (*check_acl)(struct inode *, int, unsigned int);
+
+ int (*readlink) (struct dentry *, char __user *,int);
+ void (*put_link) (struct dentry *, struct nameidata *, void *);
+
+ int (*create) (struct inode *,struct dentry *,int, struct nameidata *);
int (*link) (struct dentry *,struct inode *,struct dentry *);
int (*unlink) (struct inode *,struct dentry *);
int (*symlink) (struct inode *,struct dentry *,const char *);
@@ -1556,12 +1571,7 @@ struct inode_operations {
int (*mknod) (struct inode *,struct dentry *,int,dev_t);
int (*rename) (struct inode *, struct dentry *,
struct inode *, struct dentry *);
- int (*readlink) (struct dentry *, char __user *,int);
- void * (*follow_link) (struct dentry *, struct nameidata *);
- void (*put_link) (struct dentry *, struct nameidata *, void *);
void (*truncate) (struct inode *);
- int (*permission) (struct inode *, int);
- int (*check_acl)(struct inode *, int);
int (*setattr) (struct dentry *, struct iattr *);
int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *);
int (*setxattr) (struct dentry *, const char *,const void *,size_t,int);
@@ -1573,7 +1583,7 @@ struct inode_operations {
loff_t len);
int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start,
u64 len);
-};
+} ____cacheline_aligned;
struct seq_file;
@@ -2158,8 +2168,8 @@ extern sector_t bmap(struct inode *, sector_t);
#endif
extern int notify_change(struct dentry *, struct iattr *);
extern int inode_permission(struct inode *, int);
-extern int generic_permission(struct inode *, int,
- int (*check_acl)(struct inode *, int));
+extern int generic_permission(struct inode *, int, unsigned int,
+ int (*check_acl)(struct inode *, int, unsigned int));
static inline bool execute_ok(struct inode *inode)
{
@@ -2230,6 +2240,7 @@ extern void iget_failed(struct inode *);
extern void end_writeback(struct inode *);
extern void __destroy_inode(struct inode *);
extern struct inode *new_inode(struct super_block *);
+extern void free_inode_nonrcu(struct inode *inode);
extern int should_remove_suid(struct dentry *);
extern int file_remove_suid(struct file *);
@@ -2446,6 +2457,10 @@ static inline ino_t parent_ino(struct dentry *dentry)
{
ino_t res;
+ /*
+ * Don't strictly need d_lock here? If the parent ino could change
+ * then surely we'd have a deeper race in the caller?
+ */
spin_lock(&dentry->d_lock);
res = dentry->d_parent->d_inode->i_ino;
spin_unlock(&dentry->d_lock);
diff --git a/include/linux/fs_struct.h b/include/linux/fs_struct.h
index a42b5bf02f8b..003dc0fd7347 100644
--- a/include/linux/fs_struct.h
+++ b/include/linux/fs_struct.h
@@ -2,10 +2,13 @@
#define _LINUX_FS_STRUCT_H
#include <linux/path.h>
+#include <linux/spinlock.h>
+#include <linux/seqlock.h>
struct fs_struct {
int users;
spinlock_t lock;
+ seqcount_t seq;
int umask;
int in_exec;
struct path root, pwd;
diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h
index b10bcdeaef76..2a53f10712b3 100644
--- a/include/linux/fsnotify.h
+++ b/include/linux/fsnotify.h
@@ -17,7 +17,6 @@
/*
* fsnotify_d_instantiate - instantiate a dentry for inode
- * Called with dcache_lock held.
*/
static inline void fsnotify_d_instantiate(struct dentry *dentry,
struct inode *inode)
@@ -62,7 +61,6 @@ static inline int fsnotify_perm(struct file *file, int mask)
/*
* fsnotify_d_move - dentry has been moved
- * Called with dcache_lock and dentry->d_lock held.
*/
static inline void fsnotify_d_move(struct dentry *dentry)
{
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index 7380763595d3..69ad89b50489 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -329,9 +329,15 @@ static inline void __fsnotify_update_dcache_flags(struct dentry *dentry)
{
struct dentry *parent;
- assert_spin_locked(&dcache_lock);
assert_spin_locked(&dentry->d_lock);
+ /*
+ * Serialisation of setting PARENT_WATCHED on the dentries is provided
+ * by d_lock. If inotify_inode_watched changes after we have taken
+ * d_lock, the following __fsnotify_update_child_dentry_flags call will
+ * find our entry, so it will spin until we complete here, and update
+ * us with the new state.
+ */
parent = dentry->d_parent;
if (parent->d_inode && fsnotify_inode_watches_children(parent->d_inode))
dentry->d_flags |= DCACHE_FSNOTIFY_PARENT_WATCHED;
@@ -341,15 +347,12 @@ static inline void __fsnotify_update_dcache_flags(struct dentry *dentry)
/*
* fsnotify_d_instantiate - instantiate a dentry for inode
- * Called with dcache_lock held.
*/
static inline void __fsnotify_d_instantiate(struct dentry *dentry, struct inode *inode)
{
if (!inode)
return;
- assert_spin_locked(&dcache_lock);
-
spin_lock(&dentry->d_lock);
__fsnotify_update_dcache_flags(dentry);
spin_unlock(&dentry->d_lock);
diff --git a/include/linux/generic_acl.h b/include/linux/generic_acl.h
index 574bea4013b6..0437e377b555 100644
--- a/include/linux/generic_acl.h
+++ b/include/linux/generic_acl.h
@@ -10,6 +10,6 @@ extern const struct xattr_handler generic_acl_default_handler;
int generic_acl_init(struct inode *, struct inode *);
int generic_acl_chmod(struct inode *);
-int generic_check_acl(struct inode *inode, int mask);
+int generic_check_acl(struct inode *inode, int mask, unsigned int flags);
#endif /* LINUX_GENERIC_ACL_H */
diff --git a/include/linux/list_bl.h b/include/linux/list_bl.h
new file mode 100644
index 000000000000..9ee97e7f2be4
--- /dev/null
+++ b/include/linux/list_bl.h
@@ -0,0 +1,144 @@
+#ifndef _LINUX_LIST_BL_H
+#define _LINUX_LIST_BL_H
+
+#include <linux/list.h>
+
+/*
+ * Special version of lists, where head of the list has a lock in the lowest
+ * bit. This is useful for scalable hash tables without increasing memory
+ * footprint overhead.
+ *
+ * For modification operations, the 0 bit of hlist_bl_head->first
+ * pointer must be set.
+ *
+ * With some small modifications, this can easily be adapted to store several
+ * arbitrary bits (not just a single lock bit), if the need arises to store
+ * some fast and compact auxiliary data.
+ */
+
+#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
+#define LIST_BL_LOCKMASK 1UL
+#else
+#define LIST_BL_LOCKMASK 0UL
+#endif
+
+#ifdef CONFIG_DEBUG_LIST
+#define LIST_BL_BUG_ON(x) BUG_ON(x)
+#else
+#define LIST_BL_BUG_ON(x)
+#endif
+
+
+struct hlist_bl_head {
+ struct hlist_bl_node *first;
+};
+
+struct hlist_bl_node {
+ struct hlist_bl_node *next, **pprev;
+};
+#define INIT_HLIST_BL_HEAD(ptr) \
+ ((ptr)->first = NULL)
+
+static inline void INIT_HLIST_BL_NODE(struct hlist_bl_node *h)
+{
+ h->next = NULL;
+ h->pprev = NULL;
+}
+
+#define hlist_bl_entry(ptr, type, member) container_of(ptr,type,member)
+
+static inline int hlist_bl_unhashed(const struct hlist_bl_node *h)
+{
+ return !h->pprev;
+}
+
+static inline struct hlist_bl_node *hlist_bl_first(struct hlist_bl_head *h)
+{
+ return (struct hlist_bl_node *)
+ ((unsigned long)h->first & ~LIST_BL_LOCKMASK);
+}
+
+static inline void hlist_bl_set_first(struct hlist_bl_head *h,
+ struct hlist_bl_node *n)
+{
+ LIST_BL_BUG_ON((unsigned long)n & LIST_BL_LOCKMASK);
+ LIST_BL_BUG_ON(!((unsigned long)h->first & LIST_BL_LOCKMASK));
+ h->first = (struct hlist_bl_node *)((unsigned long)n | LIST_BL_LOCKMASK);
+}
+
+static inline int hlist_bl_empty(const struct hlist_bl_head *h)
+{
+ return !((unsigned long)h->first & ~LIST_BL_LOCKMASK);
+}
+
+static inline void hlist_bl_add_head(struct hlist_bl_node *n,
+ struct hlist_bl_head *h)
+{
+ struct hlist_bl_node *first = hlist_bl_first(h);
+
+ n->next = first;
+ if (first)
+ first->pprev = &n->next;
+ n->pprev = &h->first;
+ hlist_bl_set_first(h, n);
+}
+
+static inline void __hlist_bl_del(struct hlist_bl_node *n)
+{
+ struct hlist_bl_node *next = n->next;
+ struct hlist_bl_node **pprev = n->pprev;
+
+ LIST_BL_BUG_ON((unsigned long)n & LIST_BL_LOCKMASK);
+
+ /* pprev may be `first`, so be careful not to lose the lock bit */
+ *pprev = (struct hlist_bl_node *)
+ ((unsigned long)next |
+ ((unsigned long)*pprev & LIST_BL_LOCKMASK));
+ if (next)
+ next->pprev = pprev;
+}
+
+static inline void hlist_bl_del(struct hlist_bl_node *n)
+{
+ __hlist_bl_del(n);
+ n->next = LIST_POISON1;
+ n->pprev = LIST_POISON2;
+}
+
+static inline void hlist_bl_del_init(struct hlist_bl_node *n)
+{
+ if (!hlist_bl_unhashed(n)) {
+ __hlist_bl_del(n);
+ INIT_HLIST_BL_NODE(n);
+ }
+}
+
+/**
+ * hlist_bl_for_each_entry - iterate over list of given type
+ * @tpos: the type * to use as a loop cursor.
+ * @pos: the &struct hlist_node to use as a loop cursor.
+ * @head: the head for your list.
+ * @member: the name of the hlist_node within the struct.
+ *
+ */
+#define hlist_bl_for_each_entry(tpos, pos, head, member) \
+ for (pos = hlist_bl_first(head); \
+ pos && \
+ ({ tpos = hlist_bl_entry(pos, typeof(*tpos), member); 1;}); \
+ pos = pos->next)
+
+/**
+ * hlist_bl_for_each_entry_safe - iterate over list of given type safe against removal of list entry
+ * @tpos: the type * to use as a loop cursor.
+ * @pos: the &struct hlist_node to use as a loop cursor.
+ * @n: another &struct hlist_node to use as temporary storage
+ * @head: the head for your list.
+ * @member: the name of the hlist_node within the struct.
+ */
+#define hlist_bl_for_each_entry_safe(tpos, pos, n, head, member) \
+ for (pos = hlist_bl_first(head); \
+ pos && ({ n = pos->next; 1; }) && \
+ ({ tpos = hlist_bl_entry(pos, typeof(*tpos), member); 1;}); \
+ pos = n)
+
+#endif
diff --git a/include/linux/mount.h b/include/linux/mount.h
index 5e7a59408dd4..1869ea24a739 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -13,6 +13,7 @@
#include <linux/list.h>
#include <linux/nodemask.h>
#include <linux/spinlock.h>
+#include <linux/seqlock.h>
#include <asm/atomic.h>
struct super_block;
@@ -46,12 +47,24 @@ struct mnt_namespace;
#define MNT_INTERNAL 0x4000
+struct mnt_pcp {
+ int mnt_count;
+ int mnt_writers;
+};
+
struct vfsmount {
struct list_head mnt_hash;
struct vfsmount *mnt_parent; /* fs we are mounted on */
struct dentry *mnt_mountpoint; /* dentry of mountpoint */
struct dentry *mnt_root; /* root of the mounted tree */
struct super_block *mnt_sb; /* pointer to superblock */
+#ifdef CONFIG_SMP
+ struct mnt_pcp __percpu *mnt_pcp;
+ atomic_t mnt_longrefs;
+#else
+ int mnt_count;
+ int mnt_writers;
+#endif
struct list_head mnt_mounts; /* list of children, anchored here */
struct list_head mnt_child; /* and going through their mnt_child */
int mnt_flags;
@@ -70,57 +83,25 @@ struct vfsmount {
struct mnt_namespace *mnt_ns; /* containing namespace */
int mnt_id; /* mount identifier */
int mnt_group_id; /* peer group identifier */
- /*
- * We put mnt_count & mnt_expiry_mark at the end of struct vfsmount
- * to let these frequently modified fields in a separate cache line
- * (so that reads of mnt_flags wont ping-pong on SMP machines)
- */
- atomic_t mnt_count;
int mnt_expiry_mark; /* true if marked for expiry */
int mnt_pinned;
int mnt_ghosts;
-#ifdef CONFIG_SMP
- int __percpu *mnt_writers;
-#else
- int mnt_writers;
-#endif
};
-static inline int *get_mnt_writers_ptr(struct vfsmount *mnt)
-{
-#ifdef CONFIG_SMP
- return mnt->mnt_writers;
-#else
- return &mnt->mnt_writers;
-#endif
-}
-
-static inline struct vfsmount *mntget(struct vfsmount *mnt)
-{
- if (mnt)
- atomic_inc(&mnt->mnt_count);
- return mnt;
-}
-
struct file; /* forward dec */
extern int mnt_want_write(struct vfsmount *mnt);
extern int mnt_want_write_file(struct file *file);
extern int mnt_clone_write(struct vfsmount *mnt);
extern void mnt_drop_write(struct vfsmount *mnt);
-extern void mntput_no_expire(struct vfsmount *mnt);
+extern void mntput(struct vfsmount *mnt);
+extern struct vfsmount *mntget(struct vfsmount *mnt);
+extern void mntput_long(struct vfsmount *mnt);
+extern struct vfsmount *mntget_long(struct vfsmount *mnt);
extern void mnt_pin(struct vfsmount *mnt);
extern void mnt_unpin(struct vfsmount *mnt);
extern int __mnt_is_readonly(struct vfsmount *mnt);
-static inline void mntput(struct vfsmount *mnt)
-{
- if (mnt) {
- mnt->mnt_expiry_mark = 0;
- mntput_no_expire(mnt);
- }
-}
-
extern struct vfsmount *do_kern_mount(const char *fstype, int flags,
const char *name, void *data);
diff --git a/include/linux/namei.h b/include/linux/namei.h
index 05b441d93642..18d06add0a40 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -19,7 +19,10 @@ struct nameidata {
struct path path;
struct qstr last;
struct path root;
+ struct file *file;
+ struct inode *inode; /* path.dentry.d_inode */
unsigned int flags;
+ unsigned seq;
int last_type;
unsigned depth;
char *saved_names[MAX_NESTED_LINKS + 1];
@@ -41,14 +44,15 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT, LAST_BIND};
* - require a directory
* - ending slashes ok even for nonexistent files
* - internal "there are more path components" flag
- * - locked when lookup done with dcache_lock held
* - dentry cache is untrusted; force a real lookup
*/
-#define LOOKUP_FOLLOW 1
-#define LOOKUP_DIRECTORY 2
-#define LOOKUP_CONTINUE 4
-#define LOOKUP_PARENT 16
-#define LOOKUP_REVAL 64
+#define LOOKUP_FOLLOW 0x0001
+#define LOOKUP_DIRECTORY 0x0002
+#define LOOKUP_CONTINUE 0x0004
+
+#define LOOKUP_PARENT 0x0010
+#define LOOKUP_REVAL 0x0020
+#define LOOKUP_RCU 0x0040
/*
* Intent data
*/
diff --git a/include/linux/ncp_fs.h b/include/linux/ncp_fs.h
index ef663061d5ac..1c27f201c856 100644
--- a/include/linux/ncp_fs.h
+++ b/include/linux/ncp_fs.h
@@ -184,13 +184,13 @@ struct ncp_entry_info {
__u8 file_handle[6];
};
-static inline struct ncp_server *NCP_SBP(struct super_block *sb)
+static inline struct ncp_server *NCP_SBP(const struct super_block *sb)
{
return sb->s_fs_info;
}
#define NCP_SERVER(inode) NCP_SBP((inode)->i_sb)
-static inline struct ncp_inode_info *NCP_FINFO(struct inode *inode)
+static inline struct ncp_inode_info *NCP_FINFO(const struct inode *inode)
{
return container_of(inode, struct ncp_inode_info, vfs_inode);
}
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 29d504d5d1c3..0779bb8f95be 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -351,7 +351,7 @@ extern int nfs_refresh_inode(struct inode *, struct nfs_fattr *);
extern int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr);
extern int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fattr);
extern int nfs_getattr(struct vfsmount *, struct dentry *, struct kstat *);
-extern int nfs_permission(struct inode *, int);
+extern int nfs_permission(struct inode *, int, unsigned int);
extern int nfs_open(struct inode *, struct file *);
extern int nfs_release(struct inode *, struct file *);
extern int nfs_attribute_timeout(struct inode *inode);
diff --git a/include/linux/path.h b/include/linux/path.h
index edc98dec6266..a581e8c06533 100644
--- a/include/linux/path.h
+++ b/include/linux/path.h
@@ -10,7 +10,9 @@ struct path {
};
extern void path_get(struct path *);
+extern void path_get_long(struct path *);
extern void path_put(struct path *);
+extern void path_put_long(struct path *);
static inline int path_equal(const struct path *path1, const struct path *path2)
{
diff --git a/include/linux/posix_acl.h b/include/linux/posix_acl.h
index 67608161df6b..d68283a898bb 100644
--- a/include/linux/posix_acl.h
+++ b/include/linux/posix_acl.h
@@ -108,6 +108,25 @@ static inline struct posix_acl *get_cached_acl(struct inode *inode, int type)
return acl;
}
+static inline int negative_cached_acl(struct inode *inode, int type)
+{
+ struct posix_acl **p, *acl;
+ switch (type) {
+ case ACL_TYPE_ACCESS:
+ p = &inode->i_acl;
+ break;
+ case ACL_TYPE_DEFAULT:
+ p = &inode->i_default_acl;
+ break;
+ default:
+ BUG();
+ }
+ acl = ACCESS_ONCE(*p);
+ if (acl)
+ return 0;
+ return 1;
+}
+
static inline void set_cached_acl(struct inode *inode,
int type,
struct posix_acl *acl)
diff --git a/include/linux/rculist_bl.h b/include/linux/rculist_bl.h
new file mode 100644
index 000000000000..b872b493724d
--- /dev/null
+++ b/include/linux/rculist_bl.h
@@ -0,0 +1,127 @@
+#ifndef _LINUX_RCULIST_BL_H
+#define _LINUX_RCULIST_BL_H
+
+/*
+ * RCU-protected bl list version. See include/linux/list_bl.h.
+ */
+#include <linux/list_bl.h>
+#include <linux/rcupdate.h>
+
+static inline void hlist_bl_set_first_rcu(struct hlist_bl_head *h,
+ struct hlist_bl_node *n)
+{
+ LIST_BL_BUG_ON((unsigned long)n & LIST_BL_LOCKMASK);
+ LIST_BL_BUG_ON(!((unsigned long)h->first & LIST_BL_LOCKMASK));
+ rcu_assign_pointer(h->first,
+ (struct hlist_bl_node *)((unsigned long)n | LIST_BL_LOCKMASK));
+}
+
+static inline struct hlist_bl_node *hlist_bl_first_rcu(struct hlist_bl_head *h)
+{
+ return (struct hlist_bl_node *)
+ ((unsigned long)rcu_dereference(h->first) & ~LIST_BL_LOCKMASK);
+}
+
+/**
+ * hlist_bl_del_init_rcu - deletes entry from hash list with re-initialization
+ * @n: the element to delete from the hash list.
+ *
+ * Note: hlist_bl_unhashed() on the node returns true after this. It is
+ * useful for RCU based read lockfree traversal if the writer side
+ * must know if the list entry is still hashed or already unhashed.
+ *
+ * In particular, it means that we can not poison the forward pointers
+ * that may still be used for walking the hash list and we can only
+ * zero the pprev pointer so list_unhashed() will return true after
+ * this.
+ *
+ * The caller must take whatever precautions are necessary (such as
+ * holding appropriate locks) to avoid racing with another
+ * list-mutation primitive, such as hlist_bl_add_head_rcu() or
+ * hlist_bl_del_rcu(), running on this same list. However, it is
+ * perfectly legal to run concurrently with the _rcu list-traversal
+ * primitives, such as hlist_bl_for_each_entry_rcu().
+ */
+static inline void hlist_bl_del_init_rcu(struct hlist_bl_node *n)
+{
+ if (!hlist_bl_unhashed(n)) {
+ __hlist_bl_del(n);
+ n->pprev = NULL;
+ }
+}
+
+/**
+ * hlist_bl_del_rcu - deletes entry from hash list without re-initialization
+ * @n: the element to delete from the hash list.
+ *
+ * Note: hlist_bl_unhashed() on entry does not return true after this,
+ * the entry is in an undefined state. It is useful for RCU based
+ * lockfree traversal.
+ *
+ * In particular, it means that we can not poison the forward
+ * pointers that may still be used for walking the hash list.
+ *
+ * The caller must take whatever precautions are necessary
+ * (such as holding appropriate locks) to avoid racing
+ * with another list-mutation primitive, such as hlist_bl_add_head_rcu()
+ * or hlist_bl_del_rcu(), running on this same list.
+ * However, it is perfectly legal to run concurrently with
+ * the _rcu list-traversal primitives, such as
+ * hlist_bl_for_each_entry().
+ */
+static inline void hlist_bl_del_rcu(struct hlist_bl_node *n)
+{
+ __hlist_bl_del(n);
+ n->pprev = LIST_POISON2;
+}
+
+/**
+ * hlist_bl_add_head_rcu
+ * @n: the element to add to the hash list.
+ * @h: the list to add to.
+ *
+ * Description:
+ * Adds the specified element to the specified hlist_bl,
+ * while permitting racing traversals.
+ *
+ * The caller must take whatever precautions are necessary
+ * (such as holding appropriate locks) to avoid racing
+ * with another list-mutation primitive, such as hlist_bl_add_head_rcu()
+ * or hlist_bl_del_rcu(), running on this same list.
+ * However, it is perfectly legal to run concurrently with
+ * the _rcu list-traversal primitives, such as
+ * hlist_bl_for_each_entry_rcu(), used to prevent memory-consistency
+ * problems on Alpha CPUs. Regardless of the type of CPU, the
+ * list-traversal primitive must be guarded by rcu_read_lock().
+ */
+static inline void hlist_bl_add_head_rcu(struct hlist_bl_node *n,
+ struct hlist_bl_head *h)
+{
+ struct hlist_bl_node *first;
+
+ /* don't need hlist_bl_first_rcu because we're under lock */
+ first = hlist_bl_first(h);
+
+ n->next = first;
+ if (first)
+ first->pprev = &n->next;
+ n->pprev = &h->first;
+
+ /* need _rcu because we can have concurrent lock free readers */
+ hlist_bl_set_first_rcu(h, n);
+}
+/**
+ * hlist_bl_for_each_entry_rcu - iterate over rcu list of given type
+ * @tpos: the type * to use as a loop cursor.
+ * @pos: the &struct hlist_bl_node to use as a loop cursor.
+ * @head: the head for your list.
+ * @member: the name of the hlist_bl_node within the struct.
+ *
+ */
+#define hlist_bl_for_each_entry_rcu(tpos, pos, head, member) \
+ for (pos = hlist_bl_first_rcu(head); \
+ pos && \
+ ({ tpos = hlist_bl_entry(pos, typeof(*tpos), member); 1; }); \
+ pos = rcu_dereference_raw(pos->next))
+
+#endif
diff --git a/include/linux/reiserfs_xattr.h b/include/linux/reiserfs_xattr.h
index b2cf2089769b..3b94c91f20a6 100644
--- a/include/linux/reiserfs_xattr.h
+++ b/include/linux/reiserfs_xattr.h
@@ -41,7 +41,7 @@ int reiserfs_xattr_init(struct super_block *sb, int mount_flags);
int reiserfs_lookup_privroot(struct super_block *sb);
int reiserfs_delete_xattrs(struct inode *inode);
int reiserfs_chown_xattrs(struct inode *inode, struct iattr *attrs);
-int reiserfs_permission(struct inode *inode, int mask);
+int reiserfs_permission(struct inode *inode, int mask, unsigned int flags);
#ifdef CONFIG_REISERFS_FS_XATTR
#define has_xattr_dir(inode) (REISERFS_I(inode)->i_flags & i_has_xattr_dir)
diff --git a/include/linux/security.h b/include/linux/security.h
index d47a4c24b3e4..1ac42475ea08 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -457,7 +457,6 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
* called when the actual read/write operations are performed.
* @inode contains the inode structure to check.
* @mask contains the permission mask.
- * @nd contains the nameidata (may be NULL).
* Return 0 if permission is granted.
* @inode_setattr:
* Check permission before setting file attributes. Note that the kernel
@@ -1713,6 +1712,7 @@ int security_inode_rename(struct inode *old_dir, struct dentry *old_dentry,
int security_inode_readlink(struct dentry *dentry);
int security_inode_follow_link(struct dentry *dentry, struct nameidata *nd);
int security_inode_permission(struct inode *inode, int mask);
+int security_inode_exec_permission(struct inode *inode, unsigned int flags);
int security_inode_setattr(struct dentry *dentry, struct iattr *attr);
int security_inode_getattr(struct vfsmount *mnt, struct dentry *dentry);
int security_inode_setxattr(struct dentry *dentry, const char *name,
@@ -2102,6 +2102,12 @@ static inline int security_inode_permission(struct inode *inode, int mask)
return 0;
}
+static inline int security_inode_exec_permission(struct inode *inode,
+ unsigned int flags)
+{
+ return 0;
+}
+
static inline int security_inode_setattr(struct dentry *dentry,
struct iattr *attr)
{
diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
index 632205ccc25d..e98cd2e57194 100644
--- a/include/linux/seqlock.h
+++ b/include/linux/seqlock.h
@@ -107,7 +107,7 @@ static __always_inline int read_seqretry(const seqlock_t *sl, unsigned start)
{
smp_rmb();
- return (sl->sequence != start);
+ return unlikely(sl->sequence != start);
}
@@ -125,14 +125,25 @@ typedef struct seqcount {
#define SEQCNT_ZERO { 0 }
#define seqcount_init(x) do { *(x) = (seqcount_t) SEQCNT_ZERO; } while (0)
-/* Start of read using pointer to a sequence counter only. */
-static inline unsigned read_seqcount_begin(const seqcount_t *s)
+/**
+ * __read_seqcount_begin - begin a seq-read critical section (without barrier)
+ * @s: pointer to seqcount_t
+ * Returns: count to be passed to read_seqcount_retry
+ *
+ * __read_seqcount_begin is like read_seqcount_begin, but has no smp_rmb()
+ * barrier. Callers should ensure that smp_rmb() or equivalent ordering is
+ * provided before actually loading any of the variables that are to be
+ * protected in this critical section.
+ *
+ * Use carefully, only in critical code, and comment how the barrier is
+ * provided.
+ */
+static inline unsigned __read_seqcount_begin(const seqcount_t *s)
{
unsigned ret;
repeat:
ret = s->sequence;
- smp_rmb();
if (unlikely(ret & 1)) {
cpu_relax();
goto repeat;
@@ -140,14 +151,56 @@ repeat:
return ret;
}
-/*
- * Test if reader processed invalid data because sequence number has changed.
+/**
+ * read_seqcount_begin - begin a seq-read critical section
+ * @s: pointer to seqcount_t
+ * Returns: count to be passed to read_seqcount_retry
+ *
+ * read_seqcount_begin opens a read critical section of the given seqcount.
+ * Validity of the critical section is tested by checking read_seqcount_retry
+ * function.
+ */
+static inline unsigned read_seqcount_begin(const seqcount_t *s)
+{
+ unsigned ret = __read_seqcount_begin(s);
+ smp_rmb();
+ return ret;
+}
+
+/**
+ * __read_seqcount_retry - end a seq-read critical section (without barrier)
+ * @s: pointer to seqcount_t
+ * @start: count, from read_seqcount_begin
+ * Returns: 1 if retry is required, else 0
+ *
+ * __read_seqcount_retry is like read_seqcount_retry, but has no smp_rmb()
+ * barrier. Callers should ensure that smp_rmb() or equivalent ordering is
+ * provided before actually loading any of the variables that are to be
+ * protected in this critical section.
+ *
+ * Use carefully, only in critical code, and comment how the barrier is
+ * provided.
+ */
+static inline int __read_seqcount_retry(const seqcount_t *s, unsigned start)
+{
+ return unlikely(s->sequence != start);
+}
+
+/**
+ * read_seqcount_retry - end a seq-read critical section
+ * @s: pointer to seqcount_t
+ * @start: count, from read_seqcount_begin
+ * Returns: 1 if retry is required, else 0
+ *
+ * read_seqcount_retry closes a read critical section of the given seqcount.
+ * If the critical section was invalid, it must be ignored (and typically
+ * retried).
*/
static inline int read_seqcount_retry(const seqcount_t *s, unsigned start)
{
smp_rmb();
- return s->sequence != start;
+ return __read_seqcount_retry(s, start);
}
@@ -167,6 +220,19 @@ static inline void write_seqcount_end(seqcount_t *s)
s->sequence++;
}
+/**
+ * write_seqcount_barrier - invalidate in-progress read-side seq operations
+ * @s: pointer to seqcount_t
+ *
+ * After write_seqcount_barrier, no read-side seq operations will complete
+ * successfully and see data older than this.
+ */
+static inline void write_seqcount_barrier(seqcount_t *s)
+{
+ smp_wmb();
+ s->sequence+=2;
+}
+
/*
* Possible sw/hw IRQ protected versions of the interfaces.
*/
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 59260e21bdf5..fa9086647eb7 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -106,8 +106,6 @@ int kmem_cache_shrink(struct kmem_cache *);
void kmem_cache_free(struct kmem_cache *, void *);
unsigned int kmem_cache_size(struct kmem_cache *);
const char *kmem_cache_name(struct kmem_cache *);
-int kern_ptr_validate(const void *ptr, unsigned long size);
-int kmem_ptr_validate(struct kmem_cache *cachep, const void *ptr);
/*
* Please use this macro to create slab caches. Simply specify the