11 files changed, 134 insertions, 95 deletions
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index 2ca3d17eee56..6a34a0f4d37c 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -50,8 +50,8 @@ prototypes:
 	int (*rename2) (struct inode *, struct dentry *,
 			struct inode *, struct dentry *, unsigned int);
 	int (*readlink) (struct dentry *, char __user *,int);
-	void * (*follow_link) (struct dentry *, struct nameidata *);
-	void (*put_link) (struct dentry *, struct nameidata *, void *);
+	const char *(*follow_link) (struct dentry *, void **);
+	void (*put_link) (struct inode *, void *);
 	void (*truncate) (struct inode *);
 	int (*permission) (struct inode *, int, unsigned int);
 	int (*get_acl)(struct inode *, int);
@@ -164,8 +164,6 @@ the block device inode.  See there for more details.
 
 --------------------------- file_system_type ---------------------------
 prototypes:
-	int (*get_sb) (struct file_system_type *, int,
-		       const char *, void *, struct vfsmount *);
 	struct dentry *(*mount) (struct file_system_type *, int,
 		       const char *, void *);
 	void (*kill_sb) (struct super_block *);
@@ -198,7 +196,7 @@ prototypes:
 	void (*invalidatepage) (struct page *, unsigned int, unsigned int);
 	int (*releasepage) (struct page *, int);
 	void (*freepage)(struct page *);
-	int (*direct_IO)(int, struct kiocb *, struct iov_iter *iter, loff_t offset);
+	int (*direct_IO)(struct kiocb *, struct iov_iter *iter, loff_t offset);
 	int (*migratepage)(struct address_space *, struct page *, struct page *);
 	int (*launder_page)(struct page *);
 	int (*is_partially_uptodate)(struct page *, unsigned long, unsigned long);
@@ -431,8 +429,6 @@ prototypes:
 	loff_t (*llseek) (struct file *, loff_t, int);
 	ssize_t (*read) (struct file *, char __user *, size_t, loff_t *);
 	ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
-	ssize_t (*aio_read) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
-	ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
 	ssize_t (*read_iter) (struct kiocb *, struct iov_iter *);
 	ssize_t (*write_iter) (struct kiocb *, struct iov_iter *);
 	int (*iterate) (struct file *, struct dir_context *);
@@ -527,6 +523,7 @@ prototypes:
 	void (*close)(struct vm_area_struct*);
 	int (*fault)(struct vm_area_struct*, struct vm_fault *);
 	int (*page_mkwrite)(struct vm_area_struct *, struct vm_fault *);
+	int (*pfn_mkwrite)(struct vm_area_struct *, struct vm_fault *);
 	int (*access)(struct vm_area_struct *, unsigned long, void*, int, int);
 
 locking rules:
@@ -536,6 +533,7 @@ close:		yes
 fault:		yes		can return with page locked
 map_pages:	yes
 page_mkwrite:	yes		can return with page locked
+pfn_mkwrite:	yes
 access:		yes
 
 	->fault() is called when a previously not present pte is about
@@ -562,6 +560,12 @@ the page has been truncated, the filesystem should not look up a new page
 like the ->fault() handler, but simply return with VM_FAULT_NOPAGE, which
 will cause the VM to retry the fault.
 
+	->pfn_mkwrite() is the same as page_mkwrite but when the pte is
+VM_PFNMAP or VM_MIXEDMAP with a page-less entry. Expected return is
+VM_FAULT_NOPAGE. Or one of the VM_FAULT_ERROR types. The default behavior
+after this call is to make the pte read-write, unless pfn_mkwrite returns
+an error.
+
 	->access() is called when get_user_pages() fails in
 access_process_vm(), typically used to debug a process through
 /proc/pid/mem or ptrace.  This function is needed only for
diff --git a/Documentation/filesystems/automount-support.txt b/Documentation/filesystems/automount-support.txt
index 7cac200e2a85..7eb762eb3136 100644
--- a/Documentation/filesystems/automount-support.txt
+++ b/Documentation/filesystems/automount-support.txt
@@ -1,41 +1,15 @@
-Support is available for filesystems that wish to do automounting support (such
-as kAFS which can be found in fs/afs/). This facility includes allowing
-in-kernel mounts to be performed and mountpoint degradation to be
-requested. The latter can also be requested by userspace.
+Support is available for filesystems that wish to do automounting
+support (such as kAFS which can be found in fs/afs/ and NFS in
+fs/nfs/). This facility includes allowing in-kernel mounts to be
+performed and mountpoint degradation to be requested. The latter can
+also be requested by userspace.
 
 
 ======================
 IN-KERNEL AUTOMOUNTING
 ======================
 
-A filesystem can now mount another filesystem on one of its directories by the
-following procedure:
-
- (1) Give the directory a follow_link() operation.
-
-     When the directory is accessed, the follow_link op will be called, and
-     it will be provided with the location of the mountpoint in the nameidata
-     structure (vfsmount and dentry).
-
- (2) Have the follow_link() op do the following steps:
-
-     (a) Call vfs_kern_mount() to call the appropriate filesystem to set up a
-         superblock and gain a vfsmount structure representing it.
-
-     (b) Copy the nameidata provided as an argument and substitute the dentry
-	 argument into it the copy.
-
-     (c) Call do_add_mount() to install the new vfsmount into the namespace's
-	 mountpoint tree, thus making it accessible to userspace. Use the
-	 nameidata set up in (b) as the destination.
-
-	 If the mountpoint will be automatically expired, then do_add_mount()
-	 should also be given the location of an expiration list (see further
-	 down).
-
-     (d) Release the path in the nameidata argument and substitute in the new
-	 vfsmount and its root dentry. The ref counts on these will need
-	 incrementing.
+See section "Mount Traps" of  Documentation/filesystems/autofs4.txt
 
 Then from userspace, you can just do something like:
 
@@ -61,17 +35,18 @@ AUTOMATIC MOUNTPOINT EXPIRY
 ===========================
 
 Automatic expiration of mountpoints is easy, provided you've mounted the
-mountpoint to be expired in the automounting procedure outlined above.
+mountpoint to be expired in the automounting procedure outlined separately.
 
 To do expiration, you need to follow these steps:
 
- (3) Create at least one list off which the vfsmounts to be expired can be
-     hung. Access to this list will be governed by the vfsmount_lock.
+ (1) Create at least one list off which the vfsmounts to be expired can be
+     hung.
 
- (4) In step (2c) above, the call to do_add_mount() should be provided with a
-     pointer to this list. It will hang the vfsmount off of it if it succeeds.
+ (2) When a new mountpoint is created in the ->d_automount method, add
+     the mnt to the list using mnt_set_expiry()
+             mnt_set_expiry(newmnt, &afs_vfsmounts);
 
- (5) When you want mountpoints to be expired, call mark_mounts_for_expiry()
+ (3) When you want mountpoints to be expired, call mark_mounts_for_expiry()
      with a pointer to this list. This will process the list, marking every
      vfsmount thereon for potential expiry on the next call.
 
diff --git a/Documentation/filesystems/dlmfs.txt b/Documentation/filesystems/dlmfs.txt
index 1b528b2ad809..fcf4d509d118 100644
--- a/Documentation/filesystems/dlmfs.txt
+++ b/Documentation/filesystems/dlmfs.txt
@@ -5,8 +5,8 @@ system.
 
 dlmfs is built with OCFS2 as it requires most of its infrastructure.
 
-Project web page:    http://oss.oracle.com/projects/ocfs2
-Tools web page:      http://oss.oracle.com/projects/ocfs2-tools
+Project web page:    http://ocfs2.wiki.kernel.org
+Tools web page:      https://github.com/markfasheh/ocfs2-tools
 OCFS2 mailing lists: http://oss.oracle.com/projects/ocfs2/mailman/
 
 All code copyright 2005 Oracle except when otherwise noted.
diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt
index dac11d7fef27..e9e750e59efc 100644
--- a/Documentation/filesystems/f2fs.txt
+++ b/Documentation/filesystems/f2fs.txt
@@ -140,6 +140,12 @@ nobarrier              This option can be used if underlying storage guarantees
 fastboot               This option is used when a system wants to reduce mount
                        time as much as possible, even though normal performance
 		       can be sacrificed.
+extent_cache           Enable an extent cache based on rb-tree, it can cache
+                       as many as extent which map between contiguous logical
+                       address and physical address per inode, resulting in
+                       increasing the cache hit ratio.
+noinline_data          Disable the inline data feature, inline data feature is
+                       enabled by default.
 
 ================================================================================
 DEBUGFS ENTRIES
diff --git a/Documentation/filesystems/nfs/nfs-rdma.txt b/Documentation/filesystems/nfs/nfs-rdma.txt
index 724043858b08..95c13aa575ff 100644
--- a/Documentation/filesystems/nfs/nfs-rdma.txt
+++ b/Documentation/filesystems/nfs/nfs-rdma.txt
@@ -187,8 +187,10 @@ Check RDMA and NFS Setup
     To further test the InfiniBand software stack, use IPoIB (this
     assumes you have two IB hosts named host1 and host2):
 
-    host1$ ifconfig ib0 a.b.c.x
-    host2$ ifconfig ib0 a.b.c.y
+    host1$ ip link set dev ib0 up
+    host1$ ip address add dev ib0 a.b.c.x
+    host2$ ip link set dev ib0 up
+    host2$ ip address add dev ib0 a.b.c.y
     host1$ ping a.b.c.y
     host2$ ping a.b.c.x
 
@@ -229,7 +231,8 @@ NFS/RDMA Setup
 
     $ modprobe ib_mthca
     $ modprobe ib_ipoib
-    $ ifconfig ib0 a.b.c.d
+    $ ip li set dev ib0 up
+    $ ip addr add dev ib0 a.b.c.d
 
     NOTE: use unique addresses for the client and server
 
diff --git a/Documentation/filesystems/ocfs2.txt b/Documentation/filesystems/ocfs2.txt
index 28f8c08201e2..4c49e5410595 100644
--- a/Documentation/filesystems/ocfs2.txt
+++ b/Documentation/filesystems/ocfs2.txt
@@ -8,8 +8,8 @@ also make it attractive for non-clustered use.
 You'll want to install the ocfs2-tools package in order to at least
 get "mount.ocfs2" and "ocfs2_hb_ctl".
 
-Project web page:    http://oss.oracle.com/projects/ocfs2
-Tools web page:      http://oss.oracle.com/projects/ocfs2-tools
+Project web page:    http://ocfs2.wiki.kernel.org
+Tools git tree:      https://github.com/markfasheh/ocfs2-tools
 OCFS2 mailing lists: http://oss.oracle.com/projects/ocfs2/mailman/
 
 All code copyright 2005 Oracle except when otherwise noted.
diff --git a/Documentation/filesystems/overlayfs.txt b/Documentation/filesystems/overlayfs.txt
index a27c950ece61..6db0e5d1da07 100644
--- a/Documentation/filesystems/overlayfs.txt
+++ b/Documentation/filesystems/overlayfs.txt
@@ -159,6 +159,22 @@ overlay filesystem (though an operation on the name of the file such as
 rename or unlink will of course be noticed and handled).
 
 
+Multiple lower layers
+---------------------
+
+Multiple lower layers can now be given using the the colon (":") as a
+separator character between the directory names.  For example:
+
+  mount -t overlay overlay -olowerdir=/lower1:/lower2:/lower3 /merged
+
+As the example shows, "upperdir=" and "workdir=" may be omitted.  In
+that case the overlay will be read-only.
+
+The specified lower directories will be stacked beginning from the
+rightmost one and going left.  In the above example lower1 will be the
+top, lower2 the middle and lower3 the bottom layer.
+
+
 Non-standard behavior
 ---------------------
 
@@ -196,3 +212,15 @@ Changes to the underlying filesystems while part of a mounted overlay
 filesystem are not allowed.  If the underlying filesystem is changed,
 the behavior of the overlay is undefined, though it will not result in
 a crash or deadlock.
+
+Testsuite
+---------
+
+There's testsuite developed by David Howells at:
+
+  git://git.infradead.org/users/dhowells/unionmount-testsuite.git
+
+Run as root:
+
+  # cd unionmount-testsuite
+  # ./run --ov
diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting
index fa2db081505e..3eae250254d5 100644
--- a/Documentation/filesystems/porting
+++ b/Documentation/filesystems/porting
@@ -471,3 +471,32 @@ in your dentry operations instead.
 [mandatory]
 	f_dentry is gone; use f_path.dentry, or, better yet, see if you can avoid
 	it entirely.
+--
+[mandatory]
+	never call ->read() and ->write() directly; use __vfs_{read,write} or
+	wrappers; instead of checking for ->write or ->read being NULL, look for
+	FMODE_CAN_{WRITE,READ} in file->f_mode.
+--
+[mandatory]
+	do _not_ use new_sync_{read,write} for ->read/->write; leave it NULL
+	instead.
+--
+[mandatory]
+	->aio_read/->aio_write are gone.  Use ->read_iter/->write_iter.
+---
+[recommended]
+	for embedded ("fast") symlinks just set inode->i_link to wherever the
+	symlink body is and use simple_follow_link() as ->follow_link().
+--
+[mandatory]
+	calling conventions for ->follow_link() have changed.  Instead of returning
+	cookie and using nd_set_link() to store the body to traverse, we return
+	the body to traverse and store the cookie using explicit void ** argument.
+	nameidata isn't passed at all - nd_jump_link() doesn't need it and
+	nd_[gs]et_link() is gone.
+--
+[mandatory]
+	calling conventions for ->put_link() have changed.  It gets inode instead of
+	dentry,  it does not get nameidata at all and it gets called only when cookie
+	is non-NULL.  Note that link body isn't available anymore, so if you need it,
+	store it as cookie.
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index a07ba61662ed..c3b6b301d8b0 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -200,12 +200,12 @@ contains details information about the process itself.  Its fields are
 explained in Table 1-4.
 
 (for SMP CONFIG users)
-For making accounting scalable, RSS related information are handled in
-asynchronous manner and the vaule may not be very precise. To see a precise
+For making accounting scalable, RSS related information are handled in an
+asynchronous manner and the value may not be very precise. To see a precise
 snapshot of a moment, you can see /proc/<pid>/smaps file and scan page table.
 It's slow but very precise.
 
-Table 1-2: Contents of the status files (as of 2.6.30-rc7)
+Table 1-2: Contents of the status files (as of 3.20.0)
 ..............................................................................
  Field                       Content
  Name                        filename of the executable
@@ -213,6 +213,7 @@ Table 1-2: Contents of the status files (as of 2.6.30-rc7)
                              in an uninterruptible wait, Z is zombie,
 			     T is traced or stopped)
  Tgid                        thread group ID
+ Ngid                        NUMA group ID (0 if none)
  Pid                         process id
  PPid                        process id of the parent process
  TracerPid                   PID of process tracing this process (0 if not)
@@ -220,6 +221,10 @@ Table 1-2: Contents of the status files (as of 2.6.30-rc7)
  Gid                         Real, effective, saved set, and  file system GIDs
  FDSize                      number of file descriptor slots currently allocated
  Groups                      supplementary group list
+ NStgid                      descendant namespace thread group ID hierarchy
+ NSpid                       descendant namespace process ID hierarchy
+ NSpgid                      descendant namespace process group ID hierarchy
+ NSsid                       descendant namespace session ID hierarchy
  VmPeak                      peak virtual memory size
  VmSize                      total program size
  VmLck                       locked memory size
@@ -1255,9 +1260,9 @@ Various pieces   of  information about  kernel activity  are  available in the
 since the system first booted.  For a quick look, simply cat the file:
 
   > cat /proc/stat
-  cpu  2255 34 2290 22625563 6290 127 456 0 0
-  cpu0 1132 34 1441 11311718 3675 127 438 0 0
-  cpu1 1123 0 849 11313845 2614 0 18 0 0
+  cpu  2255 34 2290 22625563 6290 127 456 0 0 0
+  cpu0 1132 34 1441 11311718 3675 127 438 0 0 0
+  cpu1 1123 0 849 11313845 2614 0 18 0 0 0
   intr 114930548 113199788 3 0 5 263 0 4 [... lots more numbers ...]
   ctxt 1990473
   btime 1062191376
@@ -1704,6 +1709,10 @@ A typical output is
 	flags:	0100002
 	mnt_id:	19
 
+All locks associated with a file descriptor are shown in its fdinfo too.
+
+lock:       1: FLOCK  ADVISORY  WRITE 359 00:13:11691 0 EOF
+
 The files such as eventfd, fsnotify, signalfd, epoll among the regular pos/flags
 pair provide additional information particular to the objects they represent.
 
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index 966b22829f3b..b403b29ef710 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -350,8 +350,8 @@ struct inode_operations {
 	int (*rename2) (struct inode *, struct dentry *,
 			struct inode *, struct dentry *, unsigned int);
 	int (*readlink) (struct dentry *, char __user *,int);
-        void * (*follow_link) (struct dentry *, struct nameidata *);
-        void (*put_link) (struct dentry *, struct nameidata *, void *);
+	const char *(*follow_link) (struct dentry *, void **);
+	void (*put_link) (struct inode *, void *);
 	int (*permission) (struct inode *, int);
 	int (*get_acl)(struct inode *, int);
 	int (*setattr) (struct dentry *, struct iattr *);
@@ -436,16 +436,18 @@ otherwise noted.
 
   follow_link: called by the VFS to follow a symbolic link to the
 	inode it points to.  Only required if you want to support
-	symbolic links.  This method returns a void pointer cookie
-	that is passed to put_link().
+	symbolic links.  This method returns the symlink body
+	to traverse (and possibly resets the current position with
+	nd_jump_link()).  If the body won't go away until the inode
+	is gone, nothing else is needed; if it needs to be otherwise
+	pinned, the data needed to release whatever we'd grabbed
+	is to be stored in void * variable passed by address to
+	follow_link() instance.
 
   put_link: called by the VFS to release resources allocated by
-  	follow_link().  The cookie returned by follow_link() is passed
-  	to this method as the last parameter.  It is used by
-  	filesystems such as NFS where page cache is not stable
-  	(i.e. page that was installed when the symbolic link walk
-  	started might not be in the page cache at the end of the
-  	walk).
+	follow_link().  The cookie stored by follow_link() is passed
+	to this method as the last parameter; only called when
+	cookie isn't NULL.
 
   permission: called by the VFS to check for access rights on a POSIX-like
   	filesystem.
@@ -590,7 +592,7 @@ struct address_space_operations {
 	void (*invalidatepage) (struct page *, unsigned int, unsigned int);
 	int (*releasepage) (struct page *, int);
 	void (*freepage)(struct page *);
-	ssize_t (*direct_IO)(int, struct kiocb *, struct iov_iter *iter, loff_t offset);
+	ssize_t (*direct_IO)(struct kiocb *, struct iov_iter *iter, loff_t offset);
 	/* migrate the contents of a page to the specified target */
 	int (*migratepage) (struct page *, struct page *);
 	int (*launder_page) (struct page *);
@@ -804,8 +806,6 @@ struct file_operations {
 	loff_t (*llseek) (struct file *, loff_t, int);
 	ssize_t (*read) (struct file *, char __user *, size_t, loff_t *);
 	ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
-	ssize_t (*aio_read) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
-	ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
 	ssize_t (*read_iter) (struct kiocb *, struct iov_iter *);
 	ssize_t (*write_iter) (struct kiocb *, struct iov_iter *);
 	int (*iterate) (struct file *, struct dir_context *);
@@ -838,14 +838,10 @@ otherwise noted.
 
   read: called by read(2) and related system calls
 
-  aio_read: vectored, possibly asynchronous read
-
   read_iter: possibly asynchronous read with iov_iter as destination
 
   write: called by write(2) and related system calls
 
-  aio_write: vectored, possibly asynchronous write
-
   write_iter: possibly asynchronous write with iov_iter as source
 
   iterate: called when the VFS needs to read the directory contents
diff --git a/Documentation/filesystems/xfs.txt b/Documentation/filesystems/xfs.txt
index 0bfafe108357..5a5a05582b58 100644
--- a/Documentation/filesystems/xfs.txt
+++ b/Documentation/filesystems/xfs.txt
@@ -228,30 +228,19 @@ default behaviour.
 Deprecated Mount Options
 ========================
 
-  delaylog/nodelaylog
-	Delayed logging is the only logging method that XFS supports
-	now, so these mount options are now ignored.
-
-	Due for removal in 3.12.
-
-  ihashsize=value
-	In memory inode hashes have been removed, so this option has
-	no function as of August 2007. Option is deprecated.
-
-	Due for removal in 3.12.
+None at present.
 
-  irixsgid
-	This behaviour is now controlled by a sysctl, so the mount
-	option is ignored.
 
-	Due for removal in 3.12.
+Removed Mount Options
+=====================
 
-  osyncisdsync
-  osyncisosync
-	O_SYNC and O_DSYNC are fully supported, so there is no need
-	for these options any more.
+  Name				Removed
+  ----				-------
+  delaylog/nodelaylog		v3.20
+  ihashsize			v3.20
+  irixsgid			v3.20
+  osyncisdsync/osyncisosync	v3.20
 
-	Due for removal in 3.12.
 
 sysctls
 =======